// www.pudn.com > HKJC2.rar > GetStarters.java
package hkjc2.logic;
import java.io.File;
import java.io.FileOutputStream;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import org.apache.poi.hssf.usermodel.HSSFCell;
import org.apache.poi.hssf.usermodel.HSSFCellStyle;
import org.apache.poi.hssf.usermodel.HSSFDataFormat;
import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.htmlparser.Node;
import org.htmlparser.NodeFilter;
import org.htmlparser.Parser;
import org.htmlparser.Tag;
import org.htmlparser.tags.LinkTag;
import org.htmlparser.tags.ScriptTag;
import org.htmlparser.tags.TableColumn;
import org.htmlparser.tags.TableRow;
import org.htmlparser.tags.TableTag;
import org.htmlparser.util.NodeList;
import org.htmlparser.util.ParserException;
public class GetStarters {
/**
* Updates the starters table.
* <p>
* Fetches the starters page through {@code client.getHTML}, returns early when no HTML is
* returned or when {@code pageCount} is still below 1, derives a race date string from the
* page text and finally calls {@code UpdateData}.
* <p>
* NOTE(review): two lines in this method look truncated in this copy of the file (text
* starting at a "less than" character appears to have been dropped), so part of the
* page-count and date extraction logic is not visible here.
*
* @param client helper used to download the page HTML
* @param file forwarded unchanged to {@code UpdateData}
* @throws Exception declared on the signature; no exception handling is visible in this method
*/
public void UpdateStarters(HttpUtil client, File file) throws Exception {
// Fixed entry URL; the HTML is fetched through the caller-supplied client.
String url = "http://www.hkjc.com/chinese/racing/startersR1_c.asp";
String html = client.getHTML(url);
// Nothing to do when the client returned no HTML.
if (html == null) return;
Parser parser = new Parser();
parser.setInputHTML(html);
int pageCount = 0;
String tag = null;
// Find the table accepted by Table3Filter, then re-parse only that table's HTML.
NodeList nl = parser.extractAllNodesThatMatch(Table3Filter);
if (nl.size() > 0) {
TableTag Table3 = (TableTag)nl.elementAt(0);
String Table3Html = Table3.toHtml();
parser.setInputHTML(Table3Html);
nl = parser.extractAllNodesThatMatch(TdNowrapCenterFilter);
// NOTE(review): the next line is truncated in this copy. The loop header, the declaration
// of `children` and (presumably) the place where pageCount is incremented are missing.
for (int i=0;i 0) {
if (children.elementAt(0) instanceof LinkTag) {
LinkTag link = (LinkTag)children.elementAt(0);
// Start from the link target; when it contains '_' keep only the second '_'-separated piece.
tag = link.getLink();
if (tag != null) {
String[] arr = tag.split("_");
if (arr.length > 1) tag = arr[1];
}
//System.out.println(tag);
}
}
}
}
}
// If there is no data, stop here.
// No page numbers means there is no data.
if (pageCount < 1) return;
String racedateYear = "";
String racedateMonth = "";
String racedateDay = "";
String racedate = "";
String G = null;
// Re-parse the full page and collect the <font> tags accepted by FontSize2FaceAHSFilter.
parser.setInputHTML(html);
nl = parser.extractAllNodesThatMatch(FontSize2FaceAHSFilter);
if (nl.size() > 1) {
// NOTE(review): the comment line below is truncated and has swallowed real code: the
// declaration of `temp` is not visible, and the '{' inside the comment is matched by a
// live '}' further down.
// for (int i=0;i 0) {
// temp[0] is searched for the 年 (year), 月 (month) and 日 (day) markers.
String date = temp[0].trim();
int yearIndex = date.indexOf("年");
int monthIndex = date.indexOf("月");
int dayIndex = date.indexOf("日");
racedateYear = date.substring(0, yearIndex);
racedateMonth = date.substring(yearIndex+1, monthIndex);
// Left-pad the month with "0" and try to keep two characters.
// NOTE(review): String.substring takes (beginIndex, endIndex). For a two-digit month
// ("0" + "10" = "010") this evaluates substring(1, 2) and yields "1"; only single-digit
// months come out as intended (e.g. "09"). Looks like a (start, length) assumption - confirm.
racedateMonth = "0"+racedateMonth;
racedateMonth = racedateMonth.substring(racedateMonth.length()-2, 2);
// The day is used exactly as extracted; no zero padding is applied here.
racedateDay = date.substring(monthIndex+1, dayIndex);
racedate = racedateYear+racedateMonth+racedateDay;
//System.out.println(racedateYear + "/" + racedateMonth + "/" + racedateDay);
}
// G is assigned here but not read again in the visible part of this method.
if (temp.length > 2) {
G = temp[2].trim();
}
}
// Hand over to the workbook export.
UpdateData(client, parser, pageCount, racedate, file, tag);
}
/**
* Builds an Excel workbook from the parsed starters data and writes it to {@code file}.
* <p>
* Row 0 receives the 50 column names listed in {@code head}. Every later row is produced by
* filling a 50-slot list (unset slots stay {@code null}) and passing it to {@code setRow}.
* The single-letter locals (D..W) match the spreadsheet column letter of the slot they are
* stored in: index 3 is D, index 4 is E, and so on up to index 22, which is W.
* <p>
* NOTE(review): several lines of this method are truncated in this copy of the file (text
* starting at a "less than" character appears to have been dropped). Loop headers and the
* declarations of {@code html}, {@code nl}, {@code hourseCount}, {@code cols}, {@code Cols},
* {@code offset} and the D..W locals are not visible, so the comments below only describe
* the statements that can still be read.
*
* @param client used here for {@code toChinese} conversions
* @param parser re-targeted with {@code setInputHTML} before each extraction
* @param pageCount not referenced in the visible lines; presumably the bound of the truncated outer loop - confirm
* @param racedate not referenced in the visible lines
* @param file destination of the generated workbook
* @param tag not referenced in the visible lines
* @throws Exception declared on the signature; only the integer parsing of Q/offset is caught locally
* @throws ParserException declared on the signature
*/
private void UpdateData(HttpUtil client, Parser parser, int pageCount,
String racedate, File file, String tag)
throws Exception, ParserException {
HSSFWorkbook wb = new HSSFWorkbook();
HSSFSheet sheet = wb.createSheet();
// Index of the next sheet row to create.
int count=0;
// Header names; position in this array is the column index used by cells.set(...) below.
String[] head = new String[]{"season", "meetingno", "raceinyr",
"date", "raceno", "distance", "course", "track", "raceclass",
"going", "cup", "horse", "age", "drawing", "brandno", "rating",
"netload", "updn", "bleeding", "reserve", "jockey", "stable", "fp",
"time", "wintime", "pos1", "pos2", "pos3", "pos4", "pos5", "margin",
"winticket", "plcticket", "oddon", "oddbr", "oddfn", "d_win1", "d_win2",
"d_place1", "d_place2", "d_place3", "d_place4", "secttime1", "secttime2",
"secttime3", "secttime4", "secttime5", "secttime6", "d_quin", "bodyweight"
};
// Write the header row.
HSSFRow headRow = sheet.createRow((short) count);
for (int j = 0; j < head.length; j++) {
HSSFCell cell = headRow.createCell((short) j);
cell.setCellValue(head[j]);
}
count++;
// NOTE(review): the next line is truncated in this copy; the loop over n and everything
// up to an "if (nl.size() > 1)"-style check is missing.
for (int n=0;n 1) {
// G: third comma-separated field of the second matched node's parent text.
String str = nl.elementAt(1).getParent().toPlainTextString();
String[] temp = str.split(",");
if (temp.length > 2) {
G = temp[2].trim();
// Shorten the two recognised values to a single character.
if ("沙田".equals(G)) {
G = "田";
} else if ("跑馬地".equals(G)) {
G = "谷";
}
}
}
if (nl.size() > 2) {
String str = nl.elementAt(2).getParent().toPlainTextString();
String[] temp = str.split(",");
// H: second field with double quotes removed.
if (temp.length > 1) {
H = temp[1].trim();
H = H.replaceAll("\"", "");
}
// F: third field, cut off at the first "米" (metre) character when present.
if (temp.length > 2) {
F = temp[2].trim();
int idx = F.indexOf("米");
if (idx > -1) F = F.substring(0, idx);
}
}
if (nl.size() > 4) {
// I: the text between the last "第" and the last "班" in the fifth node's parent text.
String str = nl.elementAt(4).getParent().toPlainTextString();
int beginIndex = str.lastIndexOf("第");
int endIndex = str.lastIndexOf("班");
if (beginIndex > -1 && endIndex > -1)
I = str.substring(beginIndex+1, endIndex);
}
parser.setInputHTML(html);
nl = parser.extractAllNodesThatMatch(FontSize2FaceHKSCSFilter);
// NOTE(review): the comment line below is truncated and has swallowed a live '{'.
// for (int i=0;i 0) {
// E and K come from the space-separated pieces (index 1 and 4) of the first match's parent text.
String str = nl.elementAt(0).getParent().toPlainTextString();
String[] temp = str.split(" ");
if (temp.length > 1) {
E = temp[1].trim();
}
if (temp.length > 4) {
K = temp[4].trim();
K = client.toChinese(K);
}
}
// Debug trace of the per-race values.
System.out.println("D="+D+" E="+E+" F="+F+" G="+G+" H="+H+" I="+I+" J="+J+" K="+K);
parser.setInputHTML(html);
nl = parser.extractAllNodesThatMatch(ScriptFilter);
// NOTE(review): the comment line below is truncated and has swallowed a live '{'.
// for (int i=0;i 0) {
// Cut the script text between the "ColPos = new Array(30) ;" marker and "var PageName",
// then split it into lines.
// NOTE(review): neither indexOf result is checked for -1 before substring is called.
String str = nl.elementAt(0).getParent().toPlainTextString();
int startIndex = str.indexOf("ColPos = new Array(30) ;") + "ColPos = new Array(30) ;".length();
int endIndex = str.indexOf("var PageName");
str = str.substring(startIndex + 1, endIndex);
String[] arr = str.split("\n");
if (arr.length > 2) {
// NOTE(review): the next line is truncated in this copy; the loop over arr starting at
// index 2 and the code that fills cols, L..U and offset's inputs are missing.
for (int i=2;i -1 && endIndex > -1)
offset = U.substring(startIndex + 1, endIndex);
if (offset != null) {
// Best effort: add offset to Q; when either value is not an integer the exception is
// swallowed and Q keeps its previous value.
try {Q = "" + (Integer.parseInt(Q) + Integer.parseInt(offset));}catch(Exception e){}
}
V = client.toChinese(cols[15]).trim();
W = cols[1];
// Debug trace of the per-horse values.
System.out.println("L="+L+" M="+M+" N="+N+" O="+O+" P="+P+" Q="+Q+" U="+U+" V="+V+" W="+W);
// 50 null slots, one per header column; only the slots set below carry data.
List cells = new ArrayList();
for (int m=0;m<50;m++) cells.add(null);
cells.set(3, D);
cells.set(4, E);
cells.set(5, F);
cells.set(6, G);
cells.set(7, H);
cells.set(8, I);
cells.set(9, J);
cells.set(10, K);
cells.set(11, L);
cells.set(12, M);
cells.set(13, N);
cells.set(14, O);
cells.set(15, P);
cells.set(16, Q);
cells.set(19, T);
cells.set(20, U);
cells.set(21, V);
cells.set(22, W);
HSSFRow row = sheet.createRow((short) count);
setRow(wb, row, cells);
count++;
hourseCount++;
}
}
}
// Column T (index 19, header "reserve"): rows taken from the section matched by reversedFilter.
parser.setInputHTML(html);
nl = parser.extractAllNodesThatMatch(reversedFilter);
if (nl.size() > 0) {
// NOTE(review): the comment line below is truncated.
// for (int i=0;i reversed = new ArrayList();
// Climb three levels up from the matched <b> tag and take that ancestor's children.
NodeList children = nl.elementAt(0).getParent().getParent().getParent().getChildren();
// NOTE(review): the next two lines are truncated in this copy; the loop headers and the
// declaration of Cols are missing.
for (int i=0;i 2) {
for (int i=2;i cells = new ArrayList();
for (int m=0;m<50;m++) cells.add(null);
cells.set(3, D);
cells.set(4, E);
cells.set(5, F);
cells.set(6, G);
cells.set(7, H);
cells.set(8, I);
cells.set(9, J);
cells.set(10, K);
// Per-horse values are read straight from the table cells in Cols.
cells.set(11, Cols.elementAt(1).toPlainTextString().trim());
cells.set(12, Cols.elementAt(5).toPlainTextString().trim());
//cells.set(13, N);
//cells.set(14, O);
cells.set(15, Cols.elementAt(4).toPlainTextString().trim());
cells.set(16, Cols.elementAt(3).toPlainTextString().trim());
cells.set(19, Cols.elementAt(0).toPlainTextString().trim());
//cells.set(20, U);
cells.set(21, Cols.elementAt(7).toPlainTextString().trim());
// Slot 22 receives the running horse counter instead of a scraped value.
cells.set(22, ""+hourseCount);
HSSFRow row = sheet.createRow((short) count);
setRow(wb, row, cells);
count++;
hourseCount++;
}
}
}
}
// Write the workbook to the target file.
// NOTE(review): the stream is not closed when wb.write throws (no try/finally).
FileOutputStream fileOut = new FileOutputStream(file);
wb.write(fileOut);
fileOut.close();
System.out.println("导出排位表完毕");
}
/**
* Writes the values in {@code cells} into {@code row}, choosing how each value is stored
* from its column index.
* <p>
* NOTE(review): the loop header, the {@code switch} header, the first {@code case} labels
* and the declarations of {@code cell} and {@code temp} are truncated in this copy of the
* file, so the set of indexes handled by the first branch is not visible.
*
* @param wb workbook, used to create the date cell style for column index 3
* @param row row that receives the cells
* @param cells per-column values; blank or {@code null} entries are not written by the visible branches
*/
private void setRow(HSSFWorkbook wb, HSSFRow row, List cells) {
HttpUtil util = new HttpUtil();
// NOTE(review): the next line is truncated in this copy (see the method comment).
for (int i=0;i 0) {
// Numeric first: store as a number, fall back to UTF-16 text when parsing fails.
try {
cell.setCellValue(Double.parseDouble(temp));
} catch (Exception e) {
//System.out.println("error column:" + i);
//e.printStackTrace();
cell.setEncoding(HSSFCell.ENCODING_UTF_16);
cell.setCellValue(temp);
}
}
break;
case 48:
// Same numeric-then-text handling, but the value goes through util.toChinese first.
if (temp != null && temp.trim().length() > 0) {
temp = util.toChinese(temp);
try {
cell.setCellValue(Double.parseDouble(temp));
} catch (Exception e) {
//System.out.println("error column:" + i);
//e.printStackTrace();
cell.setEncoding(HSSFCell.ENCODING_UTF_16);
cell.setCellValue(temp);
}
}
break;
case 6:
case 10:
case 11:
case 20:
case 21:
// Text columns: convert with util.toChinese and always store as UTF-16 text.
if (temp != null && temp.trim().length() > 0) {
temp = util.toChinese(temp);
// Column 20 keeps only the part before the first space.
if (i == 20) {
String[] arr = temp.split(" ");
if (arr.length > 0) temp = arr[0];
}
cell.setEncoding(HSSFCell.ENCODING_UTF_16);
cell.setCellValue(temp);
}
break;
case 3:
// Date column: the first eight characters are read as yyyyMMdd and stored as a date cell
// with the built-in "d-mmm-yy" format; on any failure the value is written as text.
// NOTE(review): a new cell style is created for every date cell written; consider
// reusing a single style per workbook.
if (temp != null && temp.trim().length() > 0) {
try {
temp = temp.substring(0,4)+"-"+temp.substring(4,6)+"-"+temp.substring(6,8);
Date d = new SimpleDateFormat("yyyy-MM-dd").parse(temp);
HSSFCellStyle cellStyle = wb.createCellStyle();
cellStyle.setDataFormat(HSSFDataFormat.getBuiltinFormat("d-mmm-yy"));
cell.setCellValue(d);
cell.setCellStyle(cellStyle);
} catch (Exception e) {
//System.out.println("error column:" + i);
//e.printStackTrace();
cell.setEncoding(HSSFCell.ENCODING_UTF_16);
cell.setCellValue(temp);
}
}
break;
default:
// Everything else is stored as UTF-16 text when non-blank.
if (temp != null && temp.trim().length() > 0) {
cell.setEncoding(HSSFCell.ENCODING_UTF_16);
cell.setCellValue(temp);
}
}
}
}
/** Accepts only {@code TableTag} nodes whose {@code id} attribute equals "Table3". */
private NodeFilter Table3Filter = new NodeFilter() {
    public boolean accept(Node node) {
        // Anything that is not a table tag is rejected up front.
        if (!(node instanceof TableTag)) {
            return false;
        }
        String id = ((Tag) node).getAttribute("id");
        return "Table3".equals(id);
    }
};
/** Accepts only {@code TableColumn} nodes whose {@code align} attribute equals "middle". */
private NodeFilter TdNowrapCenterFilter = new NodeFilter() {
    public boolean accept(Node node) {
        // Anything that is not a table column is rejected up front.
        if (!(node instanceof TableColumn)) {
            return false;
        }
        String alignment = ((Tag) node).getAttribute("align");
        return "middle".equals(alignment);
    }
};
/**
 * Accepts {@code font} tags with face "Arial, Helvetica, sans-serif", size "2" and no
 * {@code color} attribute.
 */
private NodeFilter FontSize2FaceAHSFilter = new NodeFilter() {
    public boolean accept(Node node) {
        if (!(node instanceof Tag)) {
            return false;
        }
        Tag candidate = (Tag) node;
        // Checks run in the same order as a short-circuit && chain: name, face, size, color.
        if (!"font".equals(candidate.getTagName().toLowerCase())) {
            return false;
        }
        if (!"Arial, Helvetica, sans-serif".equals(candidate.getAttribute("face"))) {
            return false;
        }
        if (!"2".equals(candidate.getAttribute("size"))) {
            return false;
        }
        return candidate.getAttribute("color") == null;
    }
};
/**
 * Accepts {@code font} tags with face "細明體_HKSCS,Arial, Helvetica, sans-serif", size "2"
 * and no {@code color} attribute.
 */
private NodeFilter FontSize2FaceHKSCSFilter = new NodeFilter() {
    public boolean accept(Node node) {
        if (!(node instanceof Tag)) {
            return false;
        }
        Tag candidate = (Tag) node;
        // Checks run in the same order as a short-circuit && chain: name, face, size, color.
        if (!"font".equals(candidate.getTagName().toLowerCase())) {
            return false;
        }
        if (!"細明體_HKSCS,Arial, Helvetica, sans-serif".equals(candidate.getAttribute("face"))) {
            return false;
        }
        if (!"2".equals(candidate.getAttribute("size"))) {
            return false;
        }
        return candidate.getAttribute("color") == null;
    }
};
/**
 * Accepts {@code ScriptTag} nodes whose {@code language} attribute is "javascript" and
 * that carry no {@code src} attribute.
 */
private NodeFilter ScriptFilter = new NodeFilter() {
    public boolean accept(Node node) {
        if (!(node instanceof ScriptTag)) {
            return false;
        }
        Tag script = (Tag) node;
        if (!"javascript".equals(script.getAttribute("language"))) {
            return false;
        }
        return script.getAttribute("src") == null;
    }
};
/**
 * Accepts {@code b} tags whose parent's plain text contains "後 備 馬 匹".
 * <p>
 * A {@code b} tag without a parent node is rejected instead of causing a
 * {@code NullPointerException}.
 */
private NodeFilter reversedFilter = new NodeFilter() {
    public boolean accept(Node node) {
        if (!(node instanceof Tag)) {
            return false;
        }
        Tag tag = (Tag) node;
        if (!"b".equals(tag.getTagName().toLowerCase())) {
            return false;
        }
        // The filter is applied to every node, so a parentless <b> must not be dereferenced.
        Node parent = tag.getParent();
        if (parent == null) {
            return false;
        }
        return parent.toPlainTextString().contains("後 備 馬 匹");
    }
};
/**
 * Command-line entry point: runs one starters update and writes the result to
 * {@code C:\pre-race.xls}.
 *
 * @param args not used
 * @throws Exception whatever {@code UpdateStarters} throws
 */
public static void main(String[] args) throws Exception {
    new GetStarters().UpdateStarters(new HttpUtil(), new File("C:\\pre-race.xls"));
}
}