// Source listing: www.pudn.com > HKJC2.rar > GetResults.java
package hkjc2.logic;
import java.io.File;
import java.io.FileOutputStream;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import org.apache.poi.hssf.usermodel.HSSFCell;
import org.apache.poi.hssf.usermodel.HSSFCellStyle;
import org.apache.poi.hssf.usermodel.HSSFDataFormat;
import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.htmlparser.Node;
import org.htmlparser.NodeFilter;
import org.htmlparser.Parser;
import org.htmlparser.Tag;
import org.htmlparser.tags.TableColumn;
import org.htmlparser.tags.TableRow;
import org.htmlparser.tags.TableTag;
import org.htmlparser.util.NodeList;
import org.htmlparser.util.ParserException;
import com.singularsys.jep.Jep;
public class GetResults {
/**
* Generates the Excel workbook of race results for one race date.
*
* Downloads the results page for {@code racedate}, extracts values from it with
* the HTML parser filters declared in this class, collects them as 50-slot rows,
* merges in values read from the sectional-time page of each race, and finally
* writes the workbook to {@code file}. Returns without writing anything when the
* results page cannot be fetched ({@code client.getHTML} returns null).
*
* NOTE(review): many lines of this method look truncated -- the text between a
* '<' and the following '>' appears to have been lost (loop headers, generic
* type arguments and the declarations of variables such as C, E, F, row, O, Z).
* The method does not compile as shown; restore it from the original source
* before changing any logic.
*
* @param client HTTP helper used to fetch pages ({@code getHTML}) and to convert text ({@code toChinese})
* @param racedate race date; it is split on "/" and the parts are reordered as [2][1][0]
* (the caller in {@code main} passes "22/06/2008")
* @param file destination file the workbook is written to
* @throws Exception propagated from page fetching, parsing or file output
*/
public void genExcel(HttpUtil client, String racedate, File file) throws Exception {
// Reorder the "/"-separated parts as [2][1][0], e.g. "22/06/2008" -> "20080622".
String[] te = racedate.split("/");
String rd = te[2]+te[1]+te[0];
String D = rd;
String url = "http://www.hkjc.com/chinese/racing/results.asp?racedate="+racedate;
String html = client.getHTML(url);
if (html == null) {
return;
}
Parser parser = new Parser();
parser.setInputHTML(html);
int pageCount = 0;
NodeList nl = parser.extractAllNodesThatMatch(PageTableFilter);
if (nl.size() > 0) {
TableTag Table3 = (TableTag)nl.elementAt(0);
String Table3Html = Table3.toHtml();
parser.setInputHTML(Table3Html);
nl = parser.extractAllNodesThatMatch(TdNowrapCenterFilter);
// NOTE(review): the loop header below is truncated; the code between it and the
// next visible statement is missing (presumably where pageCount and C were set up -- confirm).
for (int i=0;i 0) {
String tableHtml = nl.elementAt(0).toHtml();
parser.setInputHTML(tableHtml);
NodeList nl1 = parser.extractAllNodesThatMatch(TdFilter);
// NOTE(review): truncated comment line; it seems to have swallowed the opening
// "if (... > 0) {" that the closing brace after the substring call belongs to.
// for (int i=0;i 0) {
String str = nl1.elementAt(0).toPlainTextString();
// C is the text between the first "(" and the first ")" of the first cell.
int beginIndex = str.indexOf("(");
int endIndex = str.indexOf(")");
if (beginIndex > -1 && endIndex > -1)
C = str.substring(beginIndex+1, endIndex);
}
}
// G: last line of the plain text around the first <font class="general_text">,
// with the two known values replaced by one-character abbreviations.
String G = null;
parser.setInputHTML(html);
nl = parser.extractAllNodesThatMatch(fontgeneral_textFilter);
if (nl.size() > 0) {
String str = nl.elementAt(0).getParent().toPlainTextString();
String[] arr = str.trim().split("\n");
if (arr.length > 0) {
G = arr[arr.length-1].trim();
if ("沙田".equals(G)) {
G = "田";
} else if ("跑馬地".equals(G)) {
G = "谷";
}
}
}
// Create the workbook; row 0 holds the 50 column names.
HSSFWorkbook wb = new HSSFWorkbook();
HSSFSheet sheet = wb.createSheet();
int count=0;
String[] head = new String[]{"season", "meetingno", "raceinyr",
"date", "raceno", "distance", "course", "track", "raceclass",
"going", "cup", "horse", "age", "drawing", "brandno", "rating",
"netload", "updn", "bleeding", "reserve", "jockey", "stable", "fp",
"time", "wintime", "pos1", "pos2", "pos3", "pos4", "pos5", "margin",
"winticket", "plcticket", "oddon", "oddbr", "oddfn", "d_win1", "d_win2",
"d_place1", "d_place2", "d_place3", "d_place4", "secttime1", "secttime2",
"secttime3", "secttime4", "secttime5", "secttime6", "d_quin", "bodyweight"
};
HSSFRow headRow = sheet.createRow((short) count);
for (int j = 0; j < head.length; j++) {
HSSFCell cell = headRow.createCell((short) j);
cell.setCellValue(head[j]);
}
count++;
// Read the web page(s) for the given date.
// NOTE(review): truncated line -- the loop over n and the declaration of "rows" are fused here.
for (int n=0;n> rows = new ArrayList>();
// Read the data of each page and save it to the database; it may be an update or an insert.
// (Translated original comment; the visible code only collects the rows in memory.)
parser.setInputHTML(html);
nl = parser.extractAllNodesThatMatch(InfoTableFilter);
if (nl.size() > 0) {
String tableHtml = nl.elementAt(0).toHtml();
parser.setInputHTML(tableHtml);
NodeList nl1 = parser.extractAllNodesThatMatch(TdFilter);
// NOTE(review): truncated comment line, see the note on the similar line above.
// for (int i=0;i 0) {
String str = nl1.elementAt(0).toPlainTextString();
// Cell 0: C is the text inside "(...)", E the text between "第" and "場".
int beginIndex = str.indexOf("(");
int endIndex = str.indexOf(")");
if (beginIndex > -1 && endIndex > -1)
C = str.substring(beginIndex+1, endIndex);
beginIndex = str.indexOf("第");
endIndex = str.indexOf("場");
if (beginIndex > -1 && endIndex > -1)
E = str.substring(beginIndex+1, endIndex).replaceAll(" ", "").trim();
}
if (nl1.size() > 2) {
// Cell 2: I is the text between "第" and "班", F the text between "-" and "米".
String str = nl1.elementAt(2).toPlainTextString();
int beginIndex = str.indexOf("第");
int endIndex = str.indexOf("班");
if (beginIndex > -1 && endIndex > -1)
I = str.substring(beginIndex+1, endIndex);
beginIndex = str.indexOf("-");
endIndex = str.indexOf("米");
if (beginIndex > -1 && endIndex > -1)
F = str.substring(beginIndex+1, endIndex).replaceAll(" ", "").replaceAll(";", "").trim();
}
if (nl1.size() > 3) {
// Cell 3: K is the cell text with line breaks removed, converted by toChinese.
String str = nl1.elementAt(3).toPlainTextString();
str = str.replaceAll("\n", "");
str = str.replaceAll("\r", "");
K = client.toChinese(str).trim();
}
if (nl1.size() > 8) {
// Cell 8: J is handled the same way as K.
String str = nl1.elementAt(8).toPlainTextString();
str = str.replaceAll("\n", "");
str = str.replaceAll("\r", "");
J = client.toChinese(str).trim();
}
if (nl1.size() > 10) {
// Cell 10: H is the text between the first and the last double quote.
String str = nl1.elementAt(10).toPlainTextString();
int beginIndex = str.indexOf("\"");
int endIndex = str.lastIndexOf("\"");
if (beginIndex > -1 && endIndex > -1)
H = str.substring(beginIndex+1, endIndex);
}
// NOTE(review): the next two lines are truncated; the code that fills AQV is missing.
// AQ..AV receive the last (up to) six entries of AQV, AV being the very last one.
for (int i=0;i AQV = new ArrayList();
for (int j=i+1;j 0) AV = AQV.get(AQV.size() - 1);
if (AQV.size() > 1) AU = AQV.get(AQV.size() - 2);
if (AQV.size() > 2) AT = AQV.get(AQV.size() - 3);
if (AQV.size() > 3) AS = AQV.get(AQV.size() - 4);
if (AQV.size() > 4) AR = AQV.get(AQV.size() - 5);
if (AQV.size() > 5) AQ = AQV.get(AQV.size() - 6);
break;
}
}
// AK, AM, AN, AO, AW: text of the parents of selected <font size="2" face="Arial">
// tags inside the first table matched by Table450S1P0B0Filter.
parser.setInputHTML(html);
nl = parser.extractAllNodesThatMatch(Table450S1P0B0Filter);
if (nl.size() > 0) {
String Table450S1P0B0HTML = nl.elementAt(0).toHtml();
parser.setInputHTML(Table450S1P0B0HTML);
nl = parser.extractAllNodesThatMatch(font2ArialFilter);
// NOTE(review): truncated comment line; the assignment of AK (element 2) is part of it.
// for (int i=0;i 2) AK = nl.elementAt(2).getParent().toPlainTextString();
if (nl.size() > 5) AM = nl.elementAt(5).getParent().toPlainTextString();
if (nl.size() > 7) AN = nl.elementAt(7).getParent().toPlainTextString();
if (nl.size() > 9) AO = nl.elementAt(9).getParent().toPlainTextString();
if (nl.size() > 12) AW = nl.elementAt(12).getParent().toPlainTextString();
}
System.out.println("C="+C+" D="+rd+" E="+E+" F="+F+" G="+G+" H="+H+" I="+I+" J="+J+" AQ="+AQ+" AR="+AR+" AS="+AS+" AT="+AT+" AU="+AU+" AV="+AV
+" AK="+AK+" AM="+AM+" AN="+AN+" AO="+AO+" AW="+AW);
// Walk the rows of the first table matched by ListTableFilter, skipping row 0.
parser.setInputHTML(html);
NodeList nlList = parser.extractAllNodesThatMatch(ListTableFilter);
if (nlList.size() > 0) {
NodeList Rows = null;
String ListHtml = nlList.elementAt(0).toHtml();
parser.setInputHTML(ListHtml);
Rows = parser.extractAllNodesThatMatch(TrFilter);
// Y keeps the X value of the first data row (i == 1) for every following row.
String Y = null;
for (int i = 1; i < Rows.size(); i++) {
String L = null;
String N = null;
String O = null;
String Q = null;
String U = null;
String V = null;
String W = null;
String X = null;
String AE = null;
String AJ = null;
String AX = null;
Node node = Rows.elementAt(i);
String RowHtml = node.toHtml();
parser.setInputHTML(RowHtml);
try {
NodeList Cells = parser.extractAllNodesThatMatch(TdFilter);
// NOTE(review): truncated line; the assignment of Q (cell 5) is fused with a lost loop header.
for (int j=0;j 5) Q = Cells.elementAt(5).toPlainTextString();
if (Cells.size() > 0) W = Cells.elementAt(0).toPlainTextString();
if (Cells.size() > 3) U = client.toChinese(Cells.elementAt(3).toPlainTextString());
if (Cells.size() > 4) V = client.toChinese(Cells.elementAt(4).toPlainTextString());
if (Cells.size() > 9) X = Cells.elementAt(9).toPlainTextString();
if (i == 1) Y = X;
if (Cells.size() > 7) N = Cells.elementAt(7).toPlainTextString();
if (Cells.size() > 8) {
// AE ends up in column 30 ("margin"); known textual values become numeric strings.
AE = Cells.elementAt(8).toPlainTextString().replaceAll(" ", "").trim();
if ("-".equals(AE)) {
AE = "0";
} else if ("短馬頭位".equals(AE)) {
AE = "0.1";
} else if ("一頭位".equals(AE)) {
AE = "0.2";
} else if ("頸位".equals(AE)) {
AE = "0.5";
} else if ("多個馬位".equals(AE)) {
AE = "99";
}
}
if (Cells.size() > 10) AJ = Cells.elementAt(10).toPlainTextString();
if (Cells.size() > 6) AX = Cells.elementAt(6).toPlainTextString();
if (Cells.size() > 2) {
// Cell 2: L is the text before "(", O the text inside "(...)".
O = Cells.elementAt(2).toPlainTextString();
int beginIndex = O.indexOf("(");
if (beginIndex > -1)
L = client.toChinese(O.substring(0, beginIndex));
int endIndex = O.indexOf(")");
if (beginIndex > -1 && endIndex > -1)
O = O.substring(beginIndex+1, endIndex);
}
System.out.println(" L="+L+" N="+N+" O="+O+" U="+U+" V="+V+"W="+W+" X="+X+" Y="+Y+" AE="+AE+" AJ="+AJ+" AX="+AX);
}
} catch (ParserException e) {
e.printStackTrace();
}
// One 50-slot list per table row; the index is the zero-based spreadsheet
// column that the variable is named after (2 = C, 3 = D, ... 49 = AX).
List cells = new ArrayList();
for (int m=0;m<50;m++) cells.add(null);
cells.set(2, C);
cells.set(3, D);
cells.set(4, E);
cells.set(5, F);
cells.set(6, G);
cells.set(7, H);
cells.set(8, I);
cells.set(9, J);
cells.set(10, K);
cells.set(11, L);
cells.set(13, N);
cells.set(14, O);
cells.set(16, Q);
cells.set(20, U);
cells.set(21, V);
cells.set(22, W);
cells.set(23, X);
cells.set(24, Y);
cells.set(30, AE);
cells.set(35, AJ);
cells.set(36, AK);
cells.set(38, AM);
cells.set(39, AN);
cells.set(40, AO);
cells.set(42, AQ);
cells.set(43, AR);
cells.set(44, AS);
cells.set(45, AT);
cells.set(46, AU);
cells.set(47, AV);
cells.set(48, AW);
cells.set(49, AX);
rows.add(cells);
}
}
}
// If sectional times and positions are available, read them.
String secHtml = client.getHTML("http://www.hkjc.com/chinese/racing/display_sectionaltime.asp?RaceDate="+racedate+"&Raceno="+(n+1));
// System.out.println(srcHtml);
if (secHtml == null) continue;
parser.setInputHTML(secHtml);
NodeList nlSecTable = parser.extractAllNodesThatMatch(SecTableFilter);
// NOTE(review): truncated comment line; the opening "if (... > 0) {" appears to be lost in it.
// for (int i=0;i 0) {
TableTag table = (TableTag)nlSecTable.elementAt(0);
List Rows = new ArrayList();
// NOTE(review): the next two lines are truncated; the code that fills Rows and
// declares O, Z, AA, AB, AC, AD is missing. The visible inner loop starts at row index 3.
for (int i=0;i 3) {
for (int i=3;i ZAE = new ArrayList();
//System.out.println(i + ": " + Rows.get(i).toHtml());
NodeList Cells = Rows.get(i).getChildren();
// NOTE(review): truncated comment line, the opening "if (... > 5) {" is part of it.
// for (int j=0;j 5) {
// O is the text inside "(...)" of child 5; it is compared with slot 14 of the collected rows below.
O = Cells.elementAt(5).toPlainTextString().trim();
int beginIndex = O.indexOf("(");
int endIndex = O.indexOf(")");
if (beginIndex > -1 && endIndex > -1)
O = O.substring(beginIndex+1, endIndex);
}
// NOTE(review): truncated loop header; the declarations of str and temp are missing.
for (int m=7;m 0) str = temp[0];
str = str.replace("\n", "");
str = str.replace("\r", "");
str = str.trim();
if (str.length() > 0) ZAE.add(str);
}
// Drop the last collected value, then take the remaining last (up to) five as Z..AD.
if (ZAE.size() > 0) ZAE.remove(ZAE.size() - 1);
if (ZAE.size() > 0) AD = ZAE.get(ZAE.size() - 1);
if (ZAE.size() > 1) AC = ZAE.get(ZAE.size() - 2);
if (ZAE.size() > 2) AB = ZAE.get(ZAE.size() - 3);
if (ZAE.size() > 3) AA = ZAE.get(ZAE.size() - 4);
if (ZAE.size() > 4) Z = ZAE.get(ZAE.size() - 5);
System.out.println("O="+O+" Z="+Z+" AA="+AA+" AB="+AB+" AC="+AC+" AD="+AD);
// Match on the date and column O.
if (O != null) {
// NOTE(review): truncated loop header; only the first row whose slot 14 equals O is updated.
for (int j=0;j cells = rows.get(j);
if (O.equals(cells.get(14))) {
cells.set(25, Z);
cells.set(26, AA);
cells.set(27, AB);
cells.set(28, AC);
cells.set(29, AD);
break;
}
}
}
}
}
}
// NOTE(review): truncated loop header; the creation of "row" (presumably sheet.createRow(count)) is missing.
for (int i=0;i cells = rows.get(i);
setRow(wb, row, cells);
count++;
}
}
// NOTE(review): the stream is not closed if wb.write throws; consider try/finally.
FileOutputStream fileOut = new FileOutputStream(file);
wb.write(fileOut);
fileOut.close();
System.out.println("导出赛果及派彩完毕");
}
/**
* Writes the values of one collected row into the cells of a sheet row,
* choosing the cell type by column index.
*
* NOTE(review): the loop header, the creation of {@code cell}, the declaration
* of {@code temp}, the opening of the {@code switch} and its first case labels
* appear to have been lost on the first line of the body; the method does not
* compile as shown.
*
* @param wb workbook, used to create the date cell style for column 3
* @param row sheet row that receives the cells
* @param cells values of the row, indexed by zero-based column
*/
private void setRow(HSSFWorkbook wb, HSSFRow row, List cells) {
HttpUtil util = new HttpUtil();
// NOTE(review): truncated line, see the method comment.
for (int i=0;i 0) {
// Numeric when the text parses as a double, otherwise stored as a UTF-16 string.
try {
cell.setCellValue(Double.parseDouble(temp));
} catch (Exception e) {
//System.out.println("error column:" + i);
//e.printStackTrace();
cell.setEncoding(HSSFCell.ENCODING_UTF_16);
cell.setCellValue(temp);
}
}
break;
case 48:
// Same as above, but the text is passed through toChinese first.
if (temp != null && temp.trim().length() > 0) {
temp = util.toChinese(temp);
try {
cell.setCellValue(Double.parseDouble(temp));
} catch (Exception e) {
//System.out.println("error column:" + i);
//e.printStackTrace();
cell.setEncoding(HSSFCell.ENCODING_UTF_16);
cell.setCellValue(temp);
}
}
break;
case 6:
case 10:
case 11:
case 20:
case 21:
// Always stored as text after toChinese; column 20 keeps only the part before the first space.
if (temp != null && temp.trim().length() > 0) {
temp = util.toChinese(temp);
if (i == 20) {
String[] arr = temp.split(" ");
if (arr.length > 0) temp = arr[0];
}
cell.setEncoding(HSSFCell.ENCODING_UTF_16);
cell.setCellValue(temp);
}
break;
case 3:
// Column 3: an 8-character yyyyMMdd text is rewritten as yyyy-MM-dd, parsed and
// stored as a date with the built-in "d-mmm-yy" format; on failure temp is stored as text.
if (temp != null && temp.trim().length() > 0) {
try {
temp = temp.substring(0,4)+"-"+temp.substring(4,6)+"-"+temp.substring(6,8);
Date d = new SimpleDateFormat("yyyy-MM-dd").parse(temp);
HSSFCellStyle cellStyle = wb.createCellStyle();
cellStyle.setDataFormat(HSSFDataFormat.getBuiltinFormat("d-mmm-yy"));
cell.setCellValue(d);
cell.setCellStyle(cellStyle);
} catch (Exception e) {
//System.out.println("error column:" + i);
//e.printStackTrace();
cell.setEncoding(HSSFCell.ENCODING_UTF_16);
cell.setCellValue(temp);
}
}
break;
case 30:
// Column 30: every "-" is turned into "+" and the text is evaluated as an expression
// with Jep; if that fails, every "+" is turned into "-" instead.
// The result is stored as a number when it parses, otherwise as text.
if (temp != null && temp.trim().length() > 0) {
try {
temp = temp.replaceAll("-", "+");
Jep jep = new Jep();
jep.parse(temp);
temp = jep.evaluate().toString();
} catch (Exception e) {
//System.out.println("error column:" + i);
//e.printStackTrace();
temp = temp.replace("+", "-");
}
try {
cell.setCellValue(Double.parseDouble(temp));
} catch (Exception e) {
//System.out.println("error column:" + i);
//e.printStackTrace();
cell.setEncoding(HSSFCell.ENCODING_UTF_16);
cell.setCellValue(temp);
}
}
break;
default:
// Every other column: non-blank text is stored unchanged as a UTF-16 string.
if (temp != null && temp.trim().length() > 0) {
cell.setEncoding(HSSFCell.ENCODING_UTF_16);
cell.setCellValue(temp);
}
}
}
}
/** Accepts table tags with border="0", cellpadding="1" and cellspacing="1". */
private NodeFilter PageTableFilter = new NodeFilter() {
    public boolean accept(Node node) {
        // Every TableTag is a Tag, so one type test is enough.
        if (!(node instanceof TableTag)) {
            return false;
        }
        Tag table = (Tag) node;
        return "0".equals(table.getAttribute("border"))
                && "1".equals(table.getAttribute("cellpadding"))
                && "1".equals(table.getAttribute("cellspacing"));
    }
};
/** Accepts table cells with align="middle" that carry no valign attribute. */
private NodeFilter TdNowrapCenterFilter = new NodeFilter() {
    public boolean accept(Node node) {
        if (!(node instanceof TableColumn)) {
            return false;
        }
        Tag cell = (Tag) node;
        if (!"middle".equals(cell.getAttribute("align"))) {
            return false;
        }
        return cell.getAttribute("valign") == null;
    }
};
/** Accepts table tags with width="760", border="0", cellpadding="0" and cellspacing="1". */
private NodeFilter InfoTableFilter = new NodeFilter() {
    public boolean accept(Node node) {
        if (!(node instanceof TableTag)) {
            return false;
        }
        Tag table = (Tag) node;
        return "760".equals(table.getAttribute("width"))
                && "0".equals(table.getAttribute("border"))
                && "0".equals(table.getAttribute("cellpadding"))
                && "1".equals(table.getAttribute("cellspacing"));
    }
};
/** Accepts table tags with width="450", border="0", cellpadding="0" and cellspacing="1". */
private NodeFilter Table450S1P0B0Filter = new NodeFilter() {
    public boolean accept(Node node) {
        if (!(node instanceof TableTag)) {
            return false;
        }
        Tag table = (Tag) node;
        return "450".equals(table.getAttribute("width"))
                && "0".equals(table.getAttribute("border"))
                && "0".equals(table.getAttribute("cellpadding"))
                && "1".equals(table.getAttribute("cellspacing"));
    }
};
/** Accepts every table tag. */
private NodeFilter TableFilter = new NodeFilter() {
    public boolean accept(Node node) {
        // A TableTag is always a Tag, so the single type test decides the result.
        return node instanceof TableTag;
    }
};
/** Accepts every table cell (TableColumn) tag. */
private NodeFilter TdFilter = new NodeFilter() {
    public boolean accept(Node node) {
        // A TableColumn is always a Tag, so the single type test decides the result.
        return node instanceof TableColumn;
    }
};
/** Accepts every table row (TableRow) tag. */
private NodeFilter TrFilter = new NodeFilter() {
    public boolean accept(Node node) {
        // A TableRow is always a Tag, so the single type test decides the result.
        return node instanceof TableRow;
    }
};
/**
 * Accepts font tags with face="Arial, Helvetica, sans-serif" and size="2"
 * that carry no color attribute.
 */
private NodeFilter FontSize2FaceAHSFilter = new NodeFilter() {
    public boolean accept(Node node) {
        if (!(node instanceof Tag)) {
            return false;
        }
        Tag font = (Tag) node;
        if (!"font".equals(font.getTagName().toLowerCase())) {
            return false;
        }
        if (!"Arial, Helvetica, sans-serif".equals(font.getAttribute("face"))) {
            return false;
        }
        return "2".equals(font.getAttribute("size")) && font.getAttribute("color") == null;
    }
};
/** Accepts TABLE tags with width="760", cellspacing="1", cellpadding="1" and border="0". */
private NodeFilter ListTableFilter = new NodeFilter() {
    public boolean accept(Node node) {
        if (!(node instanceof Tag)) {
            return false;
        }
        Tag table = (Tag) node;
        if (!table.getTagName().toUpperCase().equals("TABLE")) {
            return false;
        }
        return "760".equals(table.getAttribute("width"))
                && "1".equals(table.getAttribute("cellspacing"))
                && "1".equals(table.getAttribute("cellpadding"))
                && "0".equals(table.getAttribute("border"));
    }
};
/** Accepts FONT tags with size="2" and face="Arial". */
private NodeFilter font2ArialFilter = new NodeFilter() {
    public boolean accept(Node node) {
        if (!(node instanceof Tag)) {
            return false;
        }
        Tag font = (Tag) node;
        if (!font.getTagName().toUpperCase().equals("FONT")) {
            return false;
        }
        return "2".equals(font.getAttribute("size")) && "Arial".equals(font.getAttribute("face"));
    }
};
/** Accepts table tags with width="100%", cellspacing="1" and border="0". */
private NodeFilter SecTableFilter = new NodeFilter() {
    public boolean accept(Node node) {
        if (!(node instanceof TableTag)) {
            return false;
        }
        Tag table = (Tag) node;
        return "100%".equals(table.getAttribute("width"))
                && "1".equals(table.getAttribute("cellspacing"))
                && "0".equals(table.getAttribute("border"));
    }
};
/** Accepts FONT tags with class="general_text". */
private NodeFilter fontgeneral_textFilter = new NodeFilter() {
    public boolean accept(Node node) {
        if (!(node instanceof Tag)) {
            return false;
        }
        Tag font = (Tag) node;
        if (!font.getTagName().toUpperCase().equals("FONT")) {
            return false;
        }
        return "general_text".equals(font.getAttribute("class"));
    }
};
/**
 * Demo entry point: exports the results of the fixed race date "22/06/2008"
 * to the fixed file C:\post-race.xls.
 *
 * @param args ignored
 * @throws Exception propagated from {@code genExcel}
 */
public static void main(String[] args) throws Exception {
    File target = new File("C:\\post-race.xls");
    new GetResults().genExcel(new HttpUtil(), "22/06/2008", target);
}
}