// Source: www.pudn.com > HKJC2.rar > GetStarters.java


package hkjc2.logic; 
 
import java.io.File; 
import java.io.FileOutputStream; 
import java.text.SimpleDateFormat; 
import java.util.ArrayList; 
import java.util.Date; 
import java.util.List; 
 
import org.apache.poi.hssf.usermodel.HSSFCell; 
import org.apache.poi.hssf.usermodel.HSSFCellStyle; 
import org.apache.poi.hssf.usermodel.HSSFDataFormat; 
import org.apache.poi.hssf.usermodel.HSSFRow; 
import org.apache.poi.hssf.usermodel.HSSFSheet; 
import org.apache.poi.hssf.usermodel.HSSFWorkbook; 
import org.htmlparser.Node; 
import org.htmlparser.NodeFilter; 
import org.htmlparser.Parser; 
import org.htmlparser.Tag; 
import org.htmlparser.tags.LinkTag; 
import org.htmlparser.tags.ScriptTag; 
import org.htmlparser.tags.TableColumn; 
import org.htmlparser.tags.TableRow; 
import org.htmlparser.tags.TableTag; 
import org.htmlparser.util.NodeList; 
import org.htmlparser.util.ParserException; 
 
public class GetStarters { 
 
	/**
	 * Updates the starters (race card) table.
	 *
	 * Fetches the first starters page, inspects the "Table3" navigation table and
	 * the race-date header, then delegates to UpdateData to export the data to
	 * {@code file}.
	 *
	 * NOTE(review): the text of this method appears to have lost spans (see the
	 * truncated {@code for} header below and the use of {@code children} and
	 * {@code temp} without a visible declaration) -- restore from the original
	 * file before relying on it.
	 *
	 * @param client HTTP helper used to download the page HTML
	 * @param file   output file handed on to UpdateData
	 * @throws Exception
	 */
	public void UpdateStarters(HttpUtil client, File file) throws Exception {
		String url = "http://www.hkjc.com/chinese/racing/startersR1_c.asp";
		String html = client.getHTML(url);
		// Nothing to do when the page could not be fetched.
		if (html == null) return;
		Parser parser = new Parser();
		parser.setInputHTML(html);

		int pageCount = 0;
		String tag = null;
		NodeList nl = parser.extractAllNodesThatMatch(Table3Filter);
        if (nl.size() > 0) {
        	// Re-parse only the HTML of the first "Table3" table and collect its align="middle" cells.
        	TableTag Table3 = (TableTag)nl.elementAt(0);
        	String Table3Html = Table3.toHtml();
        	parser.setInputHTML(Table3Html);
            nl = parser.extractAllNodesThatMatch(TdNowrapCenterFilter);
            // NOTE(review): the loop header and the declaration of `children` are missing from the text here;
            // presumably pageCount is also updated in the lost span -- TODO confirm.
            for (int i=0;i 0) {
	            		if (children.elementAt(0) instanceof LinkTag) {
	            			LinkTag link = (LinkTag)children.elementAt(0);
	            			tag = link.getLink();
	            			// Keep only the part of the link after the first "_" when there is one.
	            			if (tag != null) {
	            				String[] arr = tag.split("_");
	            				if (arr.length > 1) tag = arr[1];
	            			}
	            			//System.out.println(tag);
	            		}
	            	}
            	}
            }
        }
        // If there is no data, stop.
		// No page numbers means there is no data.
        if (pageCount < 1) return;

		String racedateYear = "";
		String racedateMonth = "";
		String racedateDay = "";
		String racedate = "";
		String G = null;
		parser.setInputHTML(html);
		nl = parser.extractAllNodesThatMatch(FontSize2FaceAHSFilter);
		if (nl.size() > 1) {
			// NOTE(review): the following comment line seems to have swallowed real code
			// (the declaration of `temp` is not visible) -- TODO restore.
//			for (int i=0;i 0) {
				// The date text is split at the 年 / 月 / 日 markers into year, month and day.
				String date = temp[0].trim();
				int yearIndex = date.indexOf("年");
				int monthIndex = date.indexOf("月");
				int dayIndex = date.indexOf("日");
				racedateYear = date.substring(0, yearIndex);
				racedateMonth = date.substring(yearIndex+1, monthIndex);
				// Left-pad the month with "0" and then cut it back.
				racedateMonth = "0"+racedateMonth;
				// NOTE(review): substring(begin, end) takes an END INDEX, not a length; for a two-digit
				// month ("012") this yields substring(1, 2) == "1". Probably meant
				// substring(length()-2) -- TODO confirm and fix.
				racedateMonth = racedateMonth.substring(racedateMonth.length()-2, 2);
				// NOTE(review): the day is not zero-padded, unlike the month.
				racedateDay = date.substring(monthIndex+1, dayIndex);
				racedate = racedateYear+racedateMonth+racedateDay;
				//System.out.println(racedateYear + "/" + racedateMonth + "/" + racedateDay);
			}
			if (temp.length > 2) {
				// G is assigned here but not read again in the visible part of this method.
				G = temp[2].trim();
			}
		}

		UpdateData(client, parser, pageCount, racedate, file, tag);
	}
 
	/**
	 * Builds an Excel workbook with one header row plus one row per horse and
	 * writes it to {@code file}.
	 *
	 * The single-letter locals (D, E, F, ... W) are stored at the zero-based cell
	 * index that corresponds to that spreadsheet column letter (D = 3, E = 4, ...,
	 * W = 22); the matching header names are taken from the {@code head} array
	 * (e.g. index 3 "date", 4 "raceno", 5 "distance", 6 "course", 7 "track",
	 * 8 "raceclass", 9 "going", 10 "cup", 11 "horse", 12 "age", 13 "drawing",
	 * 14 "brandno", 15 "rating", 16 "netload", 19 "reserve", 20 "jockey",
	 * 21 "stable", 22 "fp").
	 *
	 * NOTE(review): the text of this method appears to have lost several spans
	 * (page loop header, declarations of html, nl, D..W, hourseCount, cols, Cols,
	 * the per-line loop body) -- restore from the original file before relying on it.
	 *
	 * @param client    HTTP helper; also used for {@code toChinese} conversion
	 * @param parser    HTML parser that is re-fed with page HTML for each extraction
	 * @param pageCount presumably the number of race pages to process -- TODO confirm (loop header lost)
	 * @param racedate  race date string built by the caller
	 * @param file      destination of the generated workbook
	 * @param tag       link fragment extracted by the caller; its use is not visible in the surviving text
	 * @throws Exception
	 * @throws ParserException
	 */
	private void UpdateData(HttpUtil client, Parser parser, int pageCount,
			String racedate, File file, String tag)
			throws Exception, ParserException {
		HSSFWorkbook wb = new HSSFWorkbook();
        HSSFSheet sheet = wb.createSheet();
        // Index of the next sheet row to write.
        int count=0;
        String[] head = new String[]{"season", "meetingno", "raceinyr",
                "date", "raceno", "distance", "course", "track", "raceclass",
                "going", "cup", "horse", "age", "drawing", "brandno", "rating",
                "netload", "updn", "bleeding", "reserve", "jockey", "stable", "fp",
                "time", "wintime", "pos1", "pos2", "pos3", "pos4", "pos5", "margin",
                "winticket", "plcticket", "oddon", "oddbr", "oddfn", "d_win1", "d_win2",
                "d_place1", "d_place2", "d_place3", "d_place4", "secttime1", "secttime2",
                "secttime3", "secttime4", "secttime5", "secttime6", "d_quin", "bodyweight"
            };
        // Row 0 holds the column headers.
        HSSFRow headRow = sheet.createRow((short) count);
        for (int j = 0; j < head.length; j++) {
            HSSFCell cell = headRow.createCell((short) j);
            cell.setCellValue(head[j]);
        }
        count++;

		// NOTE(review): loop header truncated in the text; the page fetch and the
		// declarations used below are not visible.
		for (int n=0;n 1) {
				// Column G ("course"): third comma-separated field, abbreviated for the two known venues.
				String str = nl.elementAt(1).getParent().toPlainTextString();
				String[] temp = str.split(",");
				if (temp.length > 2) {
					G = temp[2].trim();
					if ("沙田".equals(G)) {
						G = "田";
					} else if ("跑馬地".equals(G)) {
						G = "谷";
					}
				}
			}
			if (nl.size() > 2) {
				String str = nl.elementAt(2).getParent().toPlainTextString();
				String[] temp = str.split(",");
				if (temp.length > 1) {
					// Column H ("track"): second field with double quotes removed.
					H = temp[1].trim();
					H = H.replaceAll("\"", "");
				}
				if (temp.length > 2) {
					// Column F ("distance"): third field, cut before the "米" suffix when present.
					F = temp[2].trim();
					int idx = F.indexOf("米");
					if (idx > -1) F = F.substring(0, idx);
				}
			}
			if (nl.size() > 4) {
				// Column I ("raceclass"): text between the last "第" and the last "班".
				String str = nl.elementAt(4).getParent().toPlainTextString();
				int beginIndex = str.lastIndexOf("第");
				int endIndex = str.lastIndexOf("班");
				if (beginIndex > -1 && endIndex > -1)
					I = str.substring(beginIndex+1, endIndex);
			}
			parser.setInputHTML(html);
			nl = parser.extractAllNodesThatMatch(FontSize2FaceHKSCSFilter);
			// NOTE(review): the following comment line seems to have swallowed the guard
			// that opens this block -- TODO restore.
//				for (int i=0;i 0) {
				String str = nl.elementAt(0).getParent().toPlainTextString();
				String[] temp = str.split(" ");
				if (temp.length > 1) {
					// Column E ("raceno"): second space-separated token.
					E = temp[1].trim();
				}
				if (temp.length > 4) {
					// Column K ("cup"): fifth token, passed through client.toChinese.
					K = temp[4].trim();
					K = client.toChinese(K);
				}
			}

			System.out.println("D="+D+" E="+E+" F="+F+" G="+G+" H="+H+" I="+I+" J="+J+" K="+K);

			parser.setInputHTML(html);
			nl = parser.extractAllNodesThatMatch(ScriptFilter);
			// NOTE(review): the following comment line seems to have swallowed the guard
			// that opens this block -- TODO restore.
//				for (int i=0;i 0) {
				// Take the script text between the "ColPos = new Array(30) ;" marker and "var PageName".
				// NOTE(review): neither indexOf result is checked for -1 before substring.
				String str = nl.elementAt(0).getParent().toPlainTextString();
				int startIndex = str.indexOf("ColPos = new Array(30) ;") + "ColPos = new Array(30) ;".length();
				int endIndex = str.indexOf("var PageName");
				str = str.substring(startIndex + 1, endIndex);
				String[] arr = str.split("\n");
				if (arr.length > 2) {
					// NOTE(review): loop header and most of the per-line parsing (cols, L..U, offset)
					// are missing from the text here.
					for (int i=2;i -1 && endIndex > -1)
							offset = U.substring(startIndex + 1, endIndex);
						if (offset != null) {
							// Adds the offset to Q when both parse as integers; on failure the
							// exception is swallowed and Q is left unchanged.
							try {Q = "" + (Integer.parseInt(Q) + Integer.parseInt(offset));}catch(Exception e){}
						}

						V = client.toChinese(cols[15]).trim();
						W = cols[1];
						System.out.println("L="+L+" M="+M+" N="+N+" O="+O+" P="+P+" Q="+Q+" U="+U+" V="+V+" W="+W);

			    		// 50 cell slots (one per header), pre-filled with null; only known columns are set.
			    		List cells = new ArrayList();
			    		for (int m=0;m<50;m++) cells.add(null);
			    		cells.set(3, D);
			    		cells.set(4, E);
			    		cells.set(5, F);
			    		cells.set(6, G);
			    		cells.set(7, H);
			    		cells.set(8, I);
			    		cells.set(9, J);
			    		cells.set(10, K);
			    		cells.set(11, L);
			    		cells.set(12, M);
			    		cells.set(13, N);
			    		cells.set(14, O);
			    		cells.set(15, P);
			    		cells.set(16, Q);
			    		cells.set(19, T);
			    		cells.set(20, U);
			    		cells.set(21, V);
			    		cells.set(22, W);

			    		HSSFRow row = sheet.createRow((short) count);
			    		setRow(wb, row, cells);
						count++;

						hourseCount++;
					}
				}
			}
			// Column T ("reserve"): rows for the reserve horses section.
			parser.setInputHTML(html);
			nl = parser.extractAllNodesThatMatch(reversedFilter);
			if (nl.size() > 0) {
				// NOTE(review): the following comment line and the two loop headers after it
				// are truncated in the text -- TODO restore.
//				for (int i=0;i reversed = new ArrayList();
				// Siblings of the great-grandparent of the matched <b> tag.
				NodeList children = nl.elementAt(0).getParent().getParent().getParent().getChildren();
				for (int i=0;i 2) {
					for (int i=2;i cells = new ArrayList();
			    		for (int m=0;m<50;m++) cells.add(null);
			    		cells.set(3, D);
			    		cells.set(4, E);
			    		cells.set(5, F);
			    		cells.set(6, G);
			    		cells.set(7, H);
			    		cells.set(8, I);
			    		cells.set(9, J);
			    		cells.set(10, K);
			    		// Per-horse values come from the plain text of the row's columns.
			    		cells.set(11, Cols.elementAt(1).toPlainTextString().trim());
			    		cells.set(12, Cols.elementAt(5).toPlainTextString().trim());
			    		//cells.set(13, N);
			    		//cells.set(14, O);
			    		cells.set(15, Cols.elementAt(4).toPlainTextString().trim());
			    		cells.set(16, Cols.elementAt(3).toPlainTextString().trim());
			    		cells.set(19, Cols.elementAt(0).toPlainTextString().trim());
			    		//cells.set(20, U);
			    		cells.set(21, Cols.elementAt(7).toPlainTextString().trim());
			    		// Index 22 receives the running horse counter here.
			    		cells.set(22, ""+hourseCount);
			    		HSSFRow row = sheet.createRow((short) count);
			    		setRow(wb, row, cells);
						count++;
						hourseCount++;
					}
				}
			}
		}

		// NOTE(review): the stream is not closed if wb.write throws (no try/finally).
		FileOutputStream fileOut = new FileOutputStream(file);
        wb.write(fileOut);
        fileOut.close();
		System.out.println("导出排位表完毕");
	}
	 
	/**
	 * Writes the values in {@code cells} into {@code row}, choosing the cell
	 * type by column index.
	 *
	 * NOTE(review): the loop header, the declarations of {@code cell} and
	 * {@code temp}, the {@code switch} statement and its first case labels are
	 * missing from the text (truncated on the {@code for} line below) -- restore
	 * from the original file before relying on it.
	 *
	 * @param wb    workbook, used to create the date cell style for column 3
	 * @param row   target row
	 * @param cells values for the row, indexed by column; entries may be null
	 */
	private void setRow(HSSFWorkbook wb, HSSFRow row, List cells) {
		HttpUtil util = new HttpUtil();
		for (int i=0;i 0) {
                    // Numeric when the text parses as a double; otherwise stored as a UTF-16 string.
                    try {
                        cell.setCellValue(Double.parseDouble(temp));
                    } catch (Exception e) {
                        //System.out.println("出错列:" + i);
                        //e.printStackTrace();
                    	cell.setEncoding(HSSFCell.ENCODING_UTF_16);
                        cell.setCellValue(temp);
                    }
            	}
                break;
            case 48:
            	// Same numeric-or-string handling, but the text first goes through util.toChinese.
            	if (temp != null && temp.trim().length() > 0) {
            		temp = util.toChinese(temp);
                    try {
                        cell.setCellValue(Double.parseDouble(temp));
                    } catch (Exception e) {
                        //System.out.println("出错列:" + i);
                        //e.printStackTrace();
                    	cell.setEncoding(HSSFCell.ENCODING_UTF_16);
                        cell.setCellValue(temp);
                    }
            	}
                break;
            case 6:
            case 10:
            case 11:
            case 20:
            case 21:
            	// Text columns: converted with util.toChinese and always stored as strings.
            	if (temp != null && temp.trim().length() > 0) {
            		temp = util.toChinese(temp);
            		if (i == 20) {
            			// For column 20 only the first space-separated token is kept.
            			String[] arr = temp.split(" ");
            			if (arr.length > 0) temp = arr[0];
            		}
            		cell.setEncoding(HSSFCell.ENCODING_UTF_16);
                    cell.setCellValue(temp);
            	}
            	break;
            case 3:
            	// Date column: an 8-character yyyyMMdd string is reformatted, parsed and stored as a
            	// date cell with the "d-mmm-yy" format; on any failure the text is stored instead.
            	if (temp != null && temp.trim().length() > 0) {
            		try {
            			temp = temp.substring(0,4)+"-"+temp.substring(4,6)+"-"+temp.substring(6,8);
            			Date d = new SimpleDateFormat("yyyy-MM-dd").parse(temp);

            			// NOTE(review): a new cell style is created for every date cell; consider
            			// sharing one style per workbook -- TODO confirm against POI style limits.
            			HSSFCellStyle cellStyle = wb.createCellStyle();
            		    cellStyle.setDataFormat(HSSFDataFormat.getBuiltinFormat("d-mmm-yy"));
            		    cell.setCellValue(d);
            		    cell.setCellStyle(cellStyle);
            		} catch (Exception e) {
                        //System.out.println("出错列:" + i);
                        //e.printStackTrace();
            			cell.setEncoding(HSSFCell.ENCODING_UTF_16);
                        cell.setCellValue(temp);
                    }
            	}
            	break;
            default:
            	// Every other column: non-blank text is stored as a UTF-16 string.
            	if (temp != null && temp.trim().length() > 0) {
	                cell.setEncoding(HSSFCell.ENCODING_UTF_16);
	                cell.setCellValue(temp);
            	}
			}
		}
	}
	 
	/**
	 * Accepts only table tags whose {@code id} attribute is exactly "Table3".
	 */
	private NodeFilter Table3Filter = new NodeFilter() {
		public boolean accept(Node node) {
			boolean isTableTag = node instanceof Tag && node instanceof TableTag;
			if (!isTableTag) {
				return false;
			}
			String id = ((Tag) node).getAttribute("id");
			return "Table3".equals(id);
		}
	};
    /**
     * Accepts table cells ({@code TableColumn} tags) whose {@code align}
     * attribute equals "middle". Despite the field name, no {@code nowrap}
     * attribute or "center" value is checked.
     */
    private NodeFilter TdNowrapCenterFilter = new NodeFilter() {
        public boolean accept(Node node) {
            boolean isCell = node instanceof Tag && node instanceof TableColumn;
            if (!isCell) {
                return false;
            }
            String alignment = ((Tag) node).getAttribute("align");
            return "middle".equals(alignment);
        }
    };
     
    /**
     * Accepts {@code font} tags with face "Arial, Helvetica, sans-serif",
     * size "2" and no {@code color} attribute.
     */
    private NodeFilter FontSize2FaceAHSFilter = new NodeFilter() {
        public boolean accept(Node node) {
            if (!(node instanceof Tag)) {
                return false;
            }
            Tag candidate = (Tag) node;
            String lowerName = candidate.getTagName().toLowerCase();
            // Checks stay in the original order so the short-circuiting is unchanged.
            return "font".equals(lowerName)
                    && "Arial, Helvetica, sans-serif".equals(candidate.getAttribute("face"))
                    && "2".equals(candidate.getAttribute("size"))
                    && candidate.getAttribute("color") == null;
        }
    };
    /**
     * Accepts {@code font} tags with face "細明體_HKSCS,Arial, Helvetica, sans-serif",
     * size "2" and no {@code color} attribute.
     */
    private NodeFilter FontSize2FaceHKSCSFilter = new NodeFilter() {
        public boolean accept(Node node) {
            if (!(node instanceof Tag)) {
                return false;
            }
            Tag candidate = (Tag) node;
            String lowerName = candidate.getTagName().toLowerCase();
            // Checks stay in the original order so the short-circuiting is unchanged.
            return "font".equals(lowerName)
                    && "細明體_HKSCS,Arial, Helvetica, sans-serif".equals(candidate.getAttribute("face"))
                    && "2".equals(candidate.getAttribute("size"))
                    && candidate.getAttribute("color") == null;
        }
    };
    /**
     * Accepts inline script tags: {@code language} attribute equal to
     * "javascript" and no {@code src} attribute.
     */
    private NodeFilter ScriptFilter = new NodeFilter() {
        public boolean accept(Node node) {
            if (!(node instanceof ScriptTag)) {
                return false;
            }
            Tag script = (Tag) node;
            boolean isJavascript = "javascript".equals(script.getAttribute("language"));
            return isJavascript && script.getAttribute("src") == null;
        }
    };
    /**
     * Accepts {@code b} tags whose parent node's plain text contains
     * "後 備 馬 匹" (the heading of the reserve-horses section).
     *
     * A tag without a name or without a parent (a root-level node) is rejected
     * instead of raising a NullPointerException.
     */
    private NodeFilter reversedFilter = new NodeFilter() {
        public boolean accept(Node node) {
            if (!(node instanceof Tag)) {
                return false;
            }
            Tag tag = (Tag) node;
            // equalsIgnoreCase is null-safe, unlike getTagName().toLowerCase().
            if (!"b".equalsIgnoreCase(tag.getTagName())) {
                return false;
            }
            // Root-level nodes have no parent; there is no surrounding text to inspect.
            Node parent = tag.getParent();
            if (parent == null) {
                return false;
            }
            String parentText = parent.toPlainTextString();
            return parentText != null && parentText.contains("後 備 馬 匹");
        }
    };
     
    /**
     * Command-line entry point: downloads the starters data and exports it to
     * an Excel file.
     *
     * @param args optional; when {@code args[0]} is present it is used as the
     *             output file path, otherwise the original default
     *             {@code C:/pre-race.xls} (written with backslashes in code) is used
     * @throws Exception propagated from {@code UpdateStarters}
     */
    public static void main(String[] args) throws Exception {
        // Keep the historical default so running without arguments behaves as before.
        String outputPath = "C:\\pre-race.xls";
        if (args != null && args.length > 0) {
            outputPath = args[0];
        }
        GetStarters get = new GetStarters();
        HttpUtil client = new HttpUtil();
        get.UpdateStarters(client, new File(outputPath));
    }
}