www.pudn.com > HKJC2.rar > GetResults.java


package hkjc2.logic; 
 
import java.io.File; 
import java.io.FileOutputStream; 
import java.text.SimpleDateFormat; 
import java.util.ArrayList; 
import java.util.Date; 
import java.util.List; 
 
import org.apache.poi.hssf.usermodel.HSSFCell; 
import org.apache.poi.hssf.usermodel.HSSFCellStyle; 
import org.apache.poi.hssf.usermodel.HSSFDataFormat; 
import org.apache.poi.hssf.usermodel.HSSFRow; 
import org.apache.poi.hssf.usermodel.HSSFSheet; 
import org.apache.poi.hssf.usermodel.HSSFWorkbook; 
import org.htmlparser.Node; 
import org.htmlparser.NodeFilter; 
import org.htmlparser.Parser; 
import org.htmlparser.Tag; 
import org.htmlparser.tags.TableColumn; 
import org.htmlparser.tags.TableRow; 
import org.htmlparser.tags.TableTag; 
import org.htmlparser.util.NodeList; 
import org.htmlparser.util.ParserException; 
 
import com.singularsys.jep.Jep; 
 
public class GetResults { 
 
	/**
	 * Generates the Excel workbook of race results for one race day.
	 * <p>
	 * Fetches the results page for {@code racedate} through {@code client}, parses it
	 * with HTMLParser, collects one list of 50 cell values per result row and writes
	 * the workbook to {@code file}. If the first results page cannot be fetched
	 * ({@code getHTML} returns {@code null}) the method returns without writing anything.
	 * <p>
	 * Local variables named with capital letters (C, D, E ... AX) are spreadsheet
	 * column letters; each value is stored at the matching zero-based cell index
	 * (C at 2, D at 3 ... AX at 49).
	 * <p>
	 * NOTE(review): several lines of this method have lost text (it looks as if every
	 * span between a "<" and the following ">" was stripped when the file was
	 * published), so the method does not compile as shown. The affected lines are
	 * marked below and must be restored from the original source.
	 *
	 * @param client HTTP helper used to fetch pages ({@code getHTML}) and to convert text ({@code toChinese})
	 * @param racedate race date; it is split on "/" and its parts are re-joined in reverse
	 *        order, so presumably dd/MM/yyyy (main passes "22/06/2008") - TODO confirm
	 * @param file destination of the generated workbook
	 * @throws Exception if fetching, parsing or writing fails
	 */
	public void genExcel(HttpUtil client, String racedate, File file) throws Exception { 
		// Reverse the three "/"-separated parts of the date and concatenate them.
		String[] te = racedate.split("/"); 
		String rd = te[2]+te[1]+te[0]; 
		// D is written to cell index 3 (header "date").
		String D = rd; 
		String url = "http://www.hkjc.com/chinese/racing/results.asp?racedate="+racedate; 
		String html = client.getHTML(url); 
		if (html == null) { 
			return; 
		} 
		Parser parser = new Parser(); 
		parser.setInputHTML(html); 
		 
		// NOTE(review): presumably the number of race pages; the code that sets it is in the damaged line below - confirm.
		int pageCount = 0; 
		NodeList nl = parser.extractAllNodesThatMatch(PageTableFilter); 
        if (nl.size() > 0) { 
        	TableTag Table3 = (TableTag)nl.elementAt(0); 
        	String Table3Html = Table3.toHtml(); 
        	parser.setInputHTML(Table3Html); 
            nl = parser.extractAllNodesThatMatch(TdNowrapCenterFilter); 
            // NOTE(review): text is missing from the next line (between "i" and "0) {"); the loop
            // header, the end of this block and the declaration of C are not visible.
            for (int i=0;i 0) { 
        	String tableHtml = nl.elementAt(0).toHtml(); 
        	parser.setInputHTML(tableHtml); 
        	NodeList nl1 = parser.extractAllNodesThatMatch(TdFilter); 
//        	for (int i=0;i 0) { 
        		// C is the text between the first "(" and the first ")" of the first cell.
        		String str = nl1.elementAt(0).toPlainTextString(); 
				int beginIndex = str.indexOf("("); 
				int endIndex = str.indexOf(")"); 
				if (beginIndex > -1 && endIndex > -1) 
					C = str.substring(beginIndex+1, endIndex); 
        	} 
        } 
         
        // G (cell index 6, header "course"): last line of the text around the first
        // <font class="general_text"> element, with the two known venue names shortened
        // to a single character.
        String G = null; 
        parser.setInputHTML(html); 
        nl = parser.extractAllNodesThatMatch(fontgeneral_textFilter); 
        if (nl.size() > 0) { 
        	String str = nl.elementAt(0).getParent().toPlainTextString(); 
        	String[] arr = str.trim().split("\n"); 
        	if (arr.length > 0) { 
        		G = arr[arr.length-1].trim(); 
        		if ("沙田".equals(G)) { 
					G = "田"; 
				} else if ("跑馬地".equals(G)) { 
					G = "谷"; 
				} 
        	} 
        } 
         
        // Create the workbook and write the 50 column names as the first row.
        HSSFWorkbook wb = new HSSFWorkbook(); 
        HSSFSheet sheet = wb.createSheet(); 
        int count=0; 
        String[] head = new String[]{"season", "meetingno", "raceinyr", 
                "date", "raceno", "distance", "course", "track", "raceclass", 
                "going", "cup", "horse", "age", "drawing", "brandno", "rating", 
                "netload", "updn", "bleeding", "reserve", "jockey", "stable", "fp", 
                "time", "wintime", "pos1", "pos2", "pos3", "pos4", "pos5", "margin", 
                "winticket", "plcticket", "oddon", "oddbr", "oddfn", "d_win1", "d_win2", 
                "d_place1", "d_place2", "d_place3", "d_place4", "secttime1", "secttime2", 
                "secttime3", "secttime4", "secttime5", "secttime6", "d_quin", "bodyweight" 
            }; 
        HSSFRow headRow = sheet.createRow((short) count); 
        for (int j = 0; j < head.length; j++) { 
            HSSFCell cell = headRow.createCell((short) j); 
            cell.setCellValue(head[j]); 
        } 
        count++; 
         
		// Read the web pages for the given date.
		// NOTE(review): text is missing from the next line; the loop bound, the page fetch and the
		// declarations of rows, nl1, C, E, F, H, I, J, K and the AK..AW variables are not visible.
		for (int n=0;n 0) { 
	        		String str = nl1.elementAt(0).toPlainTextString(); 
					int beginIndex = str.indexOf("("); 
					int endIndex = str.indexOf(")"); 
					if (beginIndex > -1 && endIndex > -1) 
						C = str.substring(beginIndex+1, endIndex); 
					 
					// E (cell index 4, header "raceno"): the text between "第" and "場", spaces removed.
					beginIndex = str.indexOf("第"); 
					endIndex = str.indexOf("場"); 
					if (beginIndex > -1 && endIndex > -1) 
						E = str.substring(beginIndex+1, endIndex).replaceAll(" ", "").trim(); 
	        	} 
	        	if (nl1.size() > 2) { 
	        		// I (cell index 8, header "raceclass"): the text between "第" and "班".
	        		String str = nl1.elementAt(2).toPlainTextString(); 
					int beginIndex = str.indexOf("第"); 
					int endIndex = str.indexOf("班"); 
					if (beginIndex > -1 && endIndex > -1) 
						I = str.substring(beginIndex+1, endIndex); 
					 
					// F (cell index 5, header "distance"): the text between "-" and "米", spaces and ";" removed.
					beginIndex = str.indexOf("-"); 
					endIndex = str.indexOf("米"); 
					if (beginIndex > -1 && endIndex > -1) 
						F = str.substring(beginIndex+1, endIndex).replaceAll(" ", "").replaceAll(";", "").trim(); 
	        	} 
	        	if (nl1.size() > 3) { 
	        		// K (cell index 10, header "cup"): fourth cell with line breaks removed.
	        		String str = nl1.elementAt(3).toPlainTextString(); 
	        		str = str.replaceAll("\n", ""); 
	        		str = str.replaceAll("\r", ""); 
					K = client.toChinese(str).trim(); 
	        	} 
	        	if (nl1.size() > 8) { 
	        		// J (cell index 9, header "going"): ninth cell with line breaks removed.
	        		String str = nl1.elementAt(8).toPlainTextString(); 
	        		str = str.replaceAll("\n", ""); 
	        		str = str.replaceAll("\r", ""); 
					J = client.toChinese(str).trim(); 
	        	} 
	        	if (nl1.size() > 10) { 
	        		// H (cell index 7, header "track"): the text between the first and the last double quote.
	        		String str = nl1.elementAt(10).toPlainTextString(); 
	        		int beginIndex = str.indexOf("\""); 
					int endIndex = str.lastIndexOf("\""); 
					if (beginIndex > -1 && endIndex > -1) 
						H = str.substring(beginIndex+1, endIndex); 
	        	} 
	        	// NOTE(review): text is missing from the next two lines; how AQV is filled is not visible.
	        	// The last (up to) six entries of AQV become AQ..AV (cell indexes 42..47, "secttime1".."secttime6").
	        	for (int i=0;i AQV = new ArrayList(); 
	        			for (int j=i+1;j 0) AV = AQV.get(AQV.size() - 1); 
        				if (AQV.size() > 1) AU = AQV.get(AQV.size() - 2); 
        				if (AQV.size() > 2) AT = AQV.get(AQV.size() - 3); 
        				if (AQV.size() > 3) AS = AQV.get(AQV.size() - 4); 
        				if (AQV.size() > 4) AR = AQV.get(AQV.size() - 5); 
        				if (AQV.size() > 5) AQ = AQV.get(AQV.size() - 6); 
	        			break; 
	        		} 
	        	} 
	        	 
	        	// Values taken from the first 450-wide table: the parent text of selected
	        	// <font size="2" face="Arial"> elements goes to AK, AM, AN, AO and AW
	        	// (cell indexes 36, 38, 39, 40 and 48).
	        	parser.setInputHTML(html); 
	            nl = parser.extractAllNodesThatMatch(Table450S1P0B0Filter); 
	            if (nl.size() > 0) { 
	            	String Table450S1P0B0HTML = nl.elementAt(0).toHtml(); 
	            	parser.setInputHTML(Table450S1P0B0HTML); 
	            	nl = parser.extractAllNodesThatMatch(font2ArialFilter); 
//		            	for (int i=0;i 2) AK = nl.elementAt(2).getParent().toPlainTextString(); 
	            	if (nl.size() > 5) AM = nl.elementAt(5).getParent().toPlainTextString(); 
	            	if (nl.size() > 7) AN = nl.elementAt(7).getParent().toPlainTextString(); 
	            	if (nl.size() > 9) AO = nl.elementAt(9).getParent().toPlainTextString(); 
	            	if (nl.size() > 12) AW = nl.elementAt(12).getParent().toPlainTextString(); 
	            } 
	            System.out.println("C="+C+" D="+rd+" E="+E+" F="+F+" G="+G+" H="+H+" I="+I+" J="+J+" AQ="+AQ+" AR="+AR+" AS="+AS+" AT="+AT+" AU="+AU+" AV="+AV 
	            		+" AK="+AK+" AM="+AM+" AN="+AN+" AO="+AO+" AW="+AW); 
	             
	        	// Result list: every table row after the first one yields one record.
	        	parser.setInputHTML(html); 
	        	NodeList nlList = parser.extractAllNodesThatMatch(ListTableFilter); 
	        	if (nlList.size() > 0) { 
	        		NodeList Rows = null; 
	                String ListHtml = nlList.elementAt(0).toHtml(); 
                    parser.setInputHTML(ListHtml); 
                    Rows = parser.extractAllNodesThatMatch(TrFilter); 
                    // Y (cell index 24, header "wintime") keeps the X value of row 1 for all rows of this list.
                    String Y = null; 
	                for (int i = 1; i < Rows.size(); i++) { 
	                	String L = null; 
	                	String N = null; 
	                	String O = null; 
	                	String Q = null; 
	                	String U = null; 
	                	String V = null; 
	                	String W = null; 
	                	String X = null; 
	                	String AE = null; 
	                	String AJ = null; 
	                	String AX = null; 
	                    Node node = Rows.elementAt(i); 
	                    String RowHtml = node.toHtml(); 
	                    parser.setInputHTML(RowHtml); 
	                    try { 
	                    	NodeList Cells = parser.extractAllNodesThatMatch(TdFilter); 
	                    	// NOTE(review): text is missing from the next line (between "j" and "5) Q").
	                    	for (int j=0;j 5) Q = Cells.elementAt(5).toPlainTextString(); 
	                    		if (Cells.size() > 0) W = Cells.elementAt(0).toPlainTextString(); 
	                    		if (Cells.size() > 3) U = client.toChinese(Cells.elementAt(3).toPlainTextString()); 
	                    		if (Cells.size() > 4) V = client.toChinese(Cells.elementAt(4).toPlainTextString()); 
	                    		if (Cells.size() > 9) X = Cells.elementAt(9).toPlainTextString(); 
	                    		if (i == 1) Y = X; 
	                    		if (Cells.size() > 7) N = Cells.elementAt(7).toPlainTextString(); 
	                    		if (Cells.size() > 8) { 
	                    			// AE (cell index 30, header "margin"): fixed margin phrases are replaced by numeric strings.
	                    			AE = Cells.elementAt(8).toPlainTextString().replaceAll(" ", "").trim(); 
	                    			if ("-".equals(AE)) { 
	                    				AE = "0"; 
	                    			} else if ("短馬頭位".equals(AE)) { 
	                    				AE = "0.1"; 
	                    			} else if ("一頭位".equals(AE)) { 
	                    				AE = "0.2"; 
	                    			} else if ("頸位".equals(AE)) { 
	                    				AE = "0.5"; 
	                    			} else if ("多個馬位".equals(AE)) { 
	                    				AE = "99"; 
	                    			} 
	                    		} 
	                    		if (Cells.size() > 10) AJ = Cells.elementAt(10).toPlainTextString(); 
	                    		if (Cells.size() > 6) AX = Cells.elementAt(6).toPlainTextString(); 
	                    		if (Cells.size() > 2) { 
	                    			// The third cell has the form "text(code)": L is the part before "(" and
	                    			// O becomes the part inside the parentheses.
	                    			O = Cells.elementAt(2).toPlainTextString(); 
	                    			int beginIndex = O.indexOf("("); 
	                    			if (beginIndex > -1) 
	                    				L = client.toChinese(O.substring(0, beginIndex)); 
									int endIndex = O.indexOf(")"); 
									if (beginIndex > -1 && endIndex > -1) 
										O = O.substring(beginIndex+1, endIndex); 
	                    		} 
	                    		System.out.println(" L="+L+" N="+N+" O="+O+" U="+U+" V="+V+"W="+W+" X="+X+" Y="+Y+" AE="+AE+" AJ="+AJ+" AX="+AX); 
	                    	} 
	                    } catch (ParserException e) { 
	                        e.printStackTrace(); 
	                    } 
	                     
	                    // Build a 50-slot record (unset slots stay null) and place each value at its column index.
	                    List cells = new ArrayList(); 
			    		for (int m=0;m<50;m++) cells.add(null); 
			    		cells.set(2, C); 
			    		cells.set(3, D); 
			    		cells.set(4, E); 
			    		cells.set(5, F); 
			    		cells.set(6, G); 
			    		cells.set(7, H); 
			    		cells.set(8, I); 
			    		cells.set(9, J); 
			    		cells.set(10, K); 
			    		cells.set(11, L); 
			    		cells.set(13, N); 
			    		cells.set(14, O); 
			    		cells.set(16, Q); 
			    		cells.set(20, U); 
			    		cells.set(21, V); 
			    		cells.set(22, W); 
			    		cells.set(23, X); 
			    		cells.set(24, Y); 
			    		cells.set(30, AE); 
			    		cells.set(35, AJ); 
			    		cells.set(36, AK); 
			    		cells.set(38, AM); 
			    		cells.set(39, AN); 
			    		cells.set(40, AO); 
			    		cells.set(42, AQ); 
			    		cells.set(43, AR); 
			    		cells.set(44, AS); 
			    		cells.set(45, AT); 
			    		cells.set(46, AU); 
			    		cells.set(47, AV); 
			    		cells.set(48, AW); 
			    		cells.set(49, AX); 
			    		rows.add(cells); 
	                } 
	        	} 
	        } 
	         
	        // If sectional times and positions are available, read them.
        	String secHtml = client.getHTML("http://www.hkjc.com/chinese/racing/display_sectionaltime.asp?RaceDate="+racedate+"&Raceno="+(n+1)); 
//	    		System.out.println(srcHtml); 
    		if (secHtml == null) continue; 
    		 
    		parser.setInputHTML(secHtml); 
        	NodeList nlSecTable = parser.extractAllNodesThatMatch(SecTableFilter); 
//	        	for (int i=0;i 0) { 
        		TableTag table = (TableTag)nlSecTable.elementAt(0); 
        		List Rows = new ArrayList(); 
        		// NOTE(review): text is missing from the next two lines; how Rows is filled and the
        		// declarations of O, Z, AA, AB, AC, AD and ZAE are not visible.
        		for (int i=0;i 3) { 
        			for (int i=3;i ZAE = new ArrayList(); 
        				//System.out.println(i + ": " + Rows.get(i).toHtml()); 
        				NodeList Cells = Rows.get(i).getChildren(); 
//	        				for (int j=0;j 5) { 
        					// O: the text inside the parentheses of child node 5, used below to find the matching record.
        					O = Cells.elementAt(5).toPlainTextString().trim(); 
        					int beginIndex = O.indexOf("("); 
							int endIndex = O.indexOf(")"); 
							if (beginIndex > -1 && endIndex > -1) 
								O = O.substring(beginIndex+1, endIndex); 
        				} 
        				// NOTE(review): text is missing from the next line (between "m" and "0) str").
        				for (int m=7;m 0) str = temp[0]; 
        					str = str.replace("\n", ""); 
        					str = str.replace("\r", ""); 
        					str = str.trim(); 
        					if (str.length() > 0) ZAE.add(str); 
        				} 
        				// Drop the last collected value, then take the last (up to) five remaining
        				// values as Z, AA, AB, AC, AD (cell indexes 25..29, "pos1".."pos5").
        				if (ZAE.size() > 0) ZAE.remove(ZAE.size() - 1); 
        				if (ZAE.size() > 0) AD = ZAE.get(ZAE.size() - 1); 
        				if (ZAE.size() > 1) AC = ZAE.get(ZAE.size() - 2); 
        				if (ZAE.size() > 2) AB = ZAE.get(ZAE.size() - 3); 
        				if (ZAE.size() > 3) AA = ZAE.get(ZAE.size() - 4); 
        				if (ZAE.size() > 4) Z = ZAE.get(ZAE.size() - 5); 
        				 
        				System.out.println("O="+O+" Z="+Z+" AA="+AA+" AB="+AB+" AC="+AC+" AD="+AD); 
        				// Match on the date and column O.
        				 
        				if (O != null) { 
	        				// NOTE(review): text is missing from the next line; the first record whose
	        				// cell 14 equals O receives the position values.
	        				for (int j=0;j cells = rows.get(j); 
	        					if (O.equals(cells.get(14))) { 
	        						cells.set(25, Z); 
	        						cells.set(26, AA); 
	        						cells.set(27, AB); 
	        						cells.set(28, AC); 
	        						cells.set(29, AD); 
	        						break; 
	        					} 
	        				} 
        				} 
        			} 
        		} 
        	} 
        	 
        	// NOTE(review): text is missing from the next line; the creation of "row" is not visible.
        	for (int i=0;i cells = rows.get(i); 
        		setRow(wb, row, cells); 
        		count++; 
        	} 
		} 
		 
		// Write the workbook to the target file.
		FileOutputStream fileOut = new FileOutputStream(file); 
        wb.write(fileOut); 
        fileOut.close(); 
		System.out.println("导出赛果及派彩完毕"); 
	} 
	 
	/**
	 * Writes the collected values of one record into the cells of a sheet row,
	 * choosing how each value is stored by its column index.
	 * <p>
	 * NOTE(review): the line that opens the loop has lost text (the loop header, the
	 * creation of {@code cell} and {@code temp}, the switch header and the first case
	 * labels are not visible), so this method does not compile as shown.
	 *
	 * @param wb workbook that owns the row; used to create the date cell style
	 * @param row row that receives the cells
	 * @param cells values of the record, indexed by column
	 */
	private void setRow(HSSFWorkbook wb, HSSFRow row, List cells) { 
		HttpUtil util = new HttpUtil(); 
		// NOTE(review): text is missing from the next line. The visible remainder of this first
		// branch stores the value as a number when it parses as a double, otherwise as UTF-16 text.
		for (int i=0;i 0) { 
                    try { 
                        cell.setCellValue(Double.parseDouble(temp)); 
                    } catch (Exception e) { 
                        //System.out.println("error column:" + i); 
                        //e.printStackTrace(); 
                    	cell.setEncoding(HSSFCell.ENCODING_UTF_16); 
                        cell.setCellValue(temp); 
                    } 
            	} 
                break; 
            // Column 48: converted with toChinese first, then numeric if possible, else text.
            case 48: 
            	if (temp != null && temp.trim().length() > 0) { 
            		temp = util.toChinese(temp); 
                    try { 
                        cell.setCellValue(Double.parseDouble(temp)); 
                    } catch (Exception e) { 
                        //System.out.println("error column:" + i); 
                        //e.printStackTrace(); 
                    	cell.setEncoding(HSSFCell.ENCODING_UTF_16); 
                        cell.setCellValue(temp); 
                    } 
            	} 
                break; 
            // Columns 6, 10, 11, 20 and 21: always stored as text after toChinese conversion.
            case 6: 
            case 10: 
            case 11: 
            case 20: 
            case 21: 
            	if (temp != null && temp.trim().length() > 0) { 
            		temp = util.toChinese(temp); 
            		// For column 20 only the part before the first space is kept.
            		if (i == 20) { 
            			String[] arr = temp.split(" "); 
            			if (arr.length > 0) temp = arr[0]; 
            		} 
            		cell.setEncoding(HSSFCell.ENCODING_UTF_16); 
                    cell.setCellValue(temp); 
            	} 
            	break; 
            // Column 3: an 8-character yyyyMMdd string is stored as a date cell formatted "d-mmm-yy";
            // if reformatting or parsing fails the (possibly reformatted) string is stored as text.
            case 3: 
            	if (temp != null && temp.trim().length() > 0) { 
            		try { 
            			temp = temp.substring(0,4)+"-"+temp.substring(4,6)+"-"+temp.substring(6,8); 
            			Date d = new SimpleDateFormat("yyyy-MM-dd").parse(temp); 
            			 
            			// NOTE(review): a new cell style is created for every date cell written.
            			HSSFCellStyle cellStyle = wb.createCellStyle(); 
            		    cellStyle.setDataFormat(HSSFDataFormat.getBuiltinFormat("d-mmm-yy")); 
            		    cell.setCellValue(d); 
            		    cell.setCellStyle(cellStyle); 
            		} catch (Exception e) { 
                        //System.out.println("error column:" + i); 
                        //e.printStackTrace(); 
            			cell.setEncoding(HSSFCell.ENCODING_UTF_16); 
                        cell.setCellValue(temp); 
                    } 
            	} 
            	break; 
            // Column 30: every "-" is replaced by "+" and the result is evaluated as an arithmetic
            // expression with Jep; if that fails every "+" is turned back into "-". The outcome is
            // stored as a number when it parses as a double, otherwise as text.
            case 30: 
            	if (temp != null && temp.trim().length() > 0) { 
            		try { 
            			temp = temp.replaceAll("-", "+"); 
            			Jep jep = new Jep(); 
            			jep.parse(temp); 
            			temp = jep.evaluate().toString(); 
            		} catch (Exception e) { 
                        //System.out.println("error column:" + i); 
                        //e.printStackTrace(); 
            			temp = temp.replace("+", "-"); 
                    } 
            		try { 
                        cell.setCellValue(Double.parseDouble(temp)); 
                    } catch (Exception e) { 
                        //System.out.println("error column:" + i); 
                        //e.printStackTrace(); 
                    	cell.setEncoding(HSSFCell.ENCODING_UTF_16); 
                        cell.setCellValue(temp); 
                    } 
            	} 
            	break; 
            // Every other column: non-blank values are stored unchanged as UTF-16 text.
            default: 
            	if (temp != null && temp.trim().length() > 0) { 
	                cell.setEncoding(HSSFCell.ENCODING_UTF_16); 
	                cell.setCellValue(temp); 
            	} 
			} 
		} 
	} 
 
	/** Accepts table tags with border "0", cellpadding "1" and cellspacing "1". */
	private NodeFilter PageTableFilter = new NodeFilter() {
		public boolean accept(Node node) {
			if (!(node instanceof TableTag)) {
				return false;
			}
			Tag table = (Tag) node;
			return "0".equals(table.getAttribute("border"))
					&& "1".equals(table.getAttribute("cellpadding"))
					&& "1".equals(table.getAttribute("cellspacing"));
		}
	};
    /** Accepts table cells that have align "middle" and no valign attribute. */
    private NodeFilter TdNowrapCenterFilter = new NodeFilter() {
        public boolean accept(Node node) {
            if (!(node instanceof TableColumn)) {
                return false;
            }
            Tag column = (Tag) node;
            boolean alignedMiddle = "middle".equals(column.getAttribute("align"));
            return alignedMiddle && column.getAttribute("valign") == null;
        }
    };
    /** Accepts table tags with width "760", border "0", cellpadding "0" and cellspacing "1". */
    private NodeFilter InfoTableFilter = new NodeFilter() {
        public boolean accept(Node node) {
            if (!(node instanceof TableTag)) {
                return false;
            }
            Tag table = (Tag) node;
            return "760".equals(table.getAttribute("width"))
                    && "0".equals(table.getAttribute("border"))
                    && "0".equals(table.getAttribute("cellpadding"))
                    && "1".equals(table.getAttribute("cellspacing"));
        }
    };
    /** Accepts table tags with width "450", border "0", cellpadding "0" and cellspacing "1". */
    private NodeFilter Table450S1P0B0Filter = new NodeFilter() {
        public boolean accept(Node node) {
            if (!(node instanceof TableTag)) {
                return false;
            }
            Tag table = (Tag) node;
            return "450".equals(table.getAttribute("width"))
                    && "0".equals(table.getAttribute("border"))
                    && "0".equals(table.getAttribute("cellpadding"))
                    && "1".equals(table.getAttribute("cellspacing"));
        }
    };
    /** Accepts every table tag. */
    private NodeFilter TableFilter = new NodeFilter() {
        public boolean accept(Node node) {
            boolean isTag = node instanceof Tag;
            return isTag && node instanceof TableTag;
        }
    };
    /** Accepts every table cell tag. */
    private NodeFilter TdFilter = new NodeFilter() {
        public boolean accept(Node node) {
            boolean isTag = node instanceof Tag;
            return isTag && node instanceof TableColumn;
        }
    };
    /** Accepts every table row tag. */
    private NodeFilter TrFilter = new NodeFilter() {
        public boolean accept(Node node) {
            boolean isTag = node instanceof Tag;
            return isTag && node instanceof TableRow;
        }
    };
    /**
     * Accepts font tags with face "Arial, Helvetica, sans-serif", size "2"
     * and no color attribute.
     */
    private NodeFilter FontSize2FaceAHSFilter = new NodeFilter() {
        public boolean accept(Node node) {
            if (!(node instanceof Tag)) {
                return false;
            }
            Tag font = (Tag) node;
            if (!"font".equals(font.getTagName().toLowerCase())) {
                return false;
            }
            return "Arial, Helvetica, sans-serif".equals(font.getAttribute("face"))
                    && "2".equals(font.getAttribute("size"))
                    && font.getAttribute("color") == null;
        }
    };
    /** Accepts TABLE tags with width "760", cellspacing "1", cellpadding "1" and border "0". */
    private NodeFilter ListTableFilter = new NodeFilter() {

        public boolean accept(Node node) {
            if (!(node instanceof Tag)) {
                return false;
            }
            Tag table = (Tag) node;
            if (!table.getTagName().toUpperCase().equals("TABLE")) {
                return false;
            }
            return "760".equals(table.getAttribute("width"))
                    && "1".equals(table.getAttribute("cellspacing"))
                    && "1".equals(table.getAttribute("cellpadding"))
                    && "0".equals(table.getAttribute("border"));
        }
    };
     
    /** Accepts FONT tags with size "2" and face "Arial". */
    private NodeFilter font2ArialFilter = new NodeFilter() {

        public boolean accept(Node node) {
            if (!(node instanceof Tag)) {
                return false;
            }
            Tag font = (Tag) node;
            if (!font.getTagName().toUpperCase().equals("FONT")) {
                return false;
            }
            return "2".equals(font.getAttribute("size"))
                    && "Arial".equals(font.getAttribute("face"));
        }
    };
     
    /** Accepts table tags with width "100%", cellspacing "1" and border "0". */
    private NodeFilter SecTableFilter = new NodeFilter() {

        public boolean accept(Node node) {
            if (!(node instanceof TableTag)) {
                return false;
            }
            Tag table = (Tag) node;
            boolean fullWidth = "100%".equals(table.getAttribute("width"));
            return fullWidth
                    && "1".equals(table.getAttribute("cellspacing"))
                    && "0".equals(table.getAttribute("border"));
        }
    };
     
    /** Accepts FONT tags whose class attribute is "general_text". */
    private NodeFilter fontgeneral_textFilter = new NodeFilter() {

        public boolean accept(Node node) {
            if (!(node instanceof Tag)) {
                return false;
            }
            Tag font = (Tag) node;
            boolean isFont = font.getTagName().toUpperCase().equals("FONT");
            return isFont && "general_text".equals(font.getAttribute("class"));
        }
    };
     
    /**
     * Command-line entry point: exports the results of one race day to an Excel file.
     * <p>
     * Both arguments are optional, so running without arguments behaves as before.
     *
     * @param args {@code args[0]} is the race date passed to {@code genExcel}
     *        (default "22/06/2008"); {@code args[1]} is the output file path
     *        (default "C:\post-race.xls")
     * @throws Exception if {@code genExcel} fails
     */
    public static void main(String[] args) throws Exception {
        // Fall back to the previously hard-coded values when an argument is absent.
        String raceDate = (args != null && args.length > 0) ? args[0] : "22/06/2008";
        String outputPath = (args != null && args.length > 1) ? args[1] : "C:\\post-race.xls";

        GetResults exporter = new GetResults();
        HttpUtil client = new HttpUtil();
        exporter.genExcel(client, raceDate, new File(outputPath));
    }
}