www.pudn.com > datamining.rar > Apriori.java


//package cn.edu.tsinghua.ss.liuhongxin; 
 
import java.util.Properties; 
import java.util.LinkedList; 
import java.util.ArrayList; 
import java.util.Arrays; 
import java.util.Iterator; 
import java.util.regex.Pattern; 
import java.io.File; 
import java.io.FileInputStream; 
import java.io.FileNotFoundException; 
import java.io.IOException; 
import java.io.FileReader; 
import java.io.BufferedReader; 
 
//import cn.edu.tsinghua.ss.liuhongxin.C; 
//import cn.edu.tsinghua.ss.liuhongxin.L; 
//import cn.edu.tsinghua.ss.liuhongxin.Ls; 
 
// Apriori算法 
 public class Apriori { 
   
  private double minimumSupport; 
   
  //   * 原始数据d,该算法中是由文件读得 
    
  private LinkedList d; 
  private int totalTransCount; 
    //   * 初始化数据 
   //* @param filename 配置文件的文件名 
    
  public Apriori(String filename) throws FileNotFoundException, IOException { 
    try { 
      Properties properties = new Properties(); 
      properties.load(new FileInputStream(new File(filename))); 
      this.minimumSupport = Double.parseDouble(properties.getProperty("min_sup")); 
      String data_file = properties.getProperty("data_file"); 
      BufferedReader br = new BufferedReader(new FileReader(new File(data_file))); 
     // 读配置文件config.property,得到最小支持度阈值和数据文件的文件名 
       
      d = new LinkedList(); 
      String TransID = ""; 
      ArrayList elements = new ArrayList(); 
       
      String fileLine = null; 
      String[] result = null; 
      String[] triSet = new String[3];  // triSet contains CustID, TransID, and Item of data file. 
      while ((fileLine = br.readLine()) != null) { 
        int count = 0; 
        result = Pattern.compile(" ").split(fileLine); 
        for (int i = 0; i < result.length; i++) { 
          if (result[i] != null && !"".equals(result[i])) { 
            triSet[count++] = result[i]; 
          } 
        } 
        if (!TransID.equals(triSet[1])) { 
          if (!elements.isEmpty()) { 
            String[] elementsArray = new String[elements.size()]; 
            for (int i = 0; i < elements.size(); i++) { 
              elementsArray[i] = (String) elements.get(i); 
            } 
            Arrays.sort(elementsArray); 
            ArrayList newElements = new ArrayList(); 
            for (int i = 0; i < elementsArray.length; i++) { 
              newElements.add(i, elementsArray[i]); 
            } 
            d.add(newElements.clone()); 
            elements.clear(); 
          } 
          TransID = triSet[1]; 
        } 
        elements.add(triSet[2]); 
      } 
      String[] elementsArray = new String[elements.size()]; 
      for (int i = 0; i < elements.size(); i++) { 
        elementsArray[i] = (String) elements.get(i); 
      } 
      Arrays.sort(elementsArray); 
      ArrayList newElements = new ArrayList(); 
      for (int i = 0; i < elementsArray.length; i++) { 
        newElements.add(i, elementsArray[i]); 
      } 
       
      d.add(newElements); 
      totalTransCount = d.size(); 
       
    } catch (FileNotFoundException ex) { 
      ex.printStackTrace(); 
      throw ex; 
    } catch (IOException ex) { 
      ex.printStackTrace(); 
      throw ex; 
    } 
  } 
 
  /** 
   * 返回所有l 
   */ 
  public Ls getLs() { 
    Ls ls = new Ls(); 
    L l1 = findFrequent1Itemsets(); 
    ls.put(l1); 
     
    for (int k = 2; !ls.get(k - 1).isEmpty(); k++) { 
      C c = ls.get(k - 1).aprioriGen(); 
      Item[] items = c.getItems(); 
      if (c.isEmpty()) { 
        break; 
      } 
      for (int i = 0; i < items.length; i++) { 
        int count = 0; 
        for (Iterator iterator = d.iterator(); iterator.hasNext(); ) { 
          ArrayList elements = (ArrayList) iterator.next(); 
          if (elements.containsAll(items[i].getElements())) { 
            count++; 
          } 
        } 
        items[i].setSupport((double) count / totalTransCount); 
      } 
      ls.put(c.getL(minimumSupport)); 
    } 
    return ls; 
  } 
 
  /** 
   * 返回频繁1-项集 
   */ 
  private L findFrequent1Itemsets() { 
    ArrayList total1Itemsets = new ArrayList(); 
    for (int i = 0; i < d.size(); i++) { 
      total1Itemsets.addAll((ArrayList) d.get(i)); 
    } 
    ArrayList items = new ArrayList(); 
     
    while (!total1Itemsets.isEmpty()) { 
      String item = (String) total1Itemsets.get(0); 
      int count = 0; 
      int j = 0; 
      while(j < total1Itemsets.size()) { 
        if (item.equals(total1Itemsets.get(j))) { 
          count++; 
          total1Itemsets.remove(item); 
         // Removes the first occurrence of an item from the list 
        } else { 
          j++; 
        } 
      } 
      double support = (double) count / totalTransCount; 
      if (support > minimumSupport) { 
        items.add(new Item(item, support)); 
      } 
    } 
    Item[] theItems = new Item[items.size()]; 
    for (int i = 0; i < items.size(); i++) { 
      theItems[i] = (Item) items.get(i); 
    } 
    return new L(theItems, 1); 
  } 
 
  public static void main(String[] args) { 
    //if (args.length < 1) { 
     // System.out.println("config.property is required!"); 
     // System.out.println("Please key in: java Apriori config.property"); 
   // } else { 
      try { 
        Apriori apriori = new Apriori("../config.property"); 
        Ls ls = apriori.getLs(); 
        System.out.println(ls); 
      } catch (Exception ex) { 
        System.out.println("Initialization error!"); 
        ex.printStackTrace(); 
      } 
     
  } 
 
}