www.pudn.com > datamining.rar > Apriori.java
//package cn.edu.tsinghua.ss.liuhongxin;
import java.util.Properties;
import java.util.LinkedList;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.regex.Pattern;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.FileReader;
import java.io.BufferedReader;
//import cn.edu.tsinghua.ss.liuhongxin.C;
//import cn.edu.tsinghua.ss.liuhongxin.L;
//import cn.edu.tsinghua.ss.liuhongxin.Ls;
// Apriori算法
public class Apriori {
private double minimumSupport;
// * 原始数据d,该算法中是由文件读得
private LinkedList d;
private int totalTransCount;
// * 初始化数据
//* @param filename 配置文件的文件名
public Apriori(String filename) throws FileNotFoundException, IOException {
try {
Properties properties = new Properties();
properties.load(new FileInputStream(new File(filename)));
this.minimumSupport = Double.parseDouble(properties.getProperty("min_sup"));
String data_file = properties.getProperty("data_file");
BufferedReader br = new BufferedReader(new FileReader(new File(data_file)));
// 读配置文件config.property,得到最小支持度阈值和数据文件的文件名
d = new LinkedList();
String TransID = "";
ArrayList elements = new ArrayList();
String fileLine = null;
String[] result = null;
String[] triSet = new String[3]; // triSet contains CustID, TransID, and Item of data file.
while ((fileLine = br.readLine()) != null) {
int count = 0;
result = Pattern.compile(" ").split(fileLine);
for (int i = 0; i < result.length; i++) {
if (result[i] != null && !"".equals(result[i])) {
triSet[count++] = result[i];
}
}
if (!TransID.equals(triSet[1])) {
if (!elements.isEmpty()) {
String[] elementsArray = new String[elements.size()];
for (int i = 0; i < elements.size(); i++) {
elementsArray[i] = (String) elements.get(i);
}
Arrays.sort(elementsArray);
ArrayList newElements = new ArrayList();
for (int i = 0; i < elementsArray.length; i++) {
newElements.add(i, elementsArray[i]);
}
d.add(newElements.clone());
elements.clear();
}
TransID = triSet[1];
}
elements.add(triSet[2]);
}
String[] elementsArray = new String[elements.size()];
for (int i = 0; i < elements.size(); i++) {
elementsArray[i] = (String) elements.get(i);
}
Arrays.sort(elementsArray);
ArrayList newElements = new ArrayList();
for (int i = 0; i < elementsArray.length; i++) {
newElements.add(i, elementsArray[i]);
}
d.add(newElements);
totalTransCount = d.size();
} catch (FileNotFoundException ex) {
ex.printStackTrace();
throw ex;
} catch (IOException ex) {
ex.printStackTrace();
throw ex;
}
}
/**
* 返回所有l
*/
public Ls getLs() {
Ls ls = new Ls();
L l1 = findFrequent1Itemsets();
ls.put(l1);
for (int k = 2; !ls.get(k - 1).isEmpty(); k++) {
C c = ls.get(k - 1).aprioriGen();
Item[] items = c.getItems();
if (c.isEmpty()) {
break;
}
for (int i = 0; i < items.length; i++) {
int count = 0;
for (Iterator iterator = d.iterator(); iterator.hasNext(); ) {
ArrayList elements = (ArrayList) iterator.next();
if (elements.containsAll(items[i].getElements())) {
count++;
}
}
items[i].setSupport((double) count / totalTransCount);
}
ls.put(c.getL(minimumSupport));
}
return ls;
}
/**
* 返回频繁1-项集
*/
private L findFrequent1Itemsets() {
ArrayList total1Itemsets = new ArrayList();
for (int i = 0; i < d.size(); i++) {
total1Itemsets.addAll((ArrayList) d.get(i));
}
ArrayList items = new ArrayList();
while (!total1Itemsets.isEmpty()) {
String item = (String) total1Itemsets.get(0);
int count = 0;
int j = 0;
while(j < total1Itemsets.size()) {
if (item.equals(total1Itemsets.get(j))) {
count++;
total1Itemsets.remove(item);
// Removes the first occurrence of an item from the list
} else {
j++;
}
}
double support = (double) count / totalTransCount;
if (support > minimumSupport) {
items.add(new Item(item, support));
}
}
Item[] theItems = new Item[items.size()];
for (int i = 0; i < items.size(); i++) {
theItems[i] = (Item) items.get(i);
}
return new L(theItems, 1);
}
public static void main(String[] args) {
//if (args.length < 1) {
// System.out.println("config.property is required!");
// System.out.println("Please key in: java Apriori config.property");
// } else {
try {
Apriori apriori = new Apriori("../config.property");
Ls ls = apriori.getLs();
System.out.println(ls);
} catch (Exception ex) {
System.out.println("Initialization error!");
ex.printStackTrace();
}
}
}