// Source: www.pudn.com > datamining.rar > L.java
//package cn.edu.tsinghua.ss.liuhongxin;
import java.util.ArrayList;
//import cn.edu.tsinghua.ss.liuhongxin.C;
//import cn.edu.tsinghua.ss.liuhongxin.Item;
/**
 * Frequent k-itemsets, i.e. the set L(k) from which the next-order
 * candidate set C(k+1) is generated.
 */
public class L {
    /**
     * The itemsets held by this set. May be {@code null} (for example after
     * the no-arg constructor); {@code null} is treated the same as empty.
     */
    private Item[] items;

    /**
     * The order k.
     */
    private int k;

    /**
     * Creates a set with no items ({@code items} stays {@code null}) and k = 0.
     */
    public L() {
    }

    /**
     * Creates a set of frequent itemsets of the given order.
     *
     * @param items the itemsets; the array is stored by reference, not copied
     * @param k     the order of the itemsets
     */
    public L(Item[] items, int k) {
        this.items = items;
        this.k = k;
    }

    /**
     * Generates the next-order candidate set for this L, i.e. derives C(k+1)
     * from L(k): every pair of items that are {@code similarTo} each other is
     * joined, and the joined item is kept unless it has an infrequent subset.
     *
     * @return a {@code C} of order {@code k + 1}; it is constructed with a
     *         {@code null} item array when no candidate survives or when this
     *         set is empty
     */
    public C aprioriGen() {
        // An empty (or never-initialised) L cannot produce candidates. Without
        // this guard a null item array would raise a NullPointerException below.
        if (items == null || items.length == 0) {
            return new C(null, k + 1);
        }

        ArrayList candidates = new ArrayList();
        for (int i = 0; i < items.length; i++) {
            // Start at i + 1 so that each unordered pair is examined only once.
            for (int j = i + 1; j < items.length; j++) {
                if (!items[i].similarTo(items[j])) {
                    continue;
                }
                Item joined = items[i].join(items[j]);
                if (!hasInfrequentSubset(joined)) {
                    candidates.add(joined);
                }
            }
        }

        if (candidates.isEmpty()) {
            return new C(null, k + 1);
        }
        Item[] candidateItems = (Item[]) candidates.toArray(new Item[candidates.size()]);
        return new C(candidateItems, k + 1);
    }

    /**
     * Determines whether {@code c} has a subset that is not covered by any
     * item of this set; used to decide whether a candidate must be pruned.
     *
     * @param c the candidate item to examine
     * @return {@code true} if at least one examined subset of {@code c} is not
     *         contained in the elements of any item in this set
     */
    private boolean hasInfrequentSubset(Item c) {
        // NOTE(review): getSubSet(n) presumably returns all subsets of size n,
        // each as an ArrayList of elements - confirm against Item.
        for (int size = 1; size <= k; size++) {
            ArrayList subsets = c.getSubSet(size);
            for (int s = 0; s < subsets.size(); s++) {
                ArrayList subset = (ArrayList) subsets.get(s);
                boolean covered = false;
                for (int n = 0; n < items.length; n++) {
                    if (items[n].getElements().containsAll(subset)) {
                        covered = true;
                        break;
                    }
                }
                if (!covered) {
                    // One uncovered subset is enough to reject the candidate.
                    return true;
                }
            }
        }
        return false;
    }

    /**
     * @return the order k of this set
     */
    public int getK() {
        return k;
    }

    /**
     * @return {@code true} if the item array is {@code null} or has length 0
     */
    public boolean isEmpty() {
        return items == null || items.length == 0;
    }

    /**
     * Renders the set as a header line followed by one item per line,
     * separated by {@code ",\n"} and terminated by {@code "."}; an empty set
     * is rendered as the header followed by {@code "EMPTY!"}.
     *
     * @return the textual form of this set
     */
    public String toString() {
        StringBuffer text = new StringBuffer("\nFrequent " + k + "-Itemsets:\n");
        if (items == null || items.length == 0) {
            return text.append("EMPTY!").toString();
        }
        for (int i = 0; i < items.length; i++) {
            text.append(items[i]).append(",\n");
        }
        // Replace the trailing ",\n" left by the last item with a closing period.
        text.setLength(text.length() - 2);
        return text.append(".").toString();
    }
}