www.pudn.com > apriori的vc源代码.zip > apriori.cpp


// Apriori.cpp: implementation of the CApriori class. 
// 
////////////////////////////////////////////////////////////////////// 
#include "stdafx.h" 
 
#include "Apriori.h" 
#include "HashTree.h" 
 
#include "itemSet.h" 
#include "List.h" 
 
////////////////////////////////////////////////////////////////////// 
// Construction/Destruction 
////////////////////////////////////////////////////////////////////// 
 
 
//--------------------------------------------------------------------------- 
//    Class Apriori 
 
// Method that finds all large itemsets for the given set of instances. 
void CApriori::FindLargeItemSets(List *instances) 
{ 
    List *kMinusOneSets, *KSets; 
    itemSet *current; 
 
    int i = 0, j; 
 
    CHashTree *tree; 
 
    m_samples = instances; 
    m_sampleNum = (long)m_samples->size(); 
 
    necSupport = (long)(m_minSupport*(double)m_sampleNum + 0.5); 
 
    KSets = singletons(); 
    if (KSets->size() == 0) 
        return; 
 
	if(m_samples->size() <= 0) 
		return; 
 
    // Find k>=2 large itemsets 
    do 
    { 
        m_Ls->add(KSets); 
        kMinusOneSets = KSets; 
         
        KSets = selfjoin(kMinusOneSets, i); 
		delete kMinusOneSets; 
 
        tree = (CHashTree *)new CHashTree(); 
 
        for(j = 0; j < KSets->size(); j++) 
        { 
            current = (itemSet *)KSets->get(j); 
            tree->insert(&(tree->root), (itemSet *)current->clone(), 0); 
//            delete current; 
        } 
        delete KSets; 
 
        j= 0; 
        while(j < m_samples->size()) 
        { 
            current = (itemSet *)m_samples->get(j); 
            if(current->size() < i+2) 
            { 
                m_samples->remove(j); 
            } 
            else 
            { 
                tree->subset(tree->root, current, 0); 
                j++; 
            } 
 
//            delete current; 
        } 
         
        KSets = (List *)new List(); 
        tree->scan(tree->root, KSets, necSupport); 
 
        delete tree; 
 
        i++; 
 
    } while (KSets->size() > 0); 
 
    return; 
} 
 
 
// find 1 itemset 
List * CApriori::singletons() 
{ 
    List *setOfItemSets = (List *)new List(); 
    itemSet *current, *train; 
    long necSupport; 
    int i, j; 
    int *svector; 
    int itemid; 
 
 
    svector = (int *)new int[pagenum]; 
    for(i = 0; i < pagenum; i++) 
        svector[i] = 0; 
 
    for(i = 0; i < m_samples->size(); i++) 
    { 
        train = (itemSet *)m_samples->get(i); 
 
        for(j = 0; j < train->size(); j++) 
        { 
            itemid = train->get(j); 
 
            svector[itemid] += 1; 
        } 
    } 
 
    necSupport = (long)(m_minSupport*(double)(m_samples->size())+0.5); 
 
    for(i = 0; i < pagenum; i++) 
    { 
        if((double)svector[i]/(double)(m_samples->size()) >= m_minSupport) 
        { 
            current = new itemSet(); 
            current->add(i); 
            current->support(svector[i]); 
     
            setOfItemSets->add(current); 
//            delete current; 
        } 
    } 
 
    /* delete all the items that are not in 1-lerge itemset */ 
 
    i = 0; 
    while (i < m_samples->size()) 
    { 
        train = (itemSet *)m_samples->get(i); 
 
        j = 0; 
        while (j < train->size()) 
        { 
            itemid = train->get(j); 
 
            if(svector[itemid] < necSupport) 
                train->remove(j); 
            else 
                j++; 
        } 
 
        if(train->size() == 0) 
            m_samples->remove(i); 
        else 
            i++; 
    } 
 
    delete svector; 
 
    return setOfItemSets; 
} 
 
 
List * CApriori::selfjoin(List *in, int size) 
{ 
    List *newVector = new List(); 
    itemSet *first; 
    itemSet *second; 
    itemSet *result; 
    int i, j; 
     
    for (i = 0; i < in->size(); i++) 
    { 
        first = (itemSet *)in->get(i); 
         
        for (j = i+1; j < in->size(); j++) 
        { 
            second = (itemSet *)in->get(j); 
 
            if((result = join(first, second, size)) != NULL) 
            { 
                if(prune(in, result, size)) 
                { 
                    result->support(0); 
                    newVector->add(result); 
                } 
				else 
	                delete result; 
            } 
 
//            delete second; 
        } 
 
//        delete first; 
    } 
     
    return(newVector); 
} 
 
bool CApriori::prune(List *in, itemSet *attend, int size) 
{ 
    itemSet *temp; 
    bool result = true; 
 
    for(int i = 0; i < attend->size(); i++) 
    { 
        temp = (itemSet *)attend->clone(); 
        temp->remove(i); 
         
        if(in->indexOf(temp) < 0) 
        { 
            delete temp; 
            result = true; 
            break; 
        } 
         
        delete temp; 
    } 
     
    return(result); 
} 
 
 
 
itemSet * CApriori::join(itemSet *first, itemSet *attend, int size) 
{ 
    itemSet *result = (itemSet *)NULL; 
    int i; 
 
    if( ! check(first, attend, size)) 
        return(result); 
         
    result = (itemSet *)new itemSet(); 
    for(i = 0; i < size; i++) 
        result->add(first->get(i)); 
     
    if(first->get(i) < attend->get(i)) 
    { 
        result->add(first->get(i)); 
        result->add(attend->get(i)); 
    } 
    else 
    { 
        result->add(attend->get(i)); 
        result->add(first->get(i)); 
    } 
     
    return(result);		 
} 
 
bool CApriori::check(itemSet *first, itemSet *attend, int size) 
{ 
    for(int i = 0; i < size; i++) 
        if(first->get(i) != attend->get(i)) 
            return(false); 
             
    return(true); 
}