#include "header.i" /*****************************************************************************************/ /* Builds a Superlist consisting of one AllList for each value */ /* of attribute Att2 appearing in Item[Fp..Lp]. */ /* All items with such a value appear in that interval-list. */ /* The Superlist is sorted on Att2. */ /* Each interval-list is sorted on Att1 and contains a tail element with value Infinity. */ /*****************************************************************************************/ SuperList InitSuperList(ItemNo Fp, ItemNo Lp, Attribute Att1, Attribute Att2) /* ------------- */ { ItemList IL, IP, IH; SuperList SL, SP; IL = MakeItemList(Fp, Lp); IL = SortItemList(IL, Lp-Fp+1, Att2); SL = (SuperList) calloc(1, sizeof(SLRecord)); SP = SL; while( IL != Nil ) { IP = IL; while( IP != Nil && IP->next != Nil && CVal(IP->item, Att2) == CVal(IP->next->item, Att2) ) { IP = IP->next; } IH = IL; IL = IP->next; IP->next = Nil; if( LIST_SLRecord == Nil ) { LIST_SLRecord = (SuperList) calloc(1, sizeof(SLRecord)); LIST_SLRecord->next = Nil; } SP->next = LIST_SLRecord; LIST_SLRecord = LIST_SLRecord->next; SP = SP->next; SP->next = Nil; SP->alllist = InitAllList(IH, Att1, Att2); ReleaseItemList(IH); } SP = SL; SL = SL->next; cfree(SP); return SL; } /***********************************************************/ /* Merges the elements of SL until only 1 element remains. */ /* SL != Nil. */ /***********************************************************/ void MergeSuperList(SuperList SL) /* -------------- */ { SuperList SP; while( SL->next != Nil ) { SP = SL; while( SP != Nil && SP->next != Nil ) { SP->alllist = MergeAllLists(SP->alllist, SP->next->alllist); SP->next = RemoveElSL(SP->next); SP = SP->next; } } } /**************************************************************************/ /* Builds from ItemList IList where all items have the same value of Att2 */ /* and which is sorted on Att1 the corresponding AllList */ /* with an Infinity element at the end. Att1 might be None. */ /**************************************************************************/ AllList InitAllList(ItemList IL, Attribute Att1, Attribute Att2) /* ----------- */ { AllList AL, AP; ClassNo c, c1, c2; ItemCount TotalFreq_L, TotalFreq_U; ItemList IP; int k; float Att2Value; AL = (AllList) calloc(1, sizeof(ALRecord)); AP = AL; ForEach(c, 0, MaxClass) { Freq_L[c] = 0; Freq_U[c] = 0; } TotalFreq_L = 0; TotalFreq_U = 0; for( IP = IL ; IP != Nil ; IP = IP->next ) { Freq_U[ Class(IP->item) ] += Weight(IP->item); TotalFreq_U += Weight(IP->item); } Att2Value = CVal(IL->item, Att2); while( IL != Nil ) { AP->next = MakeALRecord(); AP = AP->next; if( Att1 != None ) AP->y_val = CVal(IL->item, Att1); ForAllIntervals(c1, k, c2) { if( c1 == c2 && ( ( k == 1 && Att2Value != Unknown ) || ( k == 0 && Att2Value == Unknown ) ) ) { AP->allint_L[c1][k][c2].Freq = TotalFreq_L; AP->allint_L[c1][k][c2].Error = TotalFreq_L - Freq_L[c1]; AP->allint_L[c1][k][c2].BestClass = c1; AP->allint_L[c1][k][c2].NoIntervals = k; AP->allint_L[c1][k][c2].Split[k].Freq = TotalFreq_L; AP->allint_L[c1][k][c2].Split[k].Error = TotalFreq_L - Freq_L[c1]; AP->allint_L[c1][k][c2].Split[k].Threshold = Att2Value; AP->allint_L[c1][k][c2].Split[k].BestClass = c1; AP->allint_U[c1][k][c2].Freq = TotalFreq_U; AP->allint_U[c1][k][c2].Error = TotalFreq_U - Freq_U[c1]; AP->allint_U[c1][k][c2].BestClass = c1; AP->allint_U[c1][k][c2].NoIntervals = k; AP->allint_U[c1][k][c2].Split[k].Freq = TotalFreq_U; AP->allint_U[c1][k][c2].Split[k].Error = TotalFreq_U - Freq_U[c1]; AP->allint_U[c1][k][c2].Split[k].Threshold = Att2Value; AP->allint_U[c1][k][c2].Split[k].BestClass = c1; } else { AP->allint_L[c1][k][c2].Error = Infinity; AP->allint_U[c1][k][c2].Error = Infinity; } } do { Freq_L[Class(IL->item)] += Weight(IL->item); Freq_U[Class(IL->item)] -= Weight(IL->item); TotalFreq_L += Weight(IL->item); TotalFreq_U -= Weight(IL->item); IL = IL->next; } while( IL != Nil && ( Att1 == None || CVal(IL->item, Att1) == AP->y_val ) ); } AP->next = MakeALRecord(); AP = AP->next; AP->y_val = Infinity; ForAllIntervals(c1, k, c2) { if( c1 == c2 && ( ( k == 1 && Att2Value != Unknown ) || ( k == 0 && Att2Value == Unknown ) ) ) { AP->allint_L[c1][k][c2].Freq = TotalFreq_L; AP->allint_L[c1][k][c2].Error = TotalFreq_L - Freq_L[c1]; AP->allint_L[c1][k][c2].BestClass = c1; AP->allint_L[c1][k][c2].NoIntervals = k; AP->allint_L[c1][k][c2].Split[k].Freq = TotalFreq_L; AP->allint_L[c1][k][c2].Split[k].Error = TotalFreq_L - Freq_L[c1]; AP->allint_L[c1][k][c2].Split[k].Threshold = Att2Value; AP->allint_L[c1][k][c2].Split[k].BestClass = c1; AP->allint_U[c1][k][c2].Freq = TotalFreq_U; AP->allint_U[c1][k][c2].Error = TotalFreq_U - Freq_U[c1]; AP->allint_U[c1][k][c2].BestClass = c1; AP->allint_U[c1][k][c2].NoIntervals = k; AP->allint_U[c1][k][c2].Split[k].Freq = TotalFreq_U; AP->allint_U[c1][k][c2].Split[k].Error = TotalFreq_U - Freq_U[c1]; AP->allint_U[c1][k][c2].Split[k].Threshold = Att2Value; AP->allint_U[c1][k][c2].Split[k].BestClass = c1; } else { AP->allint_L[c1][k][c2].Error = Infinity; AP->allint_U[c1][k][c2].Error = Infinity; } } AP = AL; AL = AL->next; cfree(AP); return AL; } /************************************************************************/ /* Merges 2 AllLists, destroying both of them and returning the result, */ /* the x-values in L1 must be smaller than the x-values in L2 */ /************************************************************************/ AllList MergeAllLists(AllList AL1, AllList AL2) /* ---------- */ { AllList AL; if( AL1 == Nil && AL2 == Nil) return Nil; if( AL1 == Nil || AL2 == Nil) { Error(7,"MergeAllList",""); exit(1); } AL = MergeIntervals(AL1, AL2); if( AL1->y_val == AL2->y_val ) { AL->next = MergeAllLists(AL1->next, AL2->next); AL1->next = AL2; AL2->next = LIST_ALRecord; LIST_ALRecord = AL1; } if( AL1->y_val < AL2->y_val ) { AL->next = MergeAllLists(AL1->next, AL2); AL1->next = LIST_ALRecord; LIST_ALRecord = AL1; } if( AL1->y_val > AL2->y_val ) { AL->next = MergeAllLists(AL1, AL2->next); AL2->next = LIST_ALRecord; LIST_ALRecord = AL2; } return AL; } /***********************************************************************************/ /* Merges the LRecords *AL1 and *AL2, returning a pointer to the resulting LRecord */ /* the intervals in AL1 must be left of the intervals in AL2 */ /***********************************************************************************/ AllList MergeIntervals(AllList AL1, AllList AL2) /* -------------- */ { AllList AL; AL = MakeALRecord(); AL->y_val = Min(AL1->y_val, AL2->y_val); MergeInts(AL1->allint_L, AL2->allint_L, AL->allint_L); MergeInts(AL1->allint_U, AL2->allint_U, AL->allint_U); return AL; } /****************************************************************/ /* Merges AllInt1 und AllInt2 into AllInt if AllInt1 <= AllInt2 */ /****************************************************************/ void MergeInts(AllIntervals AllInt_1, AllIntervals AllInt_2, AllIntervals AllInt) /* --------- */ { ClassNo c11, c12, c21, c22, BestC12, BestC21, BestClass, c, c1, c2; int k, k1, k2, BestK1, BestK2, l; ItemCount BE, BestError; /* Special case : AllInt_1 contains only unknown attribute values */ if( AllInt_1[0][1][0].Error == Infinity ) { BestClass = 0; ForEach(c, 1, MaxClass) { if( AllInt_1[c][0][c].Error < AllInt_1[BestClass][0][BestClass].Error ) BestClass = c; } ForAllIntervals(c1, k, c2) { AllInt[c1][k][c2].Freq = AllInt_1[BestClass][0][BestClass].Freq + AllInt_2[c1][k][c2].Freq; AllInt[c1][k][c2].Error = AllInt_1[BestClass][0][BestClass].Error + AllInt_2[c1][k][c2].Error; AllInt[c1][k][c2].NoIntervals = k; AllInt[c1][k][c2].Split[0] = AllInt_1[BestClass][0][BestClass].Split[0]; ForEach(l, 1, k) { AllInt[c1][k][c2].Split[l] = AllInt_2[c1][k][c2].Split[l]; } } } else { ForAllIntervals(c11, k, c22) { BestError = Infinity; ForAllIntervals(c12, k1, c21) { if( k1 == 0 ) continue; if( c12 == c21 ) k2 = k - k1 + 1; else k2 = k - k1; if( k2 < 1 ) continue; BE = AllInt_1[c11][k1][c12].Error + AllInt_2[c21][k2][c22].Error; if( BE < BestError ) { BestK1 = k1; BestC12 = c12; BestC21 = c21; BestK2 = k2; BestError = BE; } } if( BestError == Infinity ) { AllInt[c11][k][c22].Error = Infinity; } else { AllInt[c11][k][c22].Freq = AllInt_1[c11][BestK1][BestC12].Freq + AllInt_2[BestC21][BestK2][c22].Freq; AllInt[c11][k][c22].Error = AllInt_1[c11][BestK1][BestC12].Error + AllInt_2[BestC21][BestK2][c22].Error; AllInt[c11][k][c22].NoIntervals = k; if( BestC12 != BestC21 ) { ForEach(l, 0, BestK1) { AllInt[c11][k][c22].Split[l] = AllInt_1[c11][BestK1][BestC12].Split[l]; } ForEach(l, 1, BestK2) { AllInt[c11][k][c22].Split[l+BestK1] = AllInt_2[BestC21][BestK2][c22].Split[l]; } } else { ForEach(l, 0, BestK1) { AllInt[c11][k][c22].Split[l] = AllInt_1[c11][BestK1][BestC12].Split[l]; } ForEach(l, 2, BestK2) { AllInt[c11][k][c22].Split[l+BestK1-1] = AllInt_2[BestC21][BestK2][c22].Split[l]; } AllInt[c11][k][c22].Split[BestK1].Freq += AllInt_2[BestC21][BestK2][c22].Split[1].Freq; AllInt[c11][k][c22].Split[BestK1].Error += AllInt_2[BestC21][BestK2][c22].Split[1].Error; } } } } ForEach(c, 0, MaxClass) AllInt[c][0][c].Split[0].Error = AllInt_1[c][0][c].Split[0].Error; BestClass = 0; ForEach(c, 1, MaxClass) { if( AllInt[c][1][c].Split[1].Error + AllInt[c][0][c].Split[0].Error < AllInt[BestClass][1][BestClass].Split[1].Error + AllInt[BestClass][0][BestClass].Split[0].Error ) BestClass = c; } ForAllIntervals(c1, k, c2) AllInt[c1][k][c2].BestClass = BestClass; }