/* ====================================================================== David W. Aha testing.c: Testing source for the IBL algorithms Topic: IB1, IB2, IB3, and now IB4 July, 1990 ====================================================================== */ #include "datastructures.h" /* ====================================================================== Classifying the test set. Called from ibl.c. ====================================================================== */ void read_test_set() { /*==== Subfunctions called ====*/ extern ptr_instance translate_instance(); /* Utility */ FILE *fopen(); /*==== Local variables ====*/ FILE *test_set_file; char line[MAXLINE]; /*==== 1. Read and Process each test instance. Keep count. ====*/ printf("Reading test instances...\n"); number_of_test_instances = 0; test_set_file = fopen(testingfile,"r"); while (NULL != fgets(line, MAXLINE,test_set_file)) { raw_test_instances[number_of_test_instances]= translate_instance(line,test_set_file); number_of_test_instances++; if (number_of_test_instances > MAX_NUMBER_OF_TEST_INSTANCES) { printf("FATAL ERROR in read_test_set: too many test instances!!\n"); printf("Currently, there is room for only %d test instances.\n", MAX_NUMBER_OF_TEST_INSTANCES); number_of_test_instances = 0; } } /*==== 2. Cleanup. ====*/ fclose(test_set_file); printf("...Done reading test instances.\n"); } /* ====================================================================== Setting things up for testing. Note: global setup done in print_classification_results. ====================================================================== */ void setup_for_testing() { /*==== Local Variables ====*/ register int i, predictee_a; /*==== 1. Clear Summary Variables ====*/ num_correct_in_alg = 0; num_incorrect_in_alg = 0; num_queries_in_alg = 0; percent_data_in_alg = 0.0; percent_correct_in_alg = 0.0; /*==== 2. Clear and Reset Arrays ====*/ for(predictee_a=0; predictee_aattributes[predictee[predictee_a]]; /*==== 2. Testing ====*/ if (test_val != unknown_value) { /*==== 2.1 Keep a tally on total number of queries, too. ====*/ num_queries_in_alg++; num_queries_in_concept[predictee_a]++; /*==== 2.2 Do the work ====*/ if (num_accepted_in_concept[predictee_a] == 0) { /*==== If none accepted, then random guess ====*/ if (test_val == (double)(random()%num_values[predictee[predictee_a]])) { num_correct_in_concept[predictee_a]++; num_correct_in_alg++; } else { num_incorrect_in_concept[predictee_a]++; num_incorrect_in_alg++; } probability_of_membership[test_num][predictee_a] = 0.5; } else /*==== If some accepted, calculate guess ====*/ { compute_similarities_during_testing(predictee_a,test_inst); if (ib4 && best_concept_only) calculate_membership_probabilities(predictee_a,test_num); else { order_similarities_during_testing(predictee_a); if (correct_classification(predictee_a,test_val)) { num_correct_in_concept[predictee_a]++; num_correct_in_alg++; } else { num_incorrect_in_concept[predictee_a]++; num_incorrect_in_alg++; } } } } } } /*==== Get best concept only predictions when requested ====*/ if (ib4 && best_concept_only) best_concept_only_predictions(); } /* ====================================================================== Computing all information for calculating similarities from new instance to only those accepted, previous training instances. Side effects: values saved in the array named distances. DURING TESTING ONLY! Assumes test_inst->attributes[predictee_a] is known; predictee[predictee_a]. ====================================================================== */ void compute_similarities_during_testing(predictee_a,testinst) register int predictee_a; struct instance *testinst; { /*==== Subfunctions ====*/ extern double diff_with_unknowns(); /*==== Utility ====*/ /*==== Local Variables ====*/ register int i, data_id, predictor_a,a; double sum, diff, normalizer; struct instance *oldinst; /*==== 1. Dynamic determination of distance ====*/ for(i=0; iattributes[predictee[predictee_a]] == unknown_value) distances[i] = my_infinity; /*==== Preventing 0 division ====*/ else { for(predictor_a=0; predictor_aattributes[a] == unknown_value) || (oldinst->attributes[a] == unknown_value)) { if (ib4) normalizer -= attribute_weight_used[predictee_a][predictor_a]; else normalizer -= 1.0; diff = diff_with_unknowns(a,testinst->attributes[a], oldinst->attributes[a]); } else if (attribute_type[a] == NOMINAL_TYPE) diff = (double)(testinst->attributes[a] != oldinst->attributes[a]); else /*==== This is a pair of known numeric values. ====*/ diff = testinst->attributes[a] - oldinst->attributes[a]; if (ib4) sum += (diff * diff * attribute_weight_used[predictee_a][predictor_a]); else sum += (diff * diff); } if (!missing_ignore) distances[i] = sqrt(sum); else if (normalizer == 0.0) distances[i] = my_infinity; else distances[i] = sqrt(sum)/sqrt(normalizer); } } } /* ====================================================================== Order the similarities during testing. All instances were accepted. ====================================================================== */ void order_similarities_during_testing(predictee_a) register int predictee_a; { /*==== Subfunctions ====*/ extern void insert_accepted(), /*==== Training ====*/ randomize_ordered(); /*==== Training ====*/ /*==== Local Variables ====*/ register int i, data_id, num_required = k; /*==== For Booleans and Nominals ====*/ /*==== 1. Setup ====*/ num_accepted = num_rejected = 0; /*==== 2. Find required number of best accepted instances. ====*/ for(i=0; iattributes[a]; if (distances[i] < closest_with_val[target_val]) closest_with_val[target_val] = distances[i]; if ((distances[i] != my_infinity) && ((num_accepted < num_required) || (distances[i] <= accepted_distance[num_accepted-1]))) insert_accepted(data_id,distances[i],num_required); } /*==== 3. Best Acceptance Tie? Find a random-selected winner ====*/ randomize_ordered(); /*==== Yes, needed to break ties ====*/ /*==== 4. Set probabilities. Overlap must be set, so need only 1. ====*/ /*==== Note that the distance values are negative! ====*/ if (closest_with_val[0] == my_infinity) if (closest_with_val[1] == my_infinity) probability_of_membership[test_num][predictee_a] = 0.5; else probability_of_membership[test_num][predictee_a] = 1.0; else if (closest_with_val[1] == my_infinity) probability_of_membership[test_num][predictee_a] = 0.0; else { total = closest_with_val[0]+closest_with_val[1]; if (total == 0.0) /*==== Could happen if, say, almost all wts are 0 */ probability_of_membership[test_num][predictee_a] = 0.5; else probability_of_membership[test_num][predictee_a] = closest_with_val[0]/total; } } /* ====================================================================== Tallies up for predictee[0] classification results based on the most probable guesses for each test instance. Assume 1 real predictee! Assumes all pseudo-predictees were set up as binary classifiers. ======================================================================*/ void best_concept_only_predictions() { /*==== Local Variables ====*/ register int test_num, test_val, predictee_a, num_in_tie, selected; int has_highest_prob[MAX_NUMBER_OF_PREDICTEES]; struct instance *test_inst; double max; /*==== 1. Setup: clear "results" in predictee[0]====*/ num_correct_in_alg = 0; num_incorrect_in_alg = 0; num_correct_in_concept[0] = 0; num_incorrect_in_concept[0] = 0; /*==== 2. Loop for each test instance ====*/ for(test_num=0; test_numattributes[predictee[predictee_a]] == 1.0) { test_val = predictee_a; break; } /*==== 2b. Find values with highest probability ====*/ num_in_tie = 1; has_highest_prob[0] = 0; max = probability_of_membership[test_num][0]; for(predictee_a=1; predictee_amax) { num_in_tie = 1; has_highest_prob[0] = predictee_a; max = probability_of_membership[test_num][predictee_a]; } else if (probability_of_membership[test_num][predictee_a] == max) { has_highest_prob[num_in_tie] = predictee_a; num_in_tie++; } /*==== 2c. Break ties randomly and update correct or incorrect cts ====*/ selected = random() % num_in_tie; if (has_highest_prob[selected] == test_val) { /*==== Correct guess ====*/ num_correct_in_concept[0]++; num_correct_in_alg++; } else { /*==== Incorrect guess ====*/ num_incorrect_in_concept[0]++; num_incorrect_in_alg++; } } num_queries_in_alg = num_queries_in_concept[0]; }