1 #ifndef CGAL_SURFACE_MESH_SEGMENTATION_K_MEANS_CLUSTERING_H
2 #define CGAL_SURFACE_MESH_SEGMENTATION_K_MEANS_CLUSTERING_H
9 #define CGAL_DEFAULT_MAXIMUM_ITERATION 10
10 #define CGAL_DEFAULT_NUMBER_OF_RUN 15
11 #define CGAL_DEFAULT_SEED 1340818006
21 class K_means_clustering
35 int new_number_of_points;
38 K_means_center(
double mean): mean(mean), new_mean(0.0), new_number_of_points(0)
44 void add_point(
double data)
46 ++new_number_of_points;
55 mean = new_mean / new_number_of_points;
56 new_number_of_points = 0;
60 bool operator < (
const K_means_center& center)
const
62 return mean < center.mean;
75 K_means_point(
double data,
int center_id = -1) : data(data), center_id(center_id)
82 bool calculate_new_center(std::vector<K_means_center>& centers)
84 int new_center_id = 0;
85 double min_distance = std::abs(centers[0].mean - data);
86 for(std::size_t i = 1; i < centers.size(); ++i)
88 double new_distance = std::abs(centers[i].mean - data);
89 if(new_distance < min_distance)
92 min_distance = new_distance;
95 bool is_center_changed = (new_center_id != center_id);
96 center_id = new_center_id;
98 centers[center_id].add_point(data);
99 return is_center_changed;
105 enum Initialization_types
107 RANDOM_INITIALIZATION,
112 std::vector<K_means_center> centers;
113 std::vector<K_means_point> points;
114 int maximum_iteration;
116 Initialization_types init_type;
128 K_means_clustering(
int number_of_centers,
129 const std::vector<double>& data,
130 Initialization_types init_type = PLUS_INITIALIZATION,
131 int number_of_run = CGAL_DEFAULT_NUMBER_OF_RUN,
132 int maximum_iteration = CGAL_DEFAULT_MAXIMUM_ITERATION)
134 points(data.begin(), data.end()), maximum_iteration(maximum_iteration), init_type(init_type)
136 srand(CGAL_DEFAULT_SEED);
137 calculate_clustering_with_multiple_run(number_of_centers, number_of_run);
138 sort(centers.begin(), centers.end());
145 void fill_with_center_ids(std::vector<int>& data_centers)
147 data_centers.reserve(points.size());
148 for(std::vector<K_means_point>::iterator point_it = points.begin();
149 point_it != points.end(); ++point_it)
151 point_it->calculate_new_center(centers);
152 data_centers.push_back(point_it->center_id);
161 void initiate_centers_randomly(
int number_of_centers)
164 for(
int i = 0; i < number_of_centers; ++i)
166 double initial_mean = points[rand() % points.size()].data;
167 if(!make_center(initial_mean)) { --i; }
176 void initiate_centers_plus_plus(
int number_of_centers)
179 std::vector<double> distance_square_cumulative(points.size());
180 std::vector<double> distance_square(points.size(), (std::numeric_limits<double>::max)());
184 double initial_mean = points[rand() % points.size()].data;
185 make_center(initial_mean);
187 for(
int i = 1; i < number_of_centers; ++i)
189 double cumulative_distance_square = 0.0;
190 for(std::size_t j = 0; j < points.size(); ++j)
192 double new_distance = std::pow(centers.back().mean - points[j].data, 2);
193 if(new_distance < distance_square[j]) { distance_square[j] = new_distance; }
194 cumulative_distance_square += distance_square[j];
195 distance_square_cumulative[j] = cumulative_distance_square;
197 double zero_one = rand() / (RAND_MAX + 1.0);
198 double random_ds = zero_one * (distance_square_cumulative.back());
199 int selection_index = std::upper_bound(distance_square_cumulative.begin(), distance_square_cumulative.end(), random_ds)
200 - distance_square_cumulative.begin();
201 double initial_mean = points[selection_index].data;
202 if(!make_center(initial_mean)) { --i; }
211 bool is_already_center(
const K_means_center& center)
const
213 for(std::vector<K_means_center>::const_iterator it = centers.begin(); it != centers.end(); ++it)
215 if(it->mean == center.mean) {
return true; }
225 bool make_center(
double value)
227 K_means_center new_center(value);
228 if(is_already_center(new_center)) {
return false; }
229 centers.push_back(new_center);
236 void calculate_clustering()
238 int iteration_count = 0;
239 bool any_center_changed =
true;
240 while(any_center_changed && iteration_count++ < maximum_iteration)
242 any_center_changed =
false;
244 for(std::vector<K_means_point>::iterator point_it = points.begin();
245 point_it != points.end(); ++point_it)
247 bool center_changed = point_it->calculate_new_center(centers);
248 any_center_changed |= center_changed;
251 for(std::vector<K_means_center>::iterator center_it = centers.begin();
252 center_it != centers.end(); ++center_it)
254 center_it->calculate_mean();
266 void calculate_clustering_with_multiple_run(
int number_of_centers,
int number_of_run)
268 std::vector<K_means_center> min_centers;
269 double error = (std::numeric_limits<double>::max)();
270 while(number_of_run-- > 0)
272 init_type == RANDOM_INITIALIZATION ? initiate_centers_randomly(number_of_centers)
273 : initiate_centers_plus_plus(number_of_centers);
274 calculate_clustering();
275 double new_error = within_cluster_sum_of_squares();
276 if(error > new_error)
279 min_centers = centers;
284 centers = min_centers;
290 double within_cluster_sum_of_squares()
const
293 for(std::vector<K_means_point>::const_iterator point_it = points.begin();
294 point_it != points.end(); ++point_it)
296 sum += std::pow(centers[point_it->center_id].mean - point_it->data, 2);
304 #undef CGAL_DEFAULT_SEED
305 #undef CGAL_DEFAULT_MAXIMUM_ITERATION
306 #undef CGAL_DEFAULT_NUMBER_OF_RUN
308 #endif //CGAL_SURFACE_MESH_SEGMENTATION_K_MEANS_CLUSTERING_H