Surface Mesh Segmentation
 All Classes Namespaces Files Functions Typedefs Pages
K_means_clustering.h
1 #ifndef CGAL_SURFACE_MESH_SEGMENTATION_K_MEANS_CLUSTERING_H
2 #define CGAL_SURFACE_MESH_SEGMENTATION_K_MEANS_CLUSTERING_H
3 
4 #include <vector>
5 #include <cmath>
6 #include <limits>
7 #include <algorithm>
8 
9 #define CGAL_DEFAULT_MAXIMUM_ITERATION 10
10 #define CGAL_DEFAULT_NUMBER_OF_RUN 15
11 #define CGAL_DEFAULT_SEED 1340818006
12 
13 namespace CGAL {
15 namespace internal{
16 
21 class K_means_clustering
22 {
23 // Nested classes
24 private:
29  class K_means_center
30  {
31  public:
32  double mean;
33  private:
34  double new_mean;
35  int new_number_of_points;
36 
37  public:
38  K_means_center(double mean): mean(mean), new_mean(0.0), new_number_of_points(0)
39  { }
44  void add_point(double data)
45  {
46  ++new_number_of_points;
47  new_mean += data;
48  }
53  void calculate_mean()
54  {
55  mean = new_mean / new_number_of_points;
56  new_number_of_points = 0;
57  new_mean = 0.0;
58  }
60  bool operator < (const K_means_center& center) const
61  {
62  return mean < center.mean;
63  }
64  };
65 
70  class K_means_point
71  {
72  public:
73  double data;
74  int center_id;
75  K_means_point(double data, int center_id = -1) : data(data), center_id(center_id)
76  { }
82  bool calculate_new_center(std::vector<K_means_center>& centers)
83  {
84  int new_center_id = 0;
85  double min_distance = std::abs(centers[0].mean - data);
86  for(std::size_t i = 1; i < centers.size(); ++i)
87  {
88  double new_distance = std::abs(centers[i].mean - data);
89  if(new_distance < min_distance)
90  {
91  new_center_id = i;
92  min_distance = new_distance;
93  }
94  }
95  bool is_center_changed = (new_center_id != center_id);
96  center_id = new_center_id;
97 
98  centers[center_id].add_point(data);
99  return is_center_changed;
100  }
101  };
102 
103 public:
105  enum Initialization_types
106  {
107  RANDOM_INITIALIZATION,
108  PLUS_INITIALIZATION
109  };
110 
111 private:
112  std::vector<K_means_center> centers;
113  std::vector<K_means_point> points;
114  int maximum_iteration;
115 
116  Initialization_types init_type;
117 
118 public:
128  K_means_clustering(int number_of_centers,
129  const std::vector<double>& data,
130  Initialization_types init_type = PLUS_INITIALIZATION,
131  int number_of_run = CGAL_DEFAULT_NUMBER_OF_RUN,
132  int maximum_iteration = CGAL_DEFAULT_MAXIMUM_ITERATION)
133  :
134  points(data.begin(), data.end()), maximum_iteration(maximum_iteration), init_type(init_type)
135  {
136  srand(CGAL_DEFAULT_SEED); //(static_cast<unsigned int>(time(NULL)))
137  calculate_clustering_with_multiple_run(number_of_centers, number_of_run);
138  sort(centers.begin(), centers.end());
139  }
140 
145  void fill_with_center_ids(std::vector<int>& data_centers)
146  {
147  data_centers.reserve(points.size());
148  for(std::vector<K_means_point>::iterator point_it = points.begin();
149  point_it != points.end(); ++point_it)
150  {
151  point_it->calculate_new_center(centers); // find closest center
152  data_centers.push_back(point_it->center_id);
153  }
154  }
155 
156 private:
161  void initiate_centers_randomly(int number_of_centers)
162  {
163  centers.clear();
164  for(int i = 0; i < number_of_centers; ++i)
165  {
166  double initial_mean = points[rand() % points.size()].data;
167  if(!make_center(initial_mean)) { --i; }
168  }
169  }
170 
176  void initiate_centers_plus_plus(int number_of_centers)
177  {
178  centers.clear();
179  std::vector<double> distance_square_cumulative(points.size());
180  std::vector<double> distance_square(points.size(), (std::numeric_limits<double>::max)());
181  // distance_square stores squared distance to the closest center for each point.
182  // say, distance_square -> [ 0.1, 0.2, 0.3, 0.1, ... ]
183  // then corresponding distance_square_cumulative -> [ 0.1, 0.3, 0.6, 0.7, ...]
184  double initial_mean = points[rand() % points.size()].data;
185  make_center(initial_mean);
186 
187  for(int i = 1; i < number_of_centers; ++i)
188  {
189  double cumulative_distance_square = 0.0;
190  for(std::size_t j = 0; j < points.size(); ++j)
191  {
192  double new_distance = std::pow(centers.back().mean - points[j].data, 2);
193  if(new_distance < distance_square[j]) { distance_square[j] = new_distance; }
194  cumulative_distance_square += distance_square[j];
195  distance_square_cumulative[j] = cumulative_distance_square;
196  }
197  double zero_one = rand() / (RAND_MAX + 1.0); // [0,1) random number
198  double random_ds = zero_one * (distance_square_cumulative.back());
199  int selection_index = std::upper_bound(distance_square_cumulative.begin(), distance_square_cumulative.end(), random_ds)
200  - distance_square_cumulative.begin();
201  double initial_mean = points[selection_index].data;
202  if(!make_center(initial_mean)) { --i; }
203  }
204  }
205 
211  bool is_already_center(const K_means_center& center) const
212  {
213  for(std::vector<K_means_center>::const_iterator it = centers.begin(); it != centers.end(); ++it)
214  {
215  if(it->mean == center.mean) { return true; }
216  }
217  return false;
218  }
219 
225  bool make_center(double value)
226  {
227  K_means_center new_center(value);
228  if(is_already_center(new_center)) { return false; }
229  centers.push_back(new_center);
230  return true;
231  }
236  void calculate_clustering()
237  {
238  int iteration_count = 0;
239  bool any_center_changed = true;
240  while(any_center_changed && iteration_count++ < maximum_iteration)
241  {
242  any_center_changed = false;
243  // For each point, calculate its new center
244  for(std::vector<K_means_point>::iterator point_it = points.begin();
245  point_it != points.end(); ++point_it)
246  {
247  bool center_changed = point_it->calculate_new_center(centers);
248  any_center_changed |= center_changed;
249  }
250  // For each center, calculate its new mean
251  for(std::vector<K_means_center>::iterator center_it = centers.begin();
252  center_it != centers.end(); ++center_it)
253  {
254  center_it->calculate_mean();
255  }
256  }
257  }
258 
266  void calculate_clustering_with_multiple_run(int number_of_centers, int number_of_run)
267  {
268  std::vector<K_means_center> min_centers;
269  double error = (std::numeric_limits<double>::max)();
270  while(number_of_run-- > 0)
271  {
272  init_type == RANDOM_INITIALIZATION ? initiate_centers_randomly(number_of_centers)
273  : initiate_centers_plus_plus(number_of_centers);
274  calculate_clustering();
275  double new_error = within_cluster_sum_of_squares();
276  if(error > new_error)
277  {
278  error = new_error;
279  min_centers = centers;
280  }
281  }
282  // Note that current center-ids in points are not valid.
283  // But they are recalculated when asked (in fill_with_center_ids())
284  centers = min_centers;
285  }
286 
290  double within_cluster_sum_of_squares() const
291  {
292  double sum = 0.0;
293  for(std::vector<K_means_point>::const_iterator point_it = points.begin();
294  point_it != points.end(); ++point_it)
295  {
296  sum += std::pow(centers[point_it->center_id].mean - point_it->data, 2);
297  }
298  return sum;
299  }
300 };
301 }//namespace internal
303 }//namespace CGAL
304 #undef CGAL_DEFAULT_SEED
305 #undef CGAL_DEFAULT_MAXIMUM_ITERATION
306 #undef CGAL_DEFAULT_NUMBER_OF_RUN
307 
308 #endif //CGAL_SURFACE_MESH_SEGMENTATION_K_MEANS_CLUSTERING_H