PROBI  1.0
 All Classes Functions
KumarMedian.hpp
1 #ifndef KUMARMEDIAN_H
2 #define KUMARMEDIAN_H
3 
4 #include <vector>
5 #include <memory>
6 #include <functional>
7 #include <cmath>
8 
9 #include "Sampling.cpp"
10 #include "Point.hpp"
11 #include "Metric.hpp"
12 #include "Norm.hpp"
13 #include "EuclideanSpace.hpp"
14 #include "KMedian.hpp"
15 
22 {
23 public:
24  KumarMedian(std::function<Metric<Point>*()> createMetric, std::function<Norm<Point>*()> createNorm);
25 
33  template<typename InputIterator>
34  Point approximateOneMedianRounds(InputIterator begin, InputIterator end, double eps, int rounds = 0);
35 
42  template<typename InputIterator>
43  Point approximateOneMedian(InputIterator begin, InputIterator end, double eps);
44 
45 private:
46  template<typename InputIterator>
47  std::unique_ptr<std::vector<Point>> constructSatisfyingSet(InputIterator begin, InputIterator end, double eps, size_t sizeOfRandomSample);
48 
49  template<typename InputIterator>
50  std::unique_ptr<std::vector<Point>> constructSatisfyingSet(InputIterator begin, InputIterator end, double a, double b, double eps, size_t sizeOfRandomSample);
51 
52  std::unique_ptr<std::vector<Point>> constructGridSphere(std::vector<Point> const & basis, Point basePoint, double sideLength, double radius);
53 
54  Metric<Point>* metric;
55  Norm<Point>* norm;
56  KMedian kmedian;
57  EuclideanSpace space;
58 };
59 
60 template<typename InputIterator>
61 Point KumarMedian::approximateOneMedianRounds(InputIterator begin, InputIterator end, double eps, int rounds)
62 {
63  if(rounds == 0)
64  {
65  int dim = begin->getDimension();
66  if(dim <= 5)
67  rounds = 8;
68  else if(dim <= 10)
69  rounds = 12;
70  else if(dim <= 20)
71  rounds = 15;
72  else if(dim <= 50)
73  rounds = 16;
74  else
75  rounds = 18;
76  }
77 
78  Point minPoint;
79  double cost = 0;
80  for(int i = 0; i < rounds; ++i)
81  {
82  Point tmpPoint(approximateOneMedian(begin, end, eps));
83  std::unique_ptr<std::vector<Point>> samSet(Sampling::sampleWithoutReplacementFast<InputIterator, Point>(begin, end, 1000));
84  double tmpCost(kmedian.cost(samSet->begin(), samSet->end(), tmpPoint));
85  if(tmpCost < cost || i == 0)
86  {
87  cost = tmpCost;
88  minPoint = tmpPoint;
89  }
90  }
91  return minPoint;
92 }
93 
94 template<typename InputIterator>
95 Point KumarMedian::approximateOneMedian(InputIterator begin, InputIterator end, double eps)
96 {
97  // Theorem 5.7
98  size_t sampleSize = 1;
99  std::unique_ptr<std::vector<Point>> satSet(constructSatisfyingSet(begin, end, eps, sampleSize));
100  std::unique_ptr<std::vector<Point>> samSet(Sampling::sampleWithoutReplacementFast<InputIterator, Point>(begin, end, 10));
101 
102  double cost = 0;
103  Point median;
104  for(size_t i = 0; i < satSet->size(); ++i)
105  {
106  double tmpcost = kmedian.cost(samSet->begin(), samSet->end(), (*satSet)[i]);
107  if(tmpcost < cost || i == 0)
108  {
109  cost = tmpcost;
110  median = (*satSet)[i];
111  }
112  }
113 
114  return median;
115 }
116 
117 template<typename InputIterator>
118 std::unique_ptr<std::vector<Point>> KumarMedian::constructSatisfyingSet(InputIterator begin, InputIterator end, double eps, size_t sizeOfRandomSample)
119 { // Theorem 5.4
120  std::unique_ptr<std::vector<Point>> sample(Sampling::sampleWithoutReplacementFast<InputIterator, Point>(begin, end, std::ceil(1.0/eps+1.0)));
121  double v = kmedian.cost(sample->begin()+1, sample->end(), (*sample)[0]);
122 
123  double a = v * std::pow(eps, 3) / 2;
124  double b = v / eps;
125  if(a < b)
126  {
127  std::unique_ptr<std::vector<Point>> set(constructSatisfyingSet(sample->begin()+1, sample->end(), a, b, eps, sizeOfRandomSample));
128  set->push_back((*sample)[0]);
129  return set;
130  }
131  else
132  {
133  std::unique_ptr<std::vector<Point>> set(new std::vector<Point>(sample->begin(), sample->begin()+1));
134  return set;
135  }
136 }
137 
138 template<typename InputIterator>
139 std::unique_ptr<std::vector<Point>> KumarMedian::constructSatisfyingSet(InputIterator begin, InputIterator end, double a, double b, double eps, size_t sizeOfRandomSample)
140 { // Lemma 5.3
141  std::unique_ptr<std::vector<Point>> sampleVector(Sampling::sampleWithoutReplacementFast<InputIterator, Point>(begin, end, sizeOfRandomSample));
142  int lowerBound(std::floor(std::log(eps/4.0*a)));
143  int upperBound(std::ceil(std::log(b)));
144  std::unique_ptr<std::vector<Point>> basis(space.orthonormalize(sampleVector->begin(), sampleVector->end()));
145 
146  std::unique_ptr<std::vector<Point>> satisfyingSet(new std::vector<Point>());
147  for(auto it = sampleVector->begin(); it != sampleVector->end(); ++it)
148  {
149  for(int i = lowerBound; i <= upperBound; ++i)
150  {
151  double t = std::pow(2, i);
152  std::unique_ptr<std::vector<Point>> subset = constructGridSphere(*basis, *it, (eps*t)/(4.0*sampleVector->size()), 2.0*t);
153  satisfyingSet->insert(satisfyingSet->end(), subset->begin(), subset->end());
154  }
155  }
156 
157  return satisfyingSet;
158 }
159 
160 #endif