43 #include <gamlMerge.hpp>
44 #include <gamlPartition.hpp>
45 #include <gamlMap.hpp>
49 #include <type_traits>
// Input and output types of the callable declared below; both are the
// placeholder type `any`, which is declared elsewhere in the project.
63 typedef any input_type;
64 typedef any output_type;
// Const call operator: takes one input_type by const reference and returns an
// output_type. This is a declaration only; no body is attached to it here.
68 output_type operator()(
const input_type& x)
const;
// Type returned by the call operator below (placeholder `any`).
77 typedef any predictor_type;
// Const call operator declaration: receives a data range [begin, end) plus two
// unnamed functor arguments and returns a predictor_type.
// NOTE(review): InputOf/OutputOf presumably extract the input and the expected
// output from a dereferenced DataIterator — confirm against an implementation.
82 template<
typename DataIterator,
typename InputOf,
typename OutputOf>
83 predictor_type operator()(
const DataIterator& begin,
const DataIterator& end,
84 const InputOf&,
const OutputOf&)
const;
// Numeric result type of the call operator below.
94 typedef double value_type;
// Const call operator declaration: takes a predictor, a data range
// [begin, end) and two unnamed functor arguments, and returns a value_type.
// NOTE(review): the returned value is presumably a risk/score of `predictor`
// measured on the range — confirm.
98 template<
typename Predictor,
typename DataIterator,
typename InputOf,
typename OutputOf>
99 value_type operator()(
const Predictor& predictor,
const DataIterator& begin,
const DataIterator& end,
100 const InputOf&,
const OutputOf&)
const;
// Numeric result type of the call operator below.
110 typedef double value_type;
// Const call operator declaration: takes a learner, a data range [begin, end)
// and two unnamed functor arguments, and returns a value_type.
// NOTE(review): the returned value is presumably a risk/score of what
// `learner` produces from the range — confirm.
114 template<
typename Learner,
typename DataIterator,
typename InputOf,
typename OutputOf>
115 value_type operator()(
const Learner& learner,
const DataIterator& begin,
const DataIterator& end,
116 const InputOf&,
const OutputOf&)
const;
// Walks [begin, end), evaluates loss(predictor(inputOf(*it)), ...) for each
// element and writes the result through `acc`, which is post-incremented after
// every write. `acc` is taken by non-const reference, so the caller's object
// is the one being advanced.
// Returns acc() converted to double.
// NOTE(review): the second argument of loss(...) is presumably outputOf(*it)
// — confirm.
// NOTE(review): AccumIterator must support both `*acc++ = value` and a
// zero-argument call; presumably acc() yields the accumulated result — confirm.
123 template<
typename Predictor,
typename DataIterator,
124 typename InputOf,
typename OutputOf,
125 typename Loss,
typename AccumIterator>
126 double accumulation(
const Predictor& predictor,
127 const DataIterator& begin,
const DataIterator& end,
128 const InputOf& inputOf,
const OutputOf& outputOf,
129 const Loss& loss, AccumIterator& acc) {
130 for(DataIterator it = begin; it != end; ++it)
131 *acc++ = loss(predictor(inputOf(*it)),
133 return (
double)(acc());
// NOTE(review): LOSS is presumably the type of the `loss` callable used by the
// call operator below — confirm.
136 template<
typename LOSS>
// Const call operator: iterates over [begin, end) and adds
// loss(predictor(inputOf(data)), ...) to `sum` for every element; returns a
// double.
// NOTE(review): `data` presumably names the element *it, and the second
// argument of loss is presumably outputOf(data) — confirm.
147 template<
typename Predictor,
typename DataIterator,
typename InputOf,
typename OutputOf>
148 double operator()(
const Predictor& predictor,
const DataIterator& begin,
const DataIterator& end,
149 const InputOf& inputOf,
const OutputOf& outputOf)
const {
152 for(DataIterator it = begin; it != end; ++it) {
154 sum += loss(predictor(inputOf(data)),
163 template<
typename LOSS>
// NOTE(review): LOSS and PARTITION are presumably the types of the `loss` and
// `partition` members initialised below — confirm.
167 template<
typename LOSS,
typename PARTITION>
// Constructor initialiser list: initialises the members loss, partition and
// verbose from l, part and verbosity respectively; the constructor body is
// empty.
179 : loss(l), partition(part), verbose(verbosity) {}
// Const call operator: builds a partition of [begin, end) with `partition`,
// then, for each set i, trains `learner` on the complement of the set and
// accumulates in `risk` the loss of the resulting predictor on the set itself.
// Returns sum divided by built_partition.data_size(), as a double.
// Progress and per-set risk are written to std::cout.
182 template<
typename Learner,
typename DataIterator,
typename InputOf,
typename OutputOf>
183 double operator()(
const Learner& learner,
const DataIterator& begin,
const DataIterator& end,
184 const InputOf& inputOf,
const OutputOf& outputOf)
const {
186 auto built_partition = partition.build(begin,end);
// Progress message: number of sets produced by the partition.
190 std::cout <<
"Splitting the database into " << built_partition.size() <<
" sets." << std::endl;
// NOTE(review): i is an unsigned int while the return type of
// built_partition.size() is not visible — check for a signedness/width mismatch.
191 for(
unsigned int i = 0; i < built_partition.size(); ++i) {
// These locals shadow the `begin`/`end` parameters: inside the loop they
// delimit set i, while _begin/_end delimit its complement.
193 auto begin = built_partition.begin(i);
194 auto end = built_partition.end(i);
195 auto _begin = built_partition.complement_begin(i);
196 auto _end = built_partition.complement_end(i);
// The trailing '\r' (no newline) returns to the start of the line, so the
// per-set risk message printed later overwrites this one.
199 std::cout << std::setw(6) << i+1 <<
'/' << built_partition.size()
200 <<
" : learning...\r" << std::flush;
// Train on the complement of set i only.
202 auto predictor = learner(_begin, _end, inputOf, outputOf);
// NOTE(review): `it` is declared as DataIterator but is initialised from the
// shadowing `begin` (built_partition.begin(i)), so that iterator type has to be
// convertible to DataIterator — confirm.
206 for(DataIterator it = begin; it != end; ++it) {
208 risk += loss(predictor(inputOf(data)),
// Report the mean loss on set i together with the size of the set.
213 auto size = std::distance(begin,end);
214 std::cout << std::setw(6) << i+1 <<
'/' << built_partition.size()
215 <<
" : risk = " << risk / size <<
" ("
216 << size <<
"-sized test set)" << std::endl;
// Overall result: `sum` normalised by the total number of data in the partition.
222 return (
double)(sum/(double)(built_partition.data_size()));
226 template<
typename LOSS,
typename PARTITION>
// Iterator trait aliases: advertise a random-access iterator whose values are
// int and whose distances are long.
// NOTE(review): `reference` is a mutable int&, whereas the operator*() defined
// further down in this file returns const int& — confirm the mismatch is
// intended.
241 using difference_type = long;
242 using value_type = int;
243 using pointer = value_type*;
244 using reference = value_type&;
245 using iterator_category = std::random_access_iterator_tag;
// Assigns i to the wrapped counter j and returns *this.
251 integer& operator=(
int i) {j=i;
return *
this;}
// Pre-increment: adds one to j and returns *this.
253 integer& operator++() {++j;
return *
this;}
// Pre-decrement: subtracts one from j and returns *this.
254 integer& operator--() {--j;
return *
this;}
// Advances j by diff and returns *this.
255 integer& operator+=(
int diff) {j+=diff;
return *
this;}
// Moves j back by diff and returns *this.
256 integer& operator-=(
int diff) {j-=diff;
return *
this;}
// Post-increment: copies the current state, increments *this through the
// pre-increment operator, and returns the copy holding the previous value.
257 integer operator++(
int) {
integer res = *
this; ++*
this;
return res;}
// Post-decrement: copies the current state, decrements *this through the
// pre-decrement operator, and returns the copy holding the previous value.
258 integer operator--(
int) {
integer res = *
this; --*
this;
return res;}
// Distance between two counters: this j minus i.j, as a plain int.
259 int operator-(
const integer& i)
const {
return j - i.j;}
// Dereference: read-only access to the wrapped counter j.
262 const int& operator*()
const {
return j;}
// Equality: true when both objects hold the same j.
263 bool operator==(
const integer& i)
const {
return j == i.j;}
// Inequality: true when the two objects hold different j values.
264 bool operator!=(
const integer& i)
const {
return j != i.j;}
// Result type of the call operator below.
270 typedef double output_type;
// Const call operator: returns the arithmetic mean of value_of(*it) over
// [begin, end); each value is converted to double before being summed.
// Throws std::runtime_error("Average called on an empty collection").
// NOTE(review): the throw is presumably guarded by a begin == end test — confirm.
271 template<
typename DataIterator,
typename ValueOf>
272 double operator()(
const DataIterator& begin,
const DataIterator& end,
const ValueOf& value_of)
const {
274 throw std::runtime_error(
"Average called on an empty collection");
// Single pass: accumulate the sum and count the visited elements in nb.
284 for(
auto it = begin; it != end; ++it, ++nb) sum += (
double)(value_of(*it));
285 return sum/(double)nb;
// Free-function shortcut: forwards [begin, end) and value_of to the callable
// `avg` and returns its result as a double.
// NOTE(review): `avg` is presumably a local instance of the averaging functor
// — confirm.
294 template<
typename DataIterator,
typename ValueOf>
295 double average(
const DataIterator& begin,
const DataIterator& end,
const ValueOf& value_of) {
297 return avg(begin,end,value_of);
// Result type of the call operator below.
303 typedef double output_type;
// Const call operator: makes a single pass over [begin, end) and keeps a
// running mean and a running sum of squared deviations (M2), updated element
// by element (Welford-style online update). Returns a double.
// NOTE(review): confirm whether the returned variance normalises M2 by n or
// by n-1, and how an empty range is handled.
304 template<
typename DataIterator,
typename ValueOf>
305 double operator()(
const DataIterator& begin,
const DataIterator& end,
const ValueOf& value_of)
const {
309 for(
auto it = begin; it != end; ++it) {
310 double x = (double)(value_of(*it));
// delta is measured against the mean before the update ...
311 double delta = x - mean;
// ... n is incremented here, so the division uses the new element count ...
312 mean = mean + delta/(++n);
// ... and (x - mean) is measured against the mean after the update.
313 M2 = M2 + delta*(x - mean);
// Free-function shortcut: forwards [begin, end) and value_of to the callable
// `var` and returns its result as a double.
// NOTE(review): `var` is presumably a local instance of the variance functor
// — confirm.
325 template<
typename DataIterator,
typename ValueOf>
326 double variance(
const DataIterator& begin,
const DataIterator& end,
const ValueOf& value_of) {
328 return var(begin,end,value_of);
// Namespace of default policies; by_default::LesserThan<VALUE> is used
// elsewhere in this file as the default COMP template argument.
331 namespace by_default {
332 template<
typename VALUE>
// Const binary predicate over two VALUEs.
// NOTE(review): presumably returns v1 < v2 — confirm.
334 bool operator()(
const VALUE& v1,
const VALUE& v2)
const {
// VALUE is the key type of the returned map and COMP its ordering; COMP
// defaults to by_default::LesserThan<VALUE>.
341 template<
typename VALUE,
typename COMP = by_default::LesserThan<VALUE> >
// Const call operator: builds a std::map<VALUE,double,COMP> from the values
// value_of(*it) found in [begin, end).
// Throws std::runtime_error("Frequencies called on an empty collection").
// NOTE(review): the throw is presumably guarded by a begin == end test, and
// each map entry presumably ends up as (occurrence count) / size — confirm.
344 template<
typename DataIterator,
typename ValueOf>
345 std::map<VALUE,double,COMP> operator()(
const DataIterator& begin,
const DataIterator& end,
const ValueOf& value_of)
const {
348 throw std::runtime_error(
"Frequencies called on an empty collection");
351 for(
auto it = begin; it != end; ++it) {
352 auto value = value_of(*it);
// Number of elements in the range, as a double.
360 double size = (double)(std::distance(begin,end));
// Free-function shortcut returning a std::map<VALUE,double,COMP>: forwards
// [begin, end) and value_of to the callable `f`.
// VALUE appears only in the return type, so it cannot be deduced and must be
// given explicitly by the caller; COMP has a default, and DataIterator/ValueOf
// are deduced from the arguments.
// NOTE(review): `f` is presumably a local instance of the frequencies functor
// — confirm.
370 template<
typename VALUE,
typename COMP = by_default::LesserThan<VALUE>,
typename DataIterator,
typename ValueOf>
371 std::map<VALUE,double,COMP> frequencies(
const DataIterator& begin,
const DataIterator& end,
const ValueOf& value_of) {
373 return f(begin,end,value_of);
// Returns a Map::key_type chosen from `frequencies`: std::max_element locates
// the first entry whose mapped value (.second) is the largest.
// NOTE(review): presumably the key (it->first) of that entry is returned; with
// an empty map `it` equals frequencies.end() — confirm how that is handled.
// NOTE(review): the lambda takes std::pair<key_type,double>; if Map is a
// std::map its elements are std::pair<const key_type, mapped_type>, so every
// comparison converts (copies) both elements into temporaries. Consider
// `const typename Map::value_type&` instead.
380 template<
typename Map>
381 typename Map::key_type most_frequent(
const Map& frequencies) {
383 auto it = std::max_element(frequencies.begin(), frequencies.end(),
384 [](
const std::pair<typename Map::key_type,double>& e1,
385 const std::pair<typename Map::key_type,double>& e2) ->
bool {return e1.second < e2.second;});
// VALUE is the result type and COMP the ordering handed to
// gaml::frequencies; COMP defaults to by_default::LesserThan<VALUE>.
391 template<
typename VALUE,
typename COMP = by_default::LesserThan<VALUE>>
// Result type of the call operator below.
394 typedef VALUE output_type;
// Const call operator: computes gaml::frequencies<VALUE,COMP> over
// [begin, end) and returns what gaml::most_frequent yields for that map,
// cast to VALUE.
395 template<
typename DataIterator,
typename ValueOf>
396 VALUE operator()(
const DataIterator& begin,
const DataIterator& end,
const ValueOf& value_of)
const {
397 return (VALUE)(gaml::most_frequent(gaml::frequencies<VALUE,COMP>(begin,end,value_of)));
// Free-function shortcut returning a VALUE: forwards [begin, end) and
// value_of to the callable `mf`.
// VALUE appears only in the return type, so the caller must name it
// explicitly; COMP has a default, and DataIterator/ValueOf are deduced.
// NOTE(review): `mf` is presumably a local instance of the most-frequent
// functor — confirm.
406 template<
typename VALUE,
typename COMP = by_default::LesserThan<VALUE>,
typename DataIterator,
typename ValueOf>
407 VALUE most_frequent(
const DataIterator& begin,
const DataIterator& end,
const ValueOf& value_of) {
409 return mf(begin,end,value_of);
414 template<
typename VALUE>
// Result type of the call operator below.
417 typedef VALUE output_type;
// Const call operator: value_of(*it) yields a key -> value collection for each
// element; the collections are merged by summing the values that share a key,
// and gaml::most_frequent of the merged collection is returned, cast to VALUE.
// NOTE(review): `it` is presumably initialised to `begin`; since *it is
// dereferenced unconditionally the range must then be non-empty — confirm.
418 template<
typename DataIterator,
typename ValueOf>
419 VALUE operator()(
const DataIterator& begin,
const DataIterator& end,
const ValueOf& value_of)
const {
// The accumulator starts as a copy of the first element's collection.
421 auto cumulated_frequencies = value_of(*it);
// Fold every remaining element into the accumulator.
422 for(++it; it != end; ++it)
423 for(
auto& kv : value_of(*it)) {
424 auto fit = cumulated_frequencies.find(kv.first);
// Key not seen yet: insert it with this element's value.
425 if(fit == cumulated_frequencies.end())
426 cumulated_frequencies[kv.first] = kv.second;
// NOTE(review): presumably the `else` branch (key already present): add to the
// running total — confirm.
428 fit->second += kv.second;
430 return (VALUE)(gaml::most_frequent(cumulated_frequencies));
// Free-function shortcut returning a VALUE: forwards [begin, end) and
// value_of to the callable `hcf`.
// VALUE appears only in the return type, so the caller must name it
// explicitly; DataIterator and ValueOf are deduced from the arguments.
// NOTE(review): `hcf` is presumably a local instance of the
// highest-cumulated-frequency functor — confirm.
439 template<
typename VALUE,
typename DataIterator,
typename ValueOf>
440 VALUE highest_cumulated_frequency(
const DataIterator& begin,
const DataIterator& end,
const ValueOf& value_of) {
442 return hcf(begin,end,value_of);
448 template<
typename Class,
typename DataIterator,
typename ClassComp = by_default::LesserThan<Class>,
typename ClassOf>
449 double classification_entropy(
const DataIterator& begin,
const DataIterator& end,
const ClassOf& class_of) {
450 auto freq = gaml::frequencies<Class,ClassComp>(begin,end,class_of);
452 for(
auto& kv : freq) {
453 double p = kv.second;