Skip to content

Commit 056076a

Browse files
committed
add FINEdex test
1 parent 7bb7980 commit 056076a

21 files changed

+6627
-69
lines changed

include/finedex/aidel.h

+66
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
#ifndef __AIDEL_H__
2+
#define __AIDEL_H__
3+
4+
#include "util.h"
5+
#include "lr_model.h"
6+
#include "lr_model_impl.h"
7+
#include "aidel_model.h"
8+
#include "aidel_model_impl.h"
9+
#include "piecewise_linear_model.h"
10+
11+
namespace aidel {
12+
13+
template<class key_t, class val_t>
14+
class AIDEL{
15+
public:
16+
typedef aidel::AidelModel<key_t, val_t> aidelmodel_type;
17+
typedef LinearRegressionModel<key_t> lrmodel_type;
18+
typedef typename OptimalPiecewiseLinearModel<key_t, size_t>::CanonicalSegment canonical_segment;
19+
//typedef aidel::LevelIndex<key_t> root_type;
20+
21+
public:
22+
inline AIDEL();
23+
inline AIDEL(int _maxErr, int _learning_step, float _learning_rate);
24+
~AIDEL();
25+
void train(const std::vector<key_t> &keys, const std::vector<val_t> &vals, size_t _maxErr);
26+
void train_opt(const std::vector<key_t> &keys, const std::vector<val_t> &vals, size_t _maxErr);
27+
//void retrain(typename root_type::iterator it);
28+
void print_models();
29+
void self_check();
30+
31+
32+
inline result_t find(const key_t &key, val_t &val);
33+
inline result_t insert(const key_t &key, const val_t &val);
34+
inline result_t update(const key_t &key, const val_t &val);
35+
inline result_t remove(const key_t &key);
36+
int scan(const key_t &key, const size_t n, std::vector<std::pair<key_t, val_t>> &result);
37+
size_t model_size();
38+
39+
40+
private:
41+
size_t backward_train(const typename std::vector<key_t>::const_iterator &keys_begin,
42+
const typename std::vector<val_t>::const_iterator &vals_begin,
43+
uint32_t size, int step);
44+
void append_model(lrmodel_type &model, const typename std::vector<key_t>::const_iterator &keys_begin,
45+
const typename std::vector<val_t>::const_iterator &vals_begin,
46+
size_t size, int err);
47+
aidelmodel_type* find_model(const key_t &key);
48+
int locate_in_levelbin(key_t key, int model_pos);
49+
50+
51+
private:
52+
std::vector<key_t> model_keys;
53+
std::vector<aidelmodel_type> aimodels;
54+
//root_type* root = nullptr;
55+
std::vector<canonical_segment> segments;
56+
57+
int maxErr = 64;
58+
int learning_step = 1000;
59+
float learning_rate = 0.1;
60+
61+
};
62+
63+
} // namespace aidel
64+
65+
66+
#endif

include/finedex/aidel_impl.h

+275
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,275 @@
1+
#ifndef __AIDEL_IMPL_H__
2+
#define __AIDEL_IMPL_H__
3+
4+
#include "aidel.h"
5+
#include "util.h"
6+
#include "aidel_model.h"
7+
#include "aidel_model_impl.h"
8+
#include "piecewise_linear_model.h"
9+
10+
namespace aidel {
11+
12+
template<class key_t, class val_t>
13+
inline AIDEL<key_t, val_t>::AIDEL()
14+
: maxErr(64), learning_step(1000), learning_rate(0.1)
15+
{
16+
//root = new root_type();
17+
}
18+
19+
template<class key_t, class val_t>
20+
inline AIDEL<key_t, val_t>::AIDEL(int _maxErr, int _learning_step, float _learning_rate)
21+
: maxErr(_maxErr), learning_step(_learning_step), learning_rate(_learning_rate)
22+
{
23+
//root = new root_type();
24+
}
25+
26+
template<class key_t, class val_t>
27+
AIDEL<key_t, val_t>::~AIDEL(){
28+
//root = nullptr;
29+
}
30+
31+
// ====================== train models ========================
32+
template<class key_t, class val_t>
33+
void AIDEL<key_t, val_t>::train(const std::vector<key_t> &keys,
34+
const std::vector<val_t> &vals, size_t _maxErr)
35+
{
36+
assert(keys.size() == vals.size());
37+
maxErr = _maxErr;
38+
std::cout<<"training begin, length of training_data is:" << keys.size() <<" ,maxErr: "<< maxErr << std::endl;
39+
40+
size_t start = 0;
41+
size_t end = learning_step<keys.size()?learning_step:keys.size();
42+
while(start<end){
43+
//COUT_THIS("start:" << start<<" ,end: "<<end);
44+
lrmodel_type model;
45+
model.train(keys.begin()+start, end-start);
46+
size_t err = model.get_maxErr();
47+
// equal
48+
if(err == maxErr) {
49+
append_model(model, keys.begin()+start, vals.begin()+start, end-start, err);
50+
} else if(err < maxErr) {
51+
if(end>=keys.size()){
52+
append_model(model, keys.begin()+start, vals.begin()+start, end-start, err);
53+
break;
54+
}
55+
end += learning_step;
56+
if(end>keys.size()){
57+
end = keys.size();
58+
}
59+
continue;
60+
} else {
61+
size_t offset = backward_train(keys.begin()+start, vals.begin()+start, end-start, int(learning_step*learning_rate));
62+
end = start + offset;
63+
}
64+
start = end;
65+
end += learning_step;
66+
if(end>=keys.size()){
67+
end = keys.size();
68+
}
69+
}
70+
71+
//root = new root_type(model_keys);
72+
COUT_THIS("[aidle] get models -> "<< model_keys.size());
73+
assert(model_keys.size()==aimodels.size());
74+
}
75+
76+
template<class key_t, class val_t>
77+
size_t AIDEL<key_t, val_t>::backward_train(const typename std::vector<key_t>::const_iterator &keys_begin,
78+
const typename std::vector<val_t>::const_iterator &vals_begin,
79+
uint32_t size, int step)
80+
{
81+
if(size<=10){
82+
step = 1;
83+
} else {
84+
while(size<=step){
85+
step = int(step*learning_rate);
86+
}
87+
}
88+
assert(step>0);
89+
size_t start = 0;
90+
size_t end = size-step;
91+
while(end>0){
92+
lrmodel_type model;
93+
model.train(keys_begin, end);
94+
size_t err = model.get_maxErr();
95+
if(err<=maxErr){
96+
append_model(model, keys_begin, vals_begin, end, err);
97+
return end;
98+
}
99+
end -= step;
100+
}
101+
end = backward_train(keys_begin, vals_begin, end, int(step*learning_rate));
102+
return end;
103+
}
104+
105+
template<class key_t, class val_t>
106+
void AIDEL<key_t, val_t>::append_model(lrmodel_type &model,
107+
const typename std::vector<key_t>::const_iterator &keys_begin,
108+
const typename std::vector<val_t>::const_iterator &vals_begin,
109+
size_t size, int err)
110+
{
111+
key_t key = *(keys_begin+size-1);
112+
113+
// set learning_step
114+
int n = size/10;
115+
learning_step = 1;
116+
while(n!=0){
117+
n/=10;
118+
learning_step*=10;
119+
}
120+
121+
assert(err<=maxErr);
122+
aidelmodel_type aimodel(model, keys_begin, vals_begin, size, maxErr);
123+
124+
model_keys.push_back(key);
125+
aimodels.push_back(aimodel);
126+
}
127+
128+
template<class key_t, class val_t>
129+
typename AIDEL<key_t, val_t>::aidelmodel_type* AIDEL<key_t, val_t>::find_model(const key_t &key)
130+
{
131+
// root
132+
size_t model_pos = binary_search_branchless(&model_keys[0], model_keys.size(), key);
133+
if(model_pos >= aimodels.size())
134+
model_pos = aimodels.size()-1;
135+
return &aimodels[model_pos];
136+
}
137+
138+
139+
// ===================== print data =====================
140+
template<class key_t, class val_t>
141+
void AIDEL<key_t, val_t>::print_models()
142+
{
143+
144+
for(int i=0; i<model_keys.size(); i++){
145+
std::cout<<"model "<<i<<" ,key:"<<model_keys[i]<<" ->";
146+
aimodels[i].print_model();
147+
}
148+
149+
150+
151+
}
152+
153+
template<class key_t, class val_t>
154+
void AIDEL<key_t, val_t>::self_check()
155+
{
156+
for(int i=0; i<model_keys.size(); i++){
157+
aimodels[i].self_check();
158+
}
159+
160+
}
161+
162+
163+
// =================== search the data =======================
164+
template<class key_t, class val_t>
165+
inline result_t AIDEL<key_t, val_t>::find(const key_t &key, val_t &val)
166+
{
167+
/*size_t model_pos = root->find(key);
168+
if(model_pos >= aimodels.size())
169+
model_pos = aimodels.size()-1;
170+
return aimodels[model_pos].con_find(key, val);*/
171+
172+
return find_model(key)[0].con_find_retrain(key, val);
173+
174+
}
175+
176+
177+
// ================= scan ====================
178+
template<class key_t, class val_t>
179+
int AIDEL<key_t, val_t>::scan(const key_t &key, const size_t n, std::vector<std::pair<key_t, val_t>> &result)
180+
{
181+
size_t remaining = n;
182+
size_t model_pos = binary_search_branchless(&model_keys[0], model_keys.size(), key);
183+
if(model_pos >= aimodels.size())
184+
model_pos = aimodels.size()-1;
185+
while(remaining>0 && model_pos < aimodels.size()){
186+
remaining = aimodels[model_pos].scan(key, remaining, result);
187+
}
188+
return remaining;
189+
}
190+
191+
192+
193+
// =================== insert the data =======================
194+
template<class key_t, class val_t>
195+
inline result_t AIDEL<key_t, val_t>::insert(
196+
const key_t& key, const val_t& val)
197+
{
198+
return find_model(key)[0].con_insert_retrain(key, val);
199+
//return find_model(key)[0].con_insert(key, val);
200+
}
201+
202+
203+
// ================ update =================
204+
template<class key_t, class val_t>
205+
inline result_t AIDEL<key_t, val_t>::update(
206+
const key_t& key, const val_t& val)
207+
{
208+
return find_model(key)[0].update(key, val);
209+
//return find_model(key)[0].con_insert(key, val);
210+
}
211+
212+
213+
// ==================== remove =====================
214+
template<class key_t, class val_t>
215+
inline result_t AIDEL<key_t, val_t>::remove(const key_t& key)
216+
{
217+
return find_model(key)[0].remove(key);
218+
//return find_model(key)[0].con_insert(key, val);
219+
}
220+
221+
// ========================== using OptimalLPR train the model ==========================
222+
template<class key_t, class val_t>
223+
void AIDEL<key_t, val_t>::train_opt(const std::vector<key_t> &keys,
224+
const std::vector<val_t> &vals, size_t _maxErr)
225+
{
226+
using pair_type = typename std::pair<size_t, size_t>;
227+
228+
assert(keys.size() == vals.size());
229+
maxErr = _maxErr;
230+
std::cout<<"training begin, length of training_data is:" << keys.size() <<" ,maxErr: "<< maxErr << std::endl;
231+
232+
segments = make_segmentation(keys.begin(), keys.end(), maxErr);
233+
COUT_THIS("[aidle] get models -> "<< segments.size());
234+
235+
/*
236+
// ===== predict the positions ===========
237+
std::vector<pair_type> predicts;
238+
predicts.reserve(keys.size());
239+
240+
auto it = segments.begin();
241+
auto [slope, intercept] = it->get_floating_point_segment(it->get_first_x());
242+
//COUT_THIS(slope<<", "<<intercept<<", "<<keys[0]<<", "<<it->get_first_x());
243+
244+
for (auto i = 0; i < keys.size(); ++i) {
245+
if (i != 0 && keys[i] == keys[i - 1])
246+
continue;
247+
if (std::next(it) != segments.end() && std::next(it)->get_first_x() <= keys[i]) {
248+
++it;
249+
std::tie(slope, intercept) = it->get_floating_point_segment(it->get_first_x());
250+
}
251+
252+
auto pos = (keys[i] - it->get_first_x()) * slope + intercept;
253+
pos = pos<=0? 0:pos;
254+
size_t e;
255+
if(i>pos) e = i-pos;
256+
else e=pos-i;
257+
predicts.push_back(pair_type(pos, e));
258+
// assert(e <= maxErr + 1);
259+
}
260+
261+
//assert(model_keys.size()==aimodels.size());
262+
return predicts;*/
263+
}
264+
265+
template<class key_t, class val_t>
266+
size_t AIDEL<key_t, val_t>::model_size(){
267+
return segments.size();
268+
}
269+
270+
271+
272+
} // namespace aidel
273+
274+
275+
#endif

0 commit comments

Comments
 (0)