0%

alex 源码解析

ALEX:An Updatable Adaptive Learned Index

源代码目录结构

src
├── benchmark
│   ├── flags.h
│   ├── main.cpp
│   ├── utils.h
│   └── zipf.h
├── core
│   ├── alex_base.h alex实现过程中需要用到的基本模块组建如线性模型等等
│   ├── alex_fanout_tree.h
│   ├── alex.h alex基础实现,实现了一个完整的索引
│   ├── alex_map.h
│   ├── alex_multimap.h
│   └── alex_nodes.h
└── examples
└── main.cpp

上面给出了源代码的目录结构树。接下来针对core部分的核心代码对alex进行详细的介绍。为了更简易的读懂alex的工作流程,我们使用example中的main函数操作流程对alex进行分析:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
#include "../core/alex.h"
// 这里定义key和value的数据类型
#define KEY_TYPE int
#define PAYLOAD_TYPE int

int main(int, char**) {
// 创造随即负载,key值是0~99之间的随机值,value值使用随机数生成
const int num_keys = 100;
std::pair<KEY_TYPE, PAYLOAD_TYPE> values[num_keys];
std::mt19937_64 gen(std::random_device{}());
std::uniform_int_distribution<PAYLOAD_TYPE> dis;
for (int i = 0; i < num_keys; i++) {
values[i].first = i;
values[i].second = dis(gen); // dis函数生成随机数
}

alex::Alex<KEY_TYPE, PAYLOAD_TYPE> index; // 创建alex索引

// Bulk load the keys [0, 100)
index.bulk_load(values, num_keys); // 批量插入数据,每100个一组

// Insert the keys [100, 200). Now there are 200 keys.
for (int i = num_keys; i < 2 * num_keys; i++) { // 单个插入数据,一共插入100个新数据
KEY_TYPE new_key = i;
PAYLOAD_TYPE new_payload = dis(gen);
index.insert(new_key, new_payload);
}

// Erase the keys [0, 10). Now there are 190 keys.
for (int i = 0; i < 10; i++) { // 擦除key值为0~9的十个元素
index.erase(i);
}

// Iterate through all entries in the index and update their payload if the
// key is even
int num_entries = 0;
for (auto it = index.begin(); it != index.end(); it++) { // 这里使用alex提供的迭代器实现遍历操作,如果key值为偶数则更新value
if (it.key() % 2 == 0) { // it.key() is equivalent to (*it).first
it.payload() = dis(gen);
}
num_entries++; // 计算遍历元素的个数
}
if (num_entries != 190) { // 检查遍历元素的个数是否符合预期
std::cout << "Error! There should be 190 entries in the index."
<< std::endl;
}

// Iterate through all entries with keys between 50 (inclusive) and 100
// (exclusive)
num_entries = 0;
for (auto it = index.lower_bound(50); it != index.lower_bound(100); it++) { // 遍历key值在50~100之间的元素,检查是否为50个元素值
num_entries++;
}
if (num_entries != 50) {
std::cout
<< "Error! There should be 50 entries with keys in the range [50, 100)."
<< std::endl;
}

// Equivalent way of iterating through all entries with keys between 50
// (inclusive) and 100 (exclusive)
num_entries = 0;
auto it = index.lower_bound(50);
while (it.key() < 100 && it != index.end()) { // 另外一种方式遍历key值在50~100之间的元素
num_entries++;
it++;
}
if (num_entries != 50) {
std::cout
<< "Error! There should be 50 entries with keys in the range [50, 100)."
<< std::endl;
}

// Insert 9 more keys with value 42. Now there are 199 keys.
for (int i = 0; i < 9; i++) { // 插入9个key值为42的元素,现在一共有199个元素
KEY_TYPE new_key = 42;
PAYLOAD_TYPE new_payload = dis(gen);
index.insert(new_key, new_payload);
}

// Iterate through all 10 entries with keys of value 42
int num_duplicates = 0;
for (auto it = index.lower_bound(42); it != index.upper_bound(42); it++) { //检查key值为42的元素是否为10个
num_duplicates++;
}
if (num_duplicates != 10) {
std::cout << "Error! There should be 10 entries with key of value 42."
<< std::endl;
}

// Check if a non-existent key exists
it = index.find(1337); // 寻找key值为1337的元素是否存在
if (it != index.end()) {
std::cout << "Error! Key with value 1337 should not exist." << std::endl;
}

// Look at some stats
auto stats = index.get_stats();
std::cout << "Final num keys: " << stats.num_keys
<< std::endl; // expected: 199
std::cout << "Num inserts: " << stats.num_inserts
<< std::endl; // expected: 109
}

通过上述简单的示例,我们可以知道alex索引具有的功能,接下来针对alex索引的增删查改四个方法的源码进行分析: