From 19c2aa8199c6e799d3be5af44ffbb731b0cf9c24 Mon Sep 17 00:00:00 2001 From: Yunbin Liu Date: Wed, 6 Mar 2024 03:18:41 +0000 Subject: [PATCH] add binding maker and editor for C++ --- ReadMe.md | 3 + binding/cpp/Makefile | 11 ++ binding/cpp/readme.md | 108 ++++++++++++++ binding/cpp/xdb_bench.cc | 135 ++++++++++++++++++ binding/cpp/xdb_bench.h | 28 ++++ binding/cpp/xdb_bench_test.cc | 70 ++++++++++ binding/cpp/xdb_search.cc | 190 +++++++++++++++++++++++++ binding/cpp/xdb_search.h | 50 +++++++ binding/cpp/xdb_search_test.cc | 73 ++++++++++ maker/cpp/Makefile | 11 ++ maker/cpp/readme.md | 217 +++++++++++++++++++++++++++++ maker/cpp/xdb_edit.cc | 247 +++++++++++++++++++++++++++++++++ maker/cpp/xdb_edit.h | 35 +++++ maker/cpp/xdb_edit_test.cc | 50 +++++++ maker/cpp/xdb_make.cc | 240 ++++++++++++++++++++++++++++++++ maker/cpp/xdb_make.h | 48 +++++++ maker/cpp/xdb_make_test.cc | 50 +++++++ 17 files changed, 1566 insertions(+) create mode 100644 binding/cpp/Makefile create mode 100644 binding/cpp/readme.md create mode 100644 binding/cpp/xdb_bench.cc create mode 100644 binding/cpp/xdb_bench.h create mode 100644 binding/cpp/xdb_bench_test.cc create mode 100644 binding/cpp/xdb_search.cc create mode 100644 binding/cpp/xdb_search.h create mode 100644 binding/cpp/xdb_search_test.cc create mode 100644 maker/cpp/Makefile create mode 100644 maker/cpp/readme.md create mode 100644 maker/cpp/xdb_edit.cc create mode 100644 maker/cpp/xdb_edit.h create mode 100644 maker/cpp/xdb_edit_test.cc create mode 100644 maker/cpp/xdb_make.cc create mode 100644 maker/cpp/xdb_make.h create mode 100644 maker/cpp/xdb_make_test.cc diff --git a/ReadMe.md b/ReadMe.md index e87329dd..7b7040af 100644 --- a/ReadMe.md +++ b/ReadMe.md @@ -42,6 +42,7 @@ API 介绍,使用文档和测试程序请参考对应 `searcher` 查询客户 | :white_check_mark: | 已完成 | [erlang](binding/erlang) | erlang xdb 查询客户端实现 | [leihua996](https://github.com/leihua996) | |     | 待开始 | [php_ext](binding/php7_ext) | php c 扩展 xdb 查询客户端实现 | 待确定 | | :white_check_mark: | 已完成 | 
[nginx](binding/nginx) | nginx 扩展 xdb 查询客户端实现 | [Wu Jian Ping](https://github.com/wujjpp) | +| :white_check_mark: | 已完成 | [C++](binding/cpp) | C++ xdb 查询客户端实现 | [Yunbin Liu](https://github.com/liuyunbin) | 以下工具链实现由社区开发者通过第三方仓库贡献: @@ -63,6 +64,7 @@ API 介绍,使用文档和测试程序请参考如下 `maker` 生成程序下 | :white_check_mark: | 已完成 | [python](maker/python) | python xdb 生成程序实现 | [leolin49](https://github.com/leolin49) | | :white_check_mark: | 已完成 | [csharp](maker/csharp) | csharp xdb 生成程序实现 | [Alan Lee](https://github.com/malus2077) | | :white_check_mark: | 已完成 | [rust](maker/rust) | rust xdb 生成程序实现 | [KevinWang](https://github.com/KevinWL) | +| :white_check_mark: | 已完成 | [C++](maker/cpp) | C++ xdb 生成程序实现 | [Yunbin Liu](https://github.com/liuyunbin) | # `xdb` 数据更新 @@ -82,6 +84,7 @@ ip2region 旨在于 研究 IP 数据的存储和快速查询的设计和实 |:-------------------|:----| :--- |:-------------------| :--- | | :white_check_mark: | 已完成 | [golang](maker/golang#xdb-数据编辑) | golang 原始 IP 数据编辑器 | [Lion](https://github.com/lionsoul2014) | |     | 待开始 | [java](maker/java#xdb-数据编辑) | java 原始 IP 数据编辑器 | [Lion](https://github.com/lionsoul2014) | +| :white_check_mark: | 已完成 | [C++](maker/cpp#xdb-数据编辑) | C++ 原始 IP 数据编辑器 | [Yunbin Liu](https://github.com/liuyunbin) | ### 检测自动更新 diff --git a/binding/cpp/Makefile b/binding/cpp/Makefile new file mode 100644 index 00000000..2f810d6a --- /dev/null +++ b/binding/cpp/Makefile @@ -0,0 +1,11 @@ + +all: xdb_search xdb_bench + +xdb_search: xdb_search.cc xdb_search_test.cc + g++ -std=c++11 -O2 $^ -o $@ + +xdb_bench: xdb_search.cc xdb_bench.cc xdb_bench_test.cc + g++ -std=c++11 -O2 $^ -o $@ + +clean: + rm -f xdb_search xdb_bench diff --git a/binding/cpp/readme.md b/binding/cpp/readme.md new file mode 100644 index 00000000..bac5bda6 --- /dev/null +++ b/binding/cpp/readme.md @@ -0,0 +1,108 @@ +# ip2region xdb C++ 查询客户端实现 + +## 使用方式 +### 完全基于文件的查询 +``` +#include + +#include "xdb_search.h" + +int main(int argc, char* argv[]) { + char file_name[] = "../../data/ip2region.xdb"; + char ip[] = 
"1.2.3.4"; + + xdb_search_t xdb(file_name); + xdb.init_file(); + + std::cout << xdb.search(ip) << std::endl; + return 0; +} +``` + +### 缓存 `vector_index` 索引 +``` +#include + +#include "xdb_search.h" + +int main(int argc, char* argv[]) { + char file_name[] = "../../data/ip2region.xdb"; + char ip[] = "1.2.3.4"; + + xdb_search_t xdb(file_name); + xdb.init_vector_index(); + + std::cout << xdb.search(ip) << std::endl; + return 0; +} +``` + +### 缓存整个 `xdb` 数据 +``` +#include + +#include "xdb_search.h" + +int main(int argc, char* argv[]) { + char file_name[] = "../../data/ip2region.xdb"; + char ip[] = "1.2.3.4"; + + xdb_search_t xdb(file_name); + xdb.init_content(); + + std::cout << xdb.search(ip) << std::endl; + return 0; +} +``` + +## 测试程序编译 +1. 切换到当前目录 +2. 编译 + +``` +$ make +g++ -std=c++11 -O2 xdb_search.cc xdb_search_test.cc -o xdb_search +g++ -std=c++11 -O2 xdb_search.cc xdb_bench.cc xdb_bench_test.cc -o xdb_bench +``` + +## 测试查询 +### 说明 +``` +$ ./xdb_search --help +./xdb_search [command options] +options: + --db string ip2region binary xdb file path + --cache-policy string cache policy: file/vector_index/content + --help print help +``` + +### 测试 +``` +$ ./xdb_search --db ../../data/ip2region.xdb --cache-policy vector_index +cache policy : vector_index +ip2region>> 1.2.3.4 +美国|0|华盛顿|0|谷歌 +``` + +## bench 测试 +### 说明 +``` +$ ./xdb_bench --help +./xdb_bench [command options] +options: + --db string ip2region binary xdb file path + --src string source ip text file path + --cache-policy string cache policy: file/vector_index/content + --help print help +``` + +### 测试 +``` +$ ./xdb_bench --db ../../data/ip2region.xdb --src ../../data/ip.merge.txt --cache-policy content +total: 3419220, took: 3.44 s, cost: 0.27 μs/op, io count: 0 +$ ./xdb_bench --db ../../data/ip2region.xdb --src ../../data/ip.merge.txt --cache-policy vector_index +total: 3419220, took: 45.99 s, cost: 12.24 μs/op, io count: 21739300 +$ ./xdb_bench --db ../../data/ip2region.xdb --src ../../data/ip.merge.txt 
--cache-policy file +total: 3419220, took: 60.39 s, cost: 16.32 μs/op, io count: 25158520 +``` + diff --git a/binding/cpp/xdb_bench.cc b/binding/cpp/xdb_bench.cc new file mode 100644 index 00000000..a4e04cb8 --- /dev/null +++ b/binding/cpp/xdb_bench.cc @@ -0,0 +1,135 @@ + +#include "xdb_bench.h" + +#include +#include +#include +#include + +#include +#include + +static void log_exit(const std::string &msg) { + std::cout << msg << std::endl; + exit(-1); +} + +static unsigned long long get_time() { + struct timeval tv1; + gettimeofday(&tv1, NULL); + return (unsigned long long)tv1.tv_sec * 1000 * 1000 + tv1.tv_usec; +} + +static bool ip2uint(const char *buf, unsigned int &ip) { + struct in_addr addr; + if (inet_pton(AF_INET, buf, &addr) == 0) + return false; + // 网络字节序为大端存储, 在此转换为小端存储 + ip = (((addr.s_addr >> 0) & 0xFF) << 24) | + (((addr.s_addr >> 8) & 0xFF) << 16) | + (((addr.s_addr >> 16) & 0xFF) << 8) | + (((addr.s_addr >> 24) & 0xFF) << 0); + return true; +} + +static std::string uint2ip(unsigned int ip) { + char buf[16]; + snprintf(buf, + sizeof(buf), + "%d.%d.%d.%d", + (ip >> 24) & 0xFF, + (ip >> 16) & 0xFF, + (ip >> 8) & 0xFF, + ip & 0xFF); + return std::string(buf); +} + +xdb_bench_t::xdb_bench_t(const std::string &file_name) : xdb_search(file_name) { +} + +void xdb_bench_t::init_file() { + xdb_search.init_file(); +} + +void xdb_bench_t::init_vector_index() { + xdb_search.init_vector_index(); +} + +void xdb_bench_t::init_content() { + xdb_search.init_content(); +} + +void xdb_bench_t::bench_test_one(unsigned int ip_uint, const char *region) { + if (xdb_search.search(uint2ip(ip_uint)) != region) + log_exit("failed: " + uint2ip(ip_uint)); + sum_io_count += xdb_search.get_io_count(); + sum_cost_time += xdb_search.get_cost_time(); + sum_count++; +} + +void xdb_bench_t::bench_test_line(char *buf) { + size_t buf_len = strlen(buf); + if (buf_len == 0) + return; + buf[buf_len - 1] = '\0'; // 去掉换行符 + + char *pos1 = strchr(buf, '|'); + + if (pos1 == NULL) + 
log_exit("invalid data: " + std::string(buf)); + char *pos2 = strchr(pos1 + 1, '|'); + if (pos2 == NULL) + log_exit("invalid data: " + std::string(buf)); + *pos1 = '\0'; + *pos2 = '\0'; + + unsigned int ip1, ip2; + if (!ip2uint(buf, ip1) || !ip2uint(pos1 + 1, ip2) || ip1 > ip2) { + *pos1 = *pos2 = '|'; + log_exit(std::string("invalid data: ") + buf); + } + + const char *region = pos2 + 1; + + unsigned int ip_mid = ip1 + (ip2 - ip1) / 2; + std::vector ip_vec; + ip_vec.push_back(ip1); + ip_vec.push_back(ip1 + (ip_mid - ip1) / 2); + ip_vec.push_back(ip_mid); + ip_vec.push_back(ip_mid + (ip2 - ip_mid) / 2); + ip_vec.push_back(ip2); + + for (auto &d : ip_vec) + bench_test_one(d, region); +} + +void xdb_bench_t::bench_test_file(const std::string &file_name) { + FILE *f = fopen(file_name.data(), "r"); + if (f == NULL) + log_exit("can't open " + file_name); + char buf[1024]; + while (fgets(buf, sizeof(buf), f) != NULL) + bench_test_line(buf); +} + +void xdb_bench_t::bench(const std::string &file_name) { + sum_io_count = 0; + sum_cost_time = 0; + sum_count = 0; + + unsigned long long tv1 = get_time(); + bench_test_file(file_name); + unsigned long long tv2 = get_time(); + + double took = (tv2 - tv1) * 1.0 / 1000 / 1000; + double cost = sum_cost_time * 1.0 / sum_count; + + printf( + "total: %llu, took: %.2f s, cost: %.2f μs/op, io " + "count: " + "%llu\n", + sum_count, + took, + cost, + sum_io_count); +} diff --git a/binding/cpp/xdb_bench.h b/binding/cpp/xdb_bench.h new file mode 100644 index 00000000..1ec84c90 --- /dev/null +++ b/binding/cpp/xdb_bench.h @@ -0,0 +1,28 @@ +#ifndef XDB_BENCH_H +#define XDB_BENCH_H + +#include "xdb_search.h" + +class xdb_bench_t { + public: + xdb_bench_t(const std::string &file_name); + + void init_file(); + void init_vector_index(); + void init_content(); + + void bench(const std::string &file_name); + + private: + void bench_test_one(unsigned int ip_uint, const char *region); + void bench_test_line(char *buf); + void bench_test_file(const 
std::string &file_name); + + xdb_search_t xdb_search; + + unsigned long long sum_io_count; + unsigned long long sum_cost_time; + unsigned long long sum_count; +}; + +#endif diff --git a/binding/cpp/xdb_bench_test.cc b/binding/cpp/xdb_bench_test.cc new file mode 100644 index 00000000..c282fd7d --- /dev/null +++ b/binding/cpp/xdb_bench_test.cc @@ -0,0 +1,70 @@ + +#include "xdb_bench.h" + +#include + +#include + +void print_help(int argc, char* argv[]) { + printf("./xdb_bench [command options]\n"); + printf("options:\n"); + printf(" --db string ip2region binary xdb file path\n"); + printf(" --src string source ip text file path\n"); + printf( + " --cache-policy string cache policy: " + "file/vector_index/content\n"); + printf(" --help print help\n"); + exit(-1); +} + +int main(int argc, char* argv[]) { + struct option long_options[] = { + {"db", required_argument, 0, 'd'}, + {"cache-policy", required_argument, 0, 't'}, + {"src", required_argument, 0, 's'}, + {"help", no_argument, 0, 'h'}, + {0, 0, 0, 0 } + }; + + std::string db_file_name = "../../data/ip2region.xdb"; + std::string src_file_name = "../../data/ip.merge.txt"; + std::string cache_policy = "vector_index"; + + while (1) { + int c = getopt_long(argc, argv, "", long_options, NULL); + if (c == -1) + break; + switch (c) { + case 'd': + db_file_name = optarg; + break; + case 'h': + print_help(argc, argv); + break; + case 't': + cache_policy = optarg; + break; + case 's': + src_file_name = optarg; + break; + case '?': + exit(-1); + } + } + + xdb_bench_t xdb(db_file_name); + + if (cache_policy == "content") + xdb.init_content(); + else if (cache_policy == "vector_index") + xdb.init_vector_index(); + else if (cache_policy == "file") + xdb.init_file(); + else { + std::cout << "invalid cache policy: " << cache_policy << std::endl; + exit(-1); + } + + xdb.bench(src_file_name); + return 0; +} diff --git a/binding/cpp/xdb_search.cc b/binding/cpp/xdb_search.cc new file mode 100644 index 00000000..e5937bf5 --- /dev/null 
// Decode a 32-bit little-endian unsigned integer from the first four bytes
// of buf.
//
// The bytes are read as unsigned char and widened to unsigned int before
// shifting: shifting a promoted (possibly negative) plain char left is
// undefined behaviour in C++11.
static unsigned int read_uint(const char *buf) {
    const unsigned char *p = reinterpret_cast<const unsigned char *>(buf);
    return (static_cast<unsigned int>(p[0]) << 0) |
           (static_cast<unsigned int>(p[1]) << 8) |
           (static_cast<unsigned int>(p[2]) << 16) |
           (static_cast<unsigned int>(p[3]) << 24);
}

// Decode a 16-bit little-endian unsigned integer from the first two bytes
// of buf.
static unsigned short read_ushort(const char *buf) {
    const unsigned char *p = reinterpret_cast<const unsigned char *>(buf);
    return static_cast<unsigned short>(p[0] | (p[1] << 8));
}
short ®ion_len, + unsigned int ®ion_index) { + char buf[segment_index_size]; // 4 + 4 + 2 + 4 + const char *p; + + if (content != NULL) { + p = content + index; + } else { + ++io_count; + read_bin(index, buf, sizeof(buf), db); + p = buf; + } + ip_left = read_uint(p); + ip_right = read_uint(p + 4); + region_len = read_ushort(p + 8); + region_index = read_uint(p + 10); +} + +std::string xdb_search_t::get_region(unsigned int index, unsigned short len) { + if (content != NULL) { + return std::string(content + index, len); + } else { + ++io_count; + char *buf = (char *)malloc(sizeof(char) * len); + read_bin(index, buf, len, db); + std::string res(buf, len); + free(buf); + return res; + } +} + +xdb_search_t::xdb_search_t(const std::string &file_name) { + db = fopen(file_name.data(), "r"); + vector_index = NULL; + content = NULL; + + if (db == NULL) + log_exit("can't open " + file_name); +} + +void xdb_search_t::init_file() { +} + +void xdb_search_t::init_vector_index() { + vector_index = (char *)malloc(vector_index_length); + read_bin(header_length, vector_index, vector_index_length, db); +} + +void xdb_search_t::init_content() { + fseek(db, 0, SEEK_END); + unsigned int size = ftell(db); + content = (char *)malloc(size); + read_bin(0, content, size, db); +} + +xdb_search_t::~xdb_search_t() { + if (db != NULL) { + fclose(db); + db = NULL; + } + if (vector_index != NULL) { + free(vector_index); + vector_index = NULL; + } + if (content != NULL) { + free(content); + content = NULL; + } +} + +unsigned long long xdb_search_t::get_io_count() { + return io_count; +} + +unsigned long long xdb_search_t::get_cost_time() { + return cost_time; +} + +std::string xdb_search_t::search(const std::string &ip_str) { + unsigned long long t1 = get_time(); + + unsigned int ip_uint; + if (!ip2uint(ip_str.data(), ip_uint)) + return "invalid ip: " + ip_str; + std::string region = search(ip_uint); + + unsigned long long t2 = get_time(); + cost_time = t2 - t1; + return region; +} + +std::string 
xdb_search_t::search(unsigned int ip_uint) { + io_count = 0; + + unsigned int content_index_left, content_index_right; + get_content_index(ip_uint, content_index_left, content_index_right); + + unsigned int left, right, mid; + unsigned int ip_left, ip_right; + unsigned short region_len; + unsigned int region_index; + unsigned int mid_index; + + left = 0; + right = (content_index_right - content_index_left) / segment_index_size; + + for (;;) { + mid = left + (right - left) / 2; + mid_index = content_index_left + mid * segment_index_size; + get_content(mid_index, ip_left, ip_right, region_len, region_index); + + if (ip_left > ip_uint) + right = mid - 1; + else if (ip_right < ip_uint) + left = mid + 1; + else + return get_region(region_index, region_len); + } +} diff --git a/binding/cpp/xdb_search.h b/binding/cpp/xdb_search.h new file mode 100644 index 00000000..44286ef0 --- /dev/null +++ b/binding/cpp/xdb_search.h @@ -0,0 +1,50 @@ +#ifndef XDB_SEARCH_H +#define XDB_SEARCH_H + +#include + +class xdb_search_t { + public: + xdb_search_t(const std::string &file_name); + ~xdb_search_t(); + + void init_file(); + void init_vector_index(); + void init_content(); + + unsigned long long get_io_count(); + unsigned long long get_cost_time(); + + std::string search(const std::string &ip); + + private: + void get_content_index(unsigned int ip, + unsigned int &left, + unsigned int &right); + + void get_content(unsigned int index, + unsigned int &ip_left, + unsigned int &ip_right, + unsigned short ®ion_len, + unsigned int ®ion_index); + + std::string get_region(unsigned int index, unsigned short len); + + std::string search(unsigned int ip_uint); + + FILE *db; + char *vector_index; + char *content; + unsigned long long io_count; + unsigned long long cost_time; + + static constexpr int header_length = 256; + static constexpr int vector_index_rows = 256; + static constexpr int vector_index_cols = 256; + static constexpr int vector_index_size = 8; + static constexpr int 
vector_index_length = + vector_index_rows * vector_index_cols * vector_index_size; + static constexpr int segment_index_size = 14; +}; + +#endif diff --git a/binding/cpp/xdb_search_test.cc b/binding/cpp/xdb_search_test.cc new file mode 100644 index 00000000..a2deea2c --- /dev/null +++ b/binding/cpp/xdb_search_test.cc @@ -0,0 +1,73 @@ + +#include "xdb_search.h" + +#include + +#include + +void print_help(int argc, char* argv[]) { + printf("./xdb_search [command options]\n"); + printf("options:\n"); + printf(" --db string ip2region binary xdb file path\n"); + printf( + " --cache-policy string cache policy: " + "file/vector_index/content\n"); + printf(" --help print help\n"); + exit(-1); +} + +int main(int argc, char* argv[]) { + struct option long_options[] = { + {"db", required_argument, 0, 'd'}, + {"cache-policy", required_argument, 0, 't'}, + {"help", no_argument, 0, 'h'}, + {0, 0, 0, 0 } + }; + + std::string db_file_name = "../../data/ip2region.xdb"; + std::string cache_policy = "vector_index"; + + while (1) { + int c = getopt_long(argc, argv, "", long_options, NULL); + if (c == -1) + break; + switch (c) { + case 'd': + db_file_name = optarg; + break; + case 'h': + print_help(argc, argv); + break; + case 't': + cache_policy = optarg; + break; + case '?': + exit(-1); + } + } + + xdb_search_t xdb(db_file_name); + + if (cache_policy == "content") + xdb.init_content(); + else if (cache_policy == "vector_index") + xdb.init_vector_index(); + else if (cache_policy == "file") + xdb.init_file(); + else { + std::cout << "invalid cache policy: " << cache_policy << std::endl; + exit(-1); + } + + std::string ip; + for (;;) { + std::cout << "ip2region>> "; + std::getline(std::cin, ip); + if (ip.empty()) + continue; + if (ip == "exit" || ip == "quit") + break; + std::cout << xdb.search(ip) << std::endl; + } + return 0; +} diff --git a/maker/cpp/Makefile b/maker/cpp/Makefile new file mode 100644 index 00000000..06641394 --- /dev/null +++ b/maker/cpp/Makefile @@ -0,0 +1,11 @@ + 
+all: xdb_make xdb_edit + +xdb_make: xdb_make.cc xdb_make_test.cc + g++ -std=c++11 -O2 $^ -o $@ + +xdb_edit: xdb_edit.cc xdb_edit_test.cc + g++ -std=c++11 -O2 $^ -o $@ + +clean: + rm -f xdb_make xdb_edit diff --git a/maker/cpp/readme.md b/maker/cpp/readme.md new file mode 100644 index 00000000..c09dde07 --- /dev/null +++ b/maker/cpp/readme.md @@ -0,0 +1,217 @@ +# ip2region xdb C++ 生成实现 + +# 编译 +1. 切换到当前目录 +2. 编译 + +``` +$ make +g++ -std=c++11 -O2 xdb_make.cc xdb_make_test.cc -o xdb_make +``` + +# `xdb` 数据生成 +## 使用说明 +``` +$ ./xdb_make --help +./xdb_make [command options] +options: + --db string ip2region binary xdb file path + --src string source ip text file path +``` + +## 数据生成 +``` +$ ./xdb_make --db ip2region.xdb --src ../../data/ip.merge.txt +took: 1.46s +``` + +## 数据正确性测试 +``` +$ make # 1. 编译 +$ ./xdb_make # 2. 本目录生成 xdb 文件 +$ diff <(xxd ./ip2region.xdb) <(xxd ../../data/ip2region.xdb) # 3. 比较本目录和仓库中的 xdb 文件 + # 只有生成的时间不同 +1c1 +< 00000000: 0200 0100 3c6a f965 2302 0f00 75ea a800 ....<j.e#...u... +--- +> 00000000: 0200 0100 469b de65 2302 0f00 75ea a800 ....F..e#...u... 
+``` + +# `xdb` 数据编辑 +## 使用说明 +* 新的IP归属地文件可以包含空行 +* 新的IP归属地文件顺序可以乱序, 程序会自动排序 +* 新的IP归属地文件顺序可以重叠, 只要无二义性, 程序会自动合并 +* 最终的结果会将相邻的且归属地相同的行自动合并 + +``` +$ ./xdb_edit --help +./xdb_edit [command options] +options: + --old filename old source ip text file path + --new filename new source ip text file path +``` + +## 数据更新 +``` +$ ./xdb_edit --old ../../data/ip.merge.txt --new 1.txt +took: 1.46s +``` + +## 数据正确性测试 +### 测试一: 测试数据文件包含空行以及重复的情况 +``` +$ cat -n 1.txt + 1 + 2 1.0.128.0|1.0.128.255|测试归属地 + 3 1.0.128.0|1.0.128.255|测试归属地 + 4 +$ ./xdb_edit --old ../../data/ip.merge.txt --new 1.txt +took: 1.83s +$ git diff ../../data/ +diff --git a/data/ip.merge.txt b/data/ip.merge.txt +index 8976bd3..6da5e18 100644 +--- a/data/ip.merge.txt ++++ b/data/ip.merge.txt +@@ -7,7 +7,7 @@ + 1.0.32.0|1.0.63.255|中国|0|广东省|广州市|电信 + 1.0.64.0|1.0.79.255|日本|0|广岛县|0|0 + 1.0.80.0|1.0.127.255|日本|0|冈山县|0|0 +-1.0.128.0|1.0.128.255|泰国|0|清莱府|0|TOT ++1.0.128.0|1.0.128.255|测试归属地 + 1.0.129.0|1.0.132.191|泰国|0|曼谷|曼谷|TOT + 1.0.132.192|1.0.132.255|泰国|0|Nakhon-Ratchasima|0|TOT + 1.0.133.0|1.0.133.255|泰国|0|素攀武里府|0|TOT +@@ -320906,8 +320906,7 @@ + 100.47.160.0|100.47.191.255|美国|0|密歇根|0|美国电话电报 + 100.47.192.0|100.47.255.255|美国|0|0|0|美国电话电报 + 100.48.0.0|100.63.255.255|美国|0|0|0|Sprint +-100.64.0.0|100.122.255.255|0|0|0|内网IP|内网IP +-100.123.0.0|100.127.255.255|0|0|0|内网IP|内网IP ++100.64.0.0|100.127.255.255|0|0|0|内网IP|内网IP + 100.128.0.0|100.255.255.255|美国|0|0|0|T-Mobile + 101.0.0.0|101.0.3.255|中国|0|福建省|福州市|电信 + 101.0.4.0|101.0.7.255|印度尼西亚|0|东爪哇|泗水|0 +``` + +### 测试二: 测试数据文件乱序以及数据有交叉, 归属地相同的情况 +``` +$ cat -n 1.txt + 1 + 2 1.0.128.5|1.0.128.255|测试归属地 + 3 1.0.128.0|1.0.128.9|测试归属地 + 4 +$ ./xdb_edit --old ../../data/ip.merge.txt --new 1.txt +took: 1.83s +$ git diff ../../data/ +diff --git a/data/ip.merge.txt b/data/ip.merge.txt +index 8976bd3..6da5e18 100644 +--- a/data/ip.merge.txt ++++ b/data/ip.merge.txt +@@ -7,7 +7,7 @@ + 1.0.32.0|1.0.63.255|中国|0|广东省|广州市|电信 + 1.0.64.0|1.0.79.255|日本|0|广岛县|0|0 + 
1.0.80.0|1.0.127.255|日本|0|冈山县|0|0 +-1.0.128.0|1.0.128.255|泰国|0|清莱府|0|TOT ++1.0.128.0|1.0.128.255|测试归属地 + 1.0.129.0|1.0.132.191|泰国|0|曼谷|曼谷|TOT + 1.0.132.192|1.0.132.255|泰国|0|Nakhon-Ratchasima|0|TOT + 1.0.133.0|1.0.133.255|泰国|0|素攀武里府|0|TOT +@@ -320906,8 +320906,7 @@ + 100.47.160.0|100.47.191.255|美国|0|密歇根|0|美国电话电报 + 100.47.192.0|100.47.255.255|美国|0|0|0|美国电话电报 + 100.48.0.0|100.63.255.255|美国|0|0|0|Sprint +-100.64.0.0|100.122.255.255|0|0|0|内网IP|内网IP +-100.123.0.0|100.127.255.255|0|0|0|内网IP|内网IP ++100.64.0.0|100.127.255.255|0|0|0|内网IP|内网IP + 100.128.0.0|100.255.255.255|美国|0|0|0|T-Mobile + 101.0.0.0|101.0.3.255|中国|0|福建省|福州市|电信 + 101.0.4.0|101.0.7.255|印度尼西亚|0|东爪哇|泗水|0 +``` + +### 测试三: 测试数据文件乱序以及数据有交叉的, 归属地不同情况 +``` +$ cat -n 1.txt + 1 + 2 1.0.128.5|1.0.128.255|测试归属地 + 3 1.0.128.0|1.0.128.9|测试归属地123 + 4 +$ ./xdb_edit --old ../../data/ip.merge.txt --new 1.txt +数据有二义性: 1.0.128.0|1.0.128.9|测试归属地123, 1.0.128.5|1.0.128.255|测试归属地 +``` + +### 测试四: 测试将一个IP数据拆成多个IP +``` +$ cat -n 1.txt + 1 36.136.1.0|36.136.7.255|中国|0|广西|来宾市|移动 + 2 36.136.8.0|36.136.15.255|中国|0|广西|玉林市|移动 + 3 36.136.16.0|36.136.23.255|中国|0|广西|河池市|移动 +$ ./xdb_edit --old ../../data/ip.merge.txt --new 1.txt +took: 1.83s +$ git diff ../../data/ +diff --git a/data/ip.merge.txt b/data/ip.merge.txt +index 8976bd3..7be0227 100644 +--- a/data/ip.merge.txt ++++ b/data/ip.merge.txt +@@ -54778,7 +54778,11 @@ + 36.134.84.0|36.134.85.255|中国|0|安徽省|合肥市|移动 + 36.134.86.0|36.134.87.255|中国|0|广西|南宁市|移动 + 36.134.88.0|36.134.89.255|中国|0|内蒙古|呼和浩特市|移动 +-36.134.90.0|36.141.255.255|中国|0|0|0|移动 ++36.134.90.0|36.136.0.255|中国|0|0|0|移动 ++36.136.1.0|36.136.7.255|中国|0|广西|来宾市|移动 ++36.136.8.0|36.136.15.255|中国|0|广西|玉林市|移动 ++36.136.16.0|36.136.23.255|中国|0|广西|河池市|移动 ++36.136.24.0|36.141.255.255|中国|0|0|0|移动 + 36.142.0.0|36.142.1.255|中国|0|四川省|成都市|移动 + 36.142.2.0|36.142.31.255|中国|0|甘肃省|兰州市|移动 + 36.142.32.0|36.142.127.255|中国|0|甘肃省|0|移动 +@@ -320906,8 +320910,7 @@ + 100.47.160.0|100.47.191.255|美国|0|密歇根|0|美国电话电报 + 100.47.192.0|100.47.255.255|美国|0|0|0|美国电话电报 + 
100.48.0.0|100.63.255.255|美国|0|0|0|Sprint +-100.64.0.0|100.122.255.255|0|0|0|内网IP|内网IP +-100.123.0.0|100.127.255.255|0|0|0|内网IP|内网IP ++100.64.0.0|100.127.255.255|0|0|0|内网IP|内网IP + 100.128.0.0|100.255.255.255|美国|0|0|0|T-Mobile + 101.0.0.0|101.0.3.255|中国|0|福建省|福州市|电信 + 101.0.4.0|101.0.7.255|印度尼西亚|0|东爪哇|泗水|0 + ``` + +### 测试五: 测试将多个IP数据并成一个IP数据 +``` +$ cat -n 1.txt + 1 + 2 1.0.16.0|1.0.127.255|测试归属地 + 3 +$ ./xdb_edit --old ../../data/ip.merge.txt --new 1.txt +took: 1.83s +$ git diff ../../data/ +diff --git a/data/ip.merge.txt b/data/ip.merge.txt +index 8976bd3..acc27a5 100644 +--- a/data/ip.merge.txt ++++ b/data/ip.merge.txt +@@ -3,10 +3,7 @@ + 1.0.1.0|1.0.3.255|中国|0|福建省|福州市|电信 + 1.0.4.0|1.0.7.255|澳大利亚|0|维多利亚|墨尔本|0 + 1.0.8.0|1.0.15.255|中国|0|广东省|广州市|电信 +-1.0.16.0|1.0.31.255|日本|0|0|0|0 +-1.0.32.0|1.0.63.255|中国|0|广东省|广州市|电信 +-1.0.64.0|1.0.79.255|日本|0|广岛县|0|0 +-1.0.80.0|1.0.127.255|日本|0|冈山县|0|0 ++1.0.16.0|1.0.127.255|测试归属地 + 1.0.128.0|1.0.128.255|泰国|0|清莱府|0|TOT + 1.0.129.0|1.0.132.191|泰国|0|曼谷|曼谷|TOT + 1.0.132.192|1.0.132.255|泰国|0|Nakhon-Ratchasima|0|TOT +@@ -320906,8 +320903,7 @@ + 100.47.160.0|100.47.191.255|美国|0|密歇根|0|美国电话电报 + 100.47.192.0|100.47.255.255|美国|0|0|0|美国电话电报 + 100.48.0.0|100.63.255.255|美国|0|0|0|Sprint +-100.64.0.0|100.122.255.255|0|0|0|内网IP|内网IP +-100.123.0.0|100.127.255.255|0|0|0|内网IP|内网IP ++100.64.0.0|100.127.255.255|0|0|0|内网IP|内网IP + 100.128.0.0|100.255.255.255|美国|0|0|0|T-Mobile + 101.0.0.0|101.0.3.255|中国|0|福建省|福州市|电信 + 101.0.4.0|101.0.7.255|印度尼西亚|0|东爪哇|泗水|0 + ``` + diff --git a/maker/cpp/xdb_edit.cc b/maker/cpp/xdb_edit.cc new file mode 100644 index 00000000..2279aa2a --- /dev/null +++ b/maker/cpp/xdb_edit.cc @@ -0,0 +1,247 @@ + +#include "xdb_edit.h" + +#include +#include +#include +#include +#include + +#include +#include + +static void log_exit(const std::string& msg) { + std::cout << msg << std::endl; + exit(-1); +} + +static unsigned long long get_time() { + struct timeval tv1; + gettimeofday(&tv1, NULL); + return (unsigned long long)tv1.tv_sec * 
// Parse a dotted-quad IPv4 string into a host-byte-order integer
// ("1.2.3.4" -> 0x01020304).
//
// Returns false and leaves `ip` untouched when buf is not a valid IPv4
// address.
static bool ip2uint(const char* buf, unsigned int& ip) {
    struct in_addr addr;
    // inet_pton() reports success with 1 only; 0 means "not parseable" and
    // -1 signals an error, so anything other than 1 is a failure.
    if (inet_pton(AF_INET, buf, &addr) != 1)
        return false;
    // s_addr is in network byte order. ntohl() converts it correctly on every
    // host, whereas a hand-written byte swap is only right on little-endian
    // machines.
    ip = ntohl(addr.s_addr);
    return true;
}
new_regions.sort(); + // 检验及其去重 + auto it = new_regions.begin(); + + for (;;) { + if (it == new_regions.end()) + break; + auto next = it; + ++next; + if (next == new_regions.end()) + break; + if (it->ip1 == next->ip1 || it->ip2 >= next->ip1) { + // 数据重叠 + if (it->region != next->region) + log_exit("数据有二义性: " + it->to_string() + ", " + + next->to_string()); + it->ip2 = std::max(it->ip2, next->ip2); + new_regions.erase(next); + } else if (it->ip2 + 1 == next->ip1 && it->region == next->region) { + // 数据连接 + it->ip2 = next->ip2; + new_regions.erase(next); + } else { + ++it; + } + } +} + +void xdb_edit_t::handle_old_file(const std::string& file_name) { + handle_ip_txt(file_name, old_regions); +} + +void xdb_edit_t::merge() { + auto it1 = old_regions.begin(); + auto it2 = new_regions.begin(); + + for (;;) { + if (it2 == new_regions.end()) + break; + if (it2->ip1 > it2->ip2) { + // 失效数据 + ++it2; + continue; + } + while (it1->ip2 < it2->ip1) + ++it1; + if (it1->ip2 <= it2->ip2) { + xdb_node_t node; + node.ip1 = it2->ip1; + node.ip2 = it1->ip2; + node.region = it2->region; + + it1->ip2 = node.ip1 - 1; + it2->ip1 = node.ip2 + 1; + + // std::cout << "insert: " << node.to_string() << + // std::endl; + ++it1; + it1 = old_regions.insert(it1, node); + ++it1; + } else { + xdb_node_t node; + node.ip1 = it2->ip2 + 1; + node.ip2 = it1->ip2; + node.region = it1->region; + + it1->ip2 = it2->ip1 - 1; + + // std::cout << "insert: " << it2->to_string() << + // std::endl; + ++it1; + it1 = old_regions.insert(it1, *it2); + + ++it1; + it1 = old_regions.insert(it1, node); + + ++it2; + } + } +} + +void xdb_edit_t::write_old_file(const std::string& file_name) { + FILE* f = fopen(file_name.data(), "w"); + if (f == NULL) + log_exit("can't open " + file_name); + + auto it = old_regions.begin(); + + // 删除非法的数据 + for (;;) { + if (it == old_regions.end()) + break; + if (it->ip1 > it->ip2) + it = old_regions.erase(it); + else + ++it; + } + + // 合并数据域相同的相邻数据 + it = old_regions.begin(); + for (;;) { + 
if (it == old_regions.end()) + break; + auto next = it; + ++next; + if (next == old_regions.end()) + break; + if (it->region == next->region) { + it->ip2 = next->ip2; + old_regions.erase(next); + } else { + ++it; + } + } + + for (auto& d : old_regions) { + std::string res = + uint2ip(d.ip1) + "|" + uint2ip(d.ip2) + "|" + d.region + "\n"; + fputs(res.data(), f); + } + + fclose(f); +} + +xdb_edit_t::xdb_edit_t(const std::string& file_name_old, + const std::string& file_name_new) { + unsigned long long tv1 = get_time(); + + handle_new_file(file_name_new); + handle_old_file(file_name_old); + merge(); + write_old_file(file_name_old); + + unsigned long long tv2 = get_time(); + + double took = (tv2 - tv1) * 1.0 / 1000 / 1000; + + printf("took: %.2fs\n", took); +} diff --git a/maker/cpp/xdb_edit.h b/maker/cpp/xdb_edit.h new file mode 100644 index 00000000..7fb86408 --- /dev/null +++ b/maker/cpp/xdb_edit.h @@ -0,0 +1,35 @@ +#ifndef XDB_EDIT_H +#define XDB_EDIT_H + +#include +#include + +struct xdb_node_t { + unsigned int ip1; + unsigned int ip2; + std::string region; + + xdb_node_t(); + xdb_node_t(char* buf); + + bool operator<(const xdb_node_t& rhs) const; + + std::string to_string() const; +}; + +class xdb_edit_t { + public: + xdb_edit_t(const std::string& file_name_old, + const std::string& file_name_new); + + private: + void handle_new_file(const std::string& file_name); + void handle_old_file(const std::string& file_name); + void merge(); + void write_old_file(const std::string& file_name); + + std::list old_regions; + std::list new_regions; +}; + +#endif diff --git a/maker/cpp/xdb_edit_test.cc b/maker/cpp/xdb_edit_test.cc new file mode 100644 index 00000000..c1d3216e --- /dev/null +++ b/maker/cpp/xdb_edit_test.cc @@ -0,0 +1,50 @@ + +#include "xdb_edit.h" + +#include +#include + +#include + +void print_help() { + printf("./xdb_edit [command options]\n"); + printf("options:\n"); + printf(" --old filename old source ip text file path\n"); + printf(" --new filename new 
source ip text file path\n");
+    exit(-1);
+}
+
+// Entry point of the editor tool: reads --old / --new (both optional, with
+// built-in defaults) and runs xdb_edit_t on the two files.
+int main(int argc, char* argv[]) {
+    struct option long_options[] = {
+        {"new",  required_argument, 0, 'n'},
+        {"old",  required_argument, 0, 'o'},
+        {"help", no_argument,       0, 'h'},
+        {0,      0,                 0, 0  }
+    };
+
+    std::string file_name_old = "../../data/ip.merge.txt";
+    std::string file_name_new = "./1.txt";
+
+    // Only long options are accepted (the short option string is empty).
+    int opt;
+    while ((opt = getopt_long(argc, argv, "", long_options, NULL)) != -1) {
+        if (opt == 'n')
+            file_name_new = optarg;
+        else if (opt == 'o')
+            file_name_old = optarg;
+        else if (opt == 'h')
+            print_help();
+        else if (opt == '?')
+            exit(-1);
+    }
+
+    // The constructor performs the complete edit.
+    xdb_edit_t xdb(file_name_old, file_name_new);
+
+    return 0;
+}
diff --git a/maker/cpp/xdb_make.cc b/maker/cpp/xdb_make.cc
new file mode 100644
index 00000000..dea887f6
--- /dev/null
+++ b/maker/cpp/xdb_make.cc
@@ -0,0 +1,240 @@
+
+#include "xdb_make.h"
+
+// NOTE(review): the header names of the system includes were lost from this
+// patch text; they are restored from the symbols this file uses (inet_pton,
+// isspace, exit, strlen/strchr/memset, gettimeofday, time, std::cout) --
+// confirm against the upstream file.
+#include <arpa/inet.h>
+#include <ctype.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/time.h>
+#include <time.h>
+
+#include <iostream>
+
+// Prints msg on stdout and terminates the process with exit(-1).
+static void log_exit(const std::string &msg) {
+    std::cout << msg << std::endl;
+    exit(-1);
+}
+
+// Returns the current gettimeofday() time in microseconds.
+static unsigned long long get_time() {
+    struct timeval now;
+    gettimeofday(&now, NULL);
+    return (unsigned long long)now.tv_sec * 1000 * 1000 + now.tv_usec;
+}
+
+// Stores data into buf[0..3], least significant byte first.
+static void write_uint(unsigned int data, char buf[]) {
+    for (int i = 0; i < 4; ++i)
+        buf[i] = (data >> (8 * i)) & 0xFF;
+}
+
+// Writes data to dst as 4 bytes, least significant byte first.
+static void write_uint(unsigned int data, FILE *dst) {
+    char bytes[4];
+    write_uint(data, bytes);
+    fwrite(bytes, 1, sizeof(bytes), dst);
+}
+
+// Stores data into buf[0..1], least significant byte first.
+static void write_ushort(unsigned short data, char buf[]) {
+    for (int i = 0; i < 2; ++i)
+        buf[i] = (data >> (8 * i)) & 0xFF;
+}
+
+// Writes data to dst as 2 bytes, least significant byte first.
+static void write_ushort(unsigned short data, FILE *dst) {
+    char bytes[2];
+    write_ushort(data, bytes);
+    fwrite(bytes, 1, sizeof(bytes), dst);
+}
+
+// Writes len raw bytes starting at buf to dst.
+static void write_string(const char *buf, unsigned int len, FILE *dst) {
+    fwrite(buf, 1, len, dst);
+}
+
+// Parses the dotted IPv4 text in buf into ip; returns false when the text is
+// not a valid address.
+static bool ip2uint(const char *buf, unsigned int &ip) {
+    struct in_addr addr;
+    if
(inet_pton(AF_INET, buf, &addr) != 1)
+        return false;
+    // inet_pton reports success only with 1 (0: malformed text, -1: error),
+    // so everything else is rejected before addr is read.
+    // s_addr is in network byte order (big-endian); ntohl converts it to the
+    // host's integer representation on any platform.
+    ip = ntohl(addr.s_addr);
+    return true;
+}
+
+// Formats ip as dotted decimal text, most significant byte first.
+static std::string uint2ip(unsigned int ip) {
+    char buf[16];  // "255.255.255.255" plus the terminating NUL
+    snprintf(buf,
+             sizeof(buf),
+             "%u.%u.%u.%u",
+             (ip >> 24) & 0xFF,
+             (ip >> 16) & 0xFF,
+             (ip >> 8) & 0xFF,
+             ip & 0xFF);
+    return std::string(buf);
+}
+
+// Appends one segment to the cell vector_index[row][col]: the 8 bytes made
+// of ip1 and ip2 (each written by write_uint) paired with the region text.
+void xdb_make_t::vector_index_push_back(unsigned int row,
+                                        unsigned int col,
+                                        unsigned int ip1,
+                                        unsigned int ip2,
+                                        const char  *region_str) {
+    char range[8];
+    write_uint(ip1, range);
+    write_uint(ip2, range + 4);
+
+    vector_index[row][col].push_back(
+        std::make_pair(std::string(range, sizeof(range)), region_str));
+}
+
+// Registers the range [ip1, ip2] in every cell it touches. A cell is selected
+// by the two most significant bytes of an address, so a range that spans
+// several such blocks is cut into one segment per block.
+void xdb_make_t::vector_index_push_back(unsigned int ip1,
+                                        unsigned int ip2,
+                                        const char  *region_str) {
+    unsigned int       row      = (ip1 >> 24) & 0xFF;
+    unsigned int       col      = (ip1 >> 16) & 0xFF;
+    const unsigned int last_row = (ip2 >> 24) & 0xFF;
+    const unsigned int last_col = (ip2 >> 16) & 0xFF;
+
+    if (row == last_row && col == last_col) {
+        // The whole range lives in a single cell.
+        vector_index_push_back(row, col, ip1, ip2, region_str);
+        return;
+    }
+
+    // First (partial) block: from ip1 to the end of its block.
+    vector_index_push_back(row, col, ip1, ip1 | 0x0000FFFF, region_str);
+    // Last (partial) block: from the start of its block to ip2.
+    vector_index_push_back(
+        last_row, last_col, ip2 & 0xFFFF0000, ip2, region_str);
+
+    // Every block strictly in between is covered completely.
+    for (;;) {
+        ++col;
+        if (col == 256) {
+            ++row;
+            col = 0;
+        }
+        if (row == last_row && col == last_col)
+            break;
+        const unsigned int block_start = (row << 24) | (col << 16);
+        vector_index_push_back(
+            row, col, block_start, block_start | 0x0000FFFF, region_str);
+    }
+}
+
+// Parses one "ip1|ip2|region" line (buf is modified in place), checks that
+// it continues exactly where the previous line stopped, assigns the region
+// its offset in the output file and records the range in vector_index.
+// NOTE: region_index and next_ip are function-local statics, so this state is
+// shared by every xdb_make_t instance in the process.
+void xdb_make_t::handle_input_help(char *buf) {
+    static unsigned int region_index = vector_index_length + header_length;
+    static unsigned int next_ip = 0;
+
+    // Strip trailing whitespace. The cast is required because region text
+    // may contain bytes >= 0x80, and isspace() on a negative char is
+    // undefined behaviour.
+    unsigned int buf_len = strlen(buf);
+    while (buf_len > 0 &&
+           isspace(static_cast<unsigned char>(buf[buf_len - 1])))
+        --buf_len;
+    if (buf_len == 0)
+        return;
+    buf[buf_len] = '\0';
+
+    char *pos1 = strchr(buf, '|');
+
+    if (pos1 == NULL)
+        
log_exit("invalid data: " + std::string(buf));
+    char *pos2 = strchr(pos1 + 1, '|');
+    if (pos2 == NULL)
+        log_exit("invalid data: " + std::string(buf));
+    // Cut the line into three C strings: ip1, ip2 and the region.
+    *pos1 = '\0';
+    *pos2 = '\0';
+
+    const char *region_str = pos2 + 1;
+
+    unsigned int ip1, ip2;
+    if (!ip2uint(buf, ip1) || !ip2uint(pos1 + 1, ip2) || ip1 > ip2 ||
+        *region_str == '\0') {
+        // Put the separators back so the message shows the original line.
+        *pos1 = *pos2 = '|';
+        log_exit(std::string("invalid data: ") + buf);
+    }
+
+    // Every line has to start right after the previous one ended.
+    if (next_ip != ip1)
+        log_exit("ip 不连续: " + uint2ip(ip1));
+    next_ip = ip2 + 1;
+
+    // First time this region is seen: reserve its place in the output file.
+    if (region.find(region_str) == region.end()) {
+        region[region_str] = region_index;
+        region_index += strlen(region_str);
+    }
+
+    vector_index_push_back(ip1, ip2, region_str);
+}
+
+// Reads the source text file line by line and feeds every line to
+// handle_input_help; exits through log_exit when the file cannot be opened.
+void xdb_make_t::handle_input(const std::string &file_name) {
+    FILE *src = fopen(file_name.data(), "r");
+    if (src == NULL)
+        log_exit("can't open " + file_name);
+
+    char line[1024];
+    for (;;) {
+        if (fgets(line, sizeof(line), src) == NULL)
+            break;
+        handle_input_help(line);
+    }
+    fclose(src);
+}
+
+// Builds the header_length-byte file header and writes it to dst.
+// Layout of the bytes that are set (everything else stays zero):
+//   0..1   version (2)
+//   2..3   cache policy (1)
+//   4..7   creation time
+//   8..11  offset of the first segment index entry
+//   12..15 offset of the last segment index entry
+void xdb_make_t::handle_header() {
+    char header[header_length];
+    memset(header, 0, header_length);
+    write_ushort(2, header);
+    write_ushort(1, header + 2);
+    write_uint(time(NULL), header + 4);
+
+    // The segment index starts after the header, the vector index and all
+    // region strings.
+    unsigned int first_segment = header_length + vector_index_length;
+    for (auto &entry : region)
+        first_segment += entry.first.size();
+
+    // Advance by one entry per stored segment, then step back one entry so
+    // the offset names the last entry rather than the end of the index.
+    unsigned int last_segment = first_segment;
+    for (auto &row : vector_index)
+        for (auto &cell : row)
+            last_segment += cell.size() * segment_index_size;
+    last_segment -= segment_index_size;
+
+    write_uint(first_segment, header + 8);
+    write_uint(last_segment, header + 12);
+    write_string(header, header_length, dst);
+}
+
+// Writes the vector index: for every cell, the offset where its segment
+// entries start followed by the offset where they end.
+void xdb_make_t::handle_vector_index() {
+    unsigned int index = header_length + vector_index_length;
+    for (auto &d : region)
+        index += d.first.size();
+    for (unsigned i = 0; i < vector_index_rows; ++i)
+        for (unsigned j = 0; j < vector_index_cols; ++j) {
+            write_uint(index, dst);
+            index += segment_index_size *
vector_index[i][j].size();
+            write_uint(index, dst);
+        }
+}
+
+// Writes every region string at the file offset that was reserved for it
+// while the input was read.
+void xdb_make_t::handle_region() {
+    for (const auto &entry : region) {
+        fseek(dst, entry.second, SEEK_SET);
+        write_string(entry.first.data(), entry.first.size(), dst);
+    }
+}
+
+// Appends the segment index at the end of the file. Each entry is the
+// 8-byte ip1/ip2 block, the region length (2 bytes) and the region offset
+// (4 bytes).
+void xdb_make_t::handle_content() {
+    fseek(dst, 0, SEEK_END);
+    for (auto &row : vector_index)
+        for (auto &cell : row)
+            // Iterate by reference: copying a pair of strings per segment is
+            // wasted work.
+            for (const auto &segment : cell) {
+                write_string(segment.first.data(), segment.first.size(), dst);
+                write_ushort(segment.second.size(), dst);
+                write_uint(region[segment.second], dst);
+            }
+}
+
+// Builds the xdb file file_name_dst from the IP text file file_name_src and
+// prints the elapsed time in seconds.
+xdb_make_t::xdb_make_t(const std::string &file_name_src,
+                       const std::string &file_name_dst) {
+    const unsigned long long started = get_time();
+
+    handle_input(file_name_src);
+
+    // The output is a binary format, so open it in binary mode: text mode
+    // may translate line-ending bytes on some platforms.
+    dst = fopen(file_name_dst.data(), "wb");
+    if (dst == NULL)
+        log_exit("can't open " + std::string(file_name_dst));
+
+    handle_header();
+    handle_vector_index();
+    handle_region();
+    handle_content();
+
+    fclose(dst);
+
+    const unsigned long long finished = get_time();
+    printf("took: %.2fs\n", (finished - started) * 1.0 / 1000 / 1000);
+}
diff --git a/maker/cpp/xdb_make.h b/maker/cpp/xdb_make.h
new file mode 100644
index 00000000..4aea7fca
--- /dev/null
+++ b/maker/cpp/xdb_make.h
@@ -0,0 +1,48 @@
+#ifndef XDB_MAKE_H
+#define XDB_MAKE_H
+
+// NOTE(review): the header names of the includes were lost from this patch
+// text; they are restored from the types this header uses (FILE,
+// std::string, std::unordered_map, std::vector) -- confirm against the
+// upstream file.
+#include <stdio.h>
+
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+// Generates an xdb file from an IP text file; all work happens in the
+// constructor.
+class xdb_make_t {
+  public:
+    xdb_make_t(const std::string &file_name_src,
+               const std::string &file_name_dst);
+
+  private:
+    void vector_index_push_back(unsigned int row,
+                                unsigned int col,
+                                unsigned int ip1,
+                                unsigned int ip2,
+                                const char  *region);
+    void vector_index_push_back(unsigned int ip1,
+                                unsigned int ip2,
+                                const char  *region);
+    void handle_input_help(char buf[]);
+    void handle_input(const std::string &file_name);
+
+    void handle_header();
+    void handle_vector_index();
+    void handle_region();
+    void handle_content();
+
+    static constexpr int header_length = 256;
+    static constexpr int vector_index_rows = 256;
+    static constexpr int vector_index_cols = 256;
+    static
constexpr int vector_index_size = 8;
+    static constexpr int vector_index_length =
+        vector_index_rows * vector_index_cols * vector_index_size;
+    static constexpr int segment_index_size = 14;
+
+    FILE *dst = NULL;
+
+    // One list of segments per cell; a segment pairs the 8-byte ip1/ip2
+    // block with its region text.
+    std::vector<std::pair<std::string, std::string>>
+        vector_index[vector_index_rows][vector_index_cols];
+
+    // Region text -> offset of that text in the output file.
+    std::unordered_map<std::string, unsigned int> region;
+};
+
+#endif
diff --git a/maker/cpp/xdb_make_test.cc b/maker/cpp/xdb_make_test.cc
new file mode 100644
index 00000000..b763d045
--- /dev/null
+++ b/maker/cpp/xdb_make_test.cc
@@ -0,0 +1,50 @@
+
+#include "xdb_make.h"
+
+// NOTE(review): the header names of the system includes were lost from this
+// patch text; they are restored from the symbols this file uses (getopt_long,
+// printf, exit, std::string) -- confirm against the upstream file.
+#include <getopt.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <string>
+
+// Prints the command line usage and terminates the process with exit(-1).
+void print_help() {
+    printf("./xdb_make [command options]\n");
+    printf("options:\n");
+    printf(" --db string ip2region binary xdb file path\n");
+    printf(" --src string source ip text file path\n");
+    exit(-1);
+}
+
+// Entry point of the maker tool: reads --db / --src (both optional, with
+// built-in defaults) and runs xdb_make_t on them.
+int main(int argc, char* argv[]) {
+    struct option long_options[] = {
+        {"db",   required_argument, 0, 'd'},
+        {"src",  required_argument, 0, 's'},
+        {"help", no_argument,       0, 'h'},
+        {0,      0,                 0, 0  }
+    };
+
+    std::string file_name_dst = "./ip2region.xdb";
+    std::string file_name_src = "../../data/ip.merge.txt";
+
+    // Only long options are accepted (the short option string is empty).
+    int opt;
+    while ((opt = getopt_long(argc, argv, "", long_options, NULL)) != -1) {
+        if (opt == 'd')
+            file_name_dst = optarg;
+        else if (opt == 's')
+            file_name_src = optarg;
+        else if (opt == 'h')
+            print_help();
+        else if (opt == '?')
+            exit(-1);
+    }
+
+    // The constructor performs the complete build.
+    xdb_make_t xdb(file_name_src, file_name_dst);
+
+    return 0;
+}