Skip to content

Commit

Permalink
Merge pull request #333 from KevinWL/master
Browse files Browse the repository at this point in the history
feat: Implement the ip2region xdb generation tool in Rust.
  • Loading branch information
lionsoul2014 authored Mar 14, 2024
2 parents 5826ba6 + f793a48 commit c82399f
Show file tree
Hide file tree
Showing 6 changed files with 321 additions and 0 deletions.
1 change: 1 addition & 0 deletions ReadMe.md
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ API 介绍,使用文档和测试程序请参考如下 `maker` 生成程序下
|     | 待开始 | [c](maker/c) | ANSI C xdb 生成程序实现 | [Lion](https://github.com/lionsoul2014) |
| :white_check_mark: | 已完成 | [python](maker/python) | python xdb 生成程序实现 | [leolin49](https://github.com/leolin49) |
| :white_check_mark: | 已完成 | [csharp](maker/csharp) | csharp xdb 生成程序实现 | [Alan Lee](https://github.com/malus2077) |
| :white_check_mark: | 已完成 | [rust](maker/rust) | rust xdb 生成程序实现 | [KevinWang](https://github.com/KevinWL) |


# `xdb` 数据更新
Expand Down
32 changes: 32 additions & 0 deletions maker/rust/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# ip2region xdb rust 生成实现

## 使用方法
* 当前目录下maker子目录下执行 `cargo build --release` 编译生成工具
* 编译生成的可执行文件 maker 位于 target/release 目录下
* 使用方法:
```
Usage: maker --in-file <IN_FILE> --out-file <OUT_FILE>
Options:
-i, --in-file <IN_FILE>
-o, --out-file <OUT_FILE>
-h, --help Print help
-V, --version Print version
```

例如,通过默认的 data/ip.merge.txt 原数据,在 target/release 目录下生成一个 ip2region.xdb 二进制文件:

```
kevin@ubuntu ~/i/m/r/m/t/release (master)> ./maker -i ../../../../../data/ip.merge.txt -o ip2region.xdb
load 683844 lines
try to write the segment index ptr ...
write done, dataBlocks: 13827, IndexBlock: 683844, indexPtr: (983587, 11070069)
Done, elpsed: 0m7s
```

## 数据查询/bench 测试
* 通过将以上步骤生成的二进制文件和python 生成工具生成的二进制文件进行二进制比对,除时间戳位置不同,其它均相同。

![](./vimdiff.png)
* 已经完成开发的 [binding](../../binding/) 都有查询和 bench 测试程序以及使用文档,你可以使用你熟悉的语言的 searcher 进行查询测试或者bench测试,来确认数据的正确性和完整性。

12 changes: 12 additions & 0 deletions maker/rust/maker/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
[package]
name = "maker"
version = "0.1.0"
edition = "2021"
authors = ["Kevin Wang <[email protected]>"]

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
clap = { version = "4.4.18", features = ["derive"] }
lazy_static = "1.4.0"
once_cell = "1.19.0"
61 changes: 61 additions & 0 deletions maker/rust/maker/src/ip_value.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
use std::error::Error;
use std::net::Ipv4Addr;
use std::str::FromStr;

/// Conversion of a value into the `u32` form of an IPv4 address.
pub trait ToUIntIP {
/// Returns the address as a `u32`, or an error when the value cannot be
/// interpreted as one.
fn to_u32_ip(&self) -> Result<u32, Box<dyn Error>>;
}

/// A `u32` already is the numeric address, so it is handed back unchanged.
impl ToUIntIP for u32 {
    fn to_u32_ip(&self) -> Result<u32, Box<dyn Error>> {
        // `u32` is `Copy`; dereferencing yields the same value `to_owned` would.
        Ok(*self)
    }
}

impl ToUIntIP for &str {
fn to_u32_ip(&self) -> Result<u32, Box<dyn Error>> {
if let Ok(ip_addr) = Ipv4Addr::from_str(self) {
return Ok(u32::from(ip_addr));
}
Ok(self.parse::<u32>()?)
}
}

/// An `Ipv4Addr` converts to the big-endian integer formed by its four octets.
impl ToUIntIP for Ipv4Addr {
    fn to_u32_ip(&self) -> Result<u32, Box<dyn Error>> {
        let octets = self.octets();
        Ok(u32::from_be_bytes(octets))
    }
}

#[cfg(test)]
mod test_ip {
    use super::*;

    /// Dotted-quad text is converted octet by octet.
    #[test]
    fn test_ip_str_2_u32() {
        let converted = "1.1.1.1".to_u32_ip().unwrap();
        assert_eq!(converted, 0x0101_0101);
    }

    /// Plain decimal text is accepted as an already-numeric address.
    #[test]
    fn test_ip_u32_str() {
        let converted = "12".to_u32_ip().unwrap();
        assert_eq!(converted, 12);
    }

    /// A `u32` passes through unchanged.
    #[test]
    fn test_ip_u32() {
        let converted = 33u32.to_u32_ip().unwrap();
        assert_eq!(converted, 33);
    }

    /// An `Ipv4Addr` maps to its big-endian integer value.
    #[test]
    fn test_ip_addr() {
        let addr = Ipv4Addr::from_str("0.0.3.12").unwrap();
        let converted = addr.to_u32_ip().unwrap();
        assert_eq!(converted, 0x0000_030c);
    }
}
215 changes: 215 additions & 0 deletions maker/rust/maker/src/main.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,215 @@
use clap::Parser;
use std::path::PathBuf;
use std::fs::File;
use std::io::{BufReader, BufRead, Error, Write, Seek, SeekFrom};
use lazy_static::lazy_static;
use std::collections::HashMap;
use std::sync::Mutex;
mod ip_value;
pub use self::ip_value::ToUIntIP;
use std::time::{SystemTime, UNIX_EPOCH, Instant};

// Byte offset at which the vector index starts; the header is written from offset 0.
const HEADER_LEN:u32 = 256;
// Bytes occupied by the vector index: 256*256 entries, each two little-endian u32 values.
const VECTOR_INDEX_LEN:u32 = 256*256*8;
// Bytes per segment index record: start ip (4) + end ip (4) + region length (2) + region offset (4).
const SEGMENT_INDEX_BLOCK_SIZE:u32 = 14;
// File offset of the first segment index record; set by write_index_block and stored in the header.
static mut START_INDEX_POS:u32 = 0;
// File offset of the last segment index record; set by write_index_block and stored in the header.
static mut END_INDEX_POS:u32 = 0;
// First u16 written to the header. NOTE(review): presumably the xdb format version - confirm.
const PROTOCAL:u16 = 2;
// Second u16 written to the header. NOTE(review): presumably the index policy id - confirm.
const INDEX_POLICY:u16 = 1;

// Maps each distinct region string to the file offset at which write_region
// stored it; write_index_block reads the offsets back.
lazy_static! {
static ref REG_MAP: Mutex<HashMap<String, u32>> = Mutex::new(HashMap::new());
}

// All segments parsed from the input file, in file order (filled by load_segments).
lazy_static! {
static ref V_SEG: Mutex<Vec<Segment>> = Mutex::new(Vec::new());
}

// Command-line arguments of the maker tool.
// Plain `//` comments are used on purpose: clap's derive turns `///` doc
// comments into help text, which would change the `--help` output.
#[derive(Parser)]
#[command(author="Kevin Wang <[email protected]>", version="2.0")]
#[command(help_template = " Author: {author-with-newline} {about-section}Version: {version} \n {usage-heading} {usage} \n {all-args} {tab}")]
struct Args {
// Path of the source text file read by load_segments.
#[arg(short, long)]
in_file: PathBuf,
// Path of the binary file to create.
#[arg(short, long)]
out_file: PathBuf,
}

/// One IP range together with its region text.
struct Segment {
/// First address of the range, as a `u32`.
sip: u32,
/// Last address of the range, as a `u32`.
eip: u32,
/// Region text attached to the range.
reg: String,
}

/// One vector index entry, selected by the two highest bytes of an address.
#[derive(Debug, Clone, Copy)]
struct IndexBlock {
/// File offset of the first segment index record for this entry (0 while unset).
first_pos: u32,
/// File offset just past the last segment index record recorded for this entry.
last_pos: u32,
}

impl Segment {
fn new(sip: u32, eip: u32, reg: String) -> Segment {
Segment {sip, eip, reg}
}
}

/// Reads the `start_ip|end_ip|region` lines of `in_file` into the global
/// `V_SEG` list.
///
/// Returns the number of loaded lines rendered as a string.
///
/// # Errors
/// Returns the underlying I/O error when the file cannot be opened or read.
///
/// # Panics
/// Panics on malformed data: a line without three `|`-separated fields, an
/// unparsable address, a start address above the end address, an empty
/// region, or a segment that does not start right after the previous one.
fn load_segments(in_file: PathBuf) -> std::io::Result<String> {
    let reader = BufReader::new(File::open(in_file)?);
    // Take the lock once for the whole load instead of once per line.
    let mut segments = V_SEG.lock().unwrap();
    // End address of the previous segment; `None` until the first line is read.
    let mut last_eip: Option<u32> = None;
    let mut count: usize = 0;
    for line in reader.lines() {
        let line = line?;
        let fields: Vec<&str> = line.splitn(3, '|').collect();
        if fields.len() != 3 {
            panic!("invalid ip segment line '{}'", line)
        }
        // The closure form only formats the message when parsing actually fails.
        let sip = fields[0].to_u32_ip().unwrap_or_else(|err| {
            panic!("invalid ip address '{}' in line {}: {:?}", fields[0], line, err)
        });
        let eip = fields[1].to_u32_ip().unwrap_or_else(|err| {
            panic!("invalid ip address '{}' in line {}: {:?}", fields[1], line, err)
        });
        if sip > eip {
            panic!("start ip({}) should not be greater than end ip({})", sip, eip);
        }
        if fields[2].is_empty() {
            panic!("empty region info in segment line `{}`", line);
        }
        // Check the continuity of data segment. `checked_add` keeps a
        // previous end of 255.255.255.255 from overflowing.
        if let Some(prev) = last_eip {
            if prev.checked_add(1) != Some(sip) {
                panic!("discontinuous data segment: last.eip+1!=seg.sip in line {}", line);
            }
        }
        segments.push(Segment::new(sip, eip, fields[2].to_string()));
        // Remember where this segment ended so the next line can be checked.
        last_eip = Some(eip);
        count += 1;
    }
    Ok(count.to_string())
}

fn write_region(out_fd: &mut File) -> std::io::Result<()> {
out_fd.seek(SeekFrom::Start((HEADER_LEN + VECTOR_INDEX_LEN).into()))?;
let v_seg = V_SEG.lock().unwrap();
for seg in v_seg.iter() {
if REG_MAP.lock().unwrap().get(&seg.reg) == None {
let pos = out_fd.stream_position()?;
REG_MAP.lock().unwrap().insert(seg.reg.to_string(), pos as u32);
out_fd.write(seg.reg.as_bytes())?;
}
}
return Ok(());
}

/// Splits the range `sip..=eip` into pieces that each stay within one value
/// of the two highest address bytes, all carrying a copy of `reg`.
///
/// The first piece starts at `sip`, the last one ends at `eip`; every other
/// boundary falls on `x.y.0.0` / `x.y.255.255`. The result is empty when the
/// two highest bytes of `sip` are greater than those of `eip`.
fn split_ip(sip: u32, eip: u32, reg: String) -> Vec<Segment> {
    // Walking the upper 16 bits as one number visits the same (first byte,
    // second byte) pairs, in the same order, as two nested octet loops.
    let first_prefix = sip >> 16;
    let last_prefix = eip >> 16;

    (first_prefix..=last_prefix)
        .map(|prefix| {
            let base = prefix << 16;
            Segment {
                sip: if prefix == first_prefix { sip } else { base },
                eip: if prefix == last_prefix { eip } else { base | 0xffff },
                reg: reg.clone(),
            }
        })
        .collect()
}

/// Records the segment index record at `block_pos` in the vector index entry
/// selected by the two highest bytes of `ip`.
///
/// The entry keeps the first position it ever receives (0 means "unset") and
/// always moves its end to just past the record at `block_pos`.
fn set_vector_index(arr: &mut [[IndexBlock; 256]; 256], ip: u32, block_pos: u32) {
    // Big-endian byte order puts the highest address byte first.
    let [row, col, ..] = ip.to_be_bytes();
    let entry = &mut arr[usize::from(row)][usize::from(col)];
    if entry.first_pos == 0 {
        entry.first_pos = block_pos;
    }
    entry.last_pos = block_pos + SEGMENT_INDEX_BLOCK_SIZE;
}

fn write_index_block(out_fd: &mut File) -> std::io::Result<()> {
let v_seg = V_SEG.lock().unwrap();
let mut index_arr :[[IndexBlock; 256]; 256] = [[IndexBlock{first_pos: 0, last_pos: 0}; 256]; 256];
for seg in v_seg.iter() {
let pos = REG_MAP.lock().unwrap().get(&seg.reg).copied().unwrap();
let node_list = split_ip(seg.sip, seg.eip, seg.reg.to_string());
for node in node_list {
let block_pos = out_fd.stream_position()?;
out_fd.write(&node.sip.to_le_bytes())?;
out_fd.write(&node.eip.to_le_bytes())?;
out_fd.write(&(node.reg.len() as u16).to_le_bytes())?;
out_fd.write(&pos.to_le_bytes())?;
set_vector_index(&mut index_arr, node.sip, block_pos as u32);
unsafe {
if START_INDEX_POS == 0 {
START_INDEX_POS = block_pos as u32;
}
END_INDEX_POS = block_pos as u32;
}
}
}
println!("try to write the segment index ptr ...");

out_fd.seek(SeekFrom::Start(HEADER_LEN.into()))?;
for i in 0..256 {
for j in 0..256 {
let index = index_arr[i][j];
out_fd.write(&index.first_pos.to_le_bytes())?;
out_fd.write(&index.last_pos.to_le_bytes())?;
}
}
return Ok(());
}

/// Writes the first 16 bytes of the file header at offset 0, all
/// little-endian: `PROTOCAL` (u16), `INDEX_POLICY` (u16), the current Unix
/// time in seconds (u32), `START_INDEX_POS` (u32) and `END_INDEX_POS` (u32).
///
/// # Errors
/// Returns any I/O error from seeking or writing.
///
/// # Panics
/// Panics when the system clock reports a time before the Unix epoch.
fn write_header(out_fd: &mut File) -> std::io::Result<()> {
    let timestamp = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .expect("Time went backwards")
        .as_secs() as u32;
    // SAFETY: the statics are plain `u32` values copied out by value; no
    // threads are spawned in the visible code, so nothing writes them
    // concurrently.
    let (start_pos, end_pos) = unsafe { (START_INDEX_POS, END_INDEX_POS) };

    let mut header: Vec<u8> = Vec::with_capacity(16);
    header.extend_from_slice(&PROTOCAL.to_le_bytes());
    header.extend_from_slice(&INDEX_POLICY.to_le_bytes());
    header.extend_from_slice(&timestamp.to_le_bytes());
    header.extend_from_slice(&start_pos.to_le_bytes());
    header.extend_from_slice(&end_pos.to_le_bytes());

    out_fd.seek(SeekFrom::Start(0))?;
    // `write_all` guarantees the complete header is stored; plain `write`
    // may stop after a partial write.
    out_fd.write_all(&header)?;
    Ok(())
}

/// Entry point: loads the source segments, writes the region data, the
/// segment and vector indexes and the header into the output file, then
/// prints a short summary.
///
/// # Errors
/// Returns the first I/O error hit while reading the input or writing the
/// output, which makes the process exit with a failure status.
fn main() -> Result<(), Error> {
    let args = Args::parse();
    let now = Instant::now();

    // Stop on a failed load instead of going on to write an index built from
    // incomplete data.
    let loaded = load_segments(args.in_file)?;
    println!("load {} lines", loaded);

    let mut out_fd = File::create(args.out_file)?;
    write_region(&mut out_fd)?;
    write_index_block(&mut out_fd)?;
    write_header(&mut out_fd)?;

    // SAFETY: the statics are plain `u32` values copied out by value; no
    // threads are spawned in the visible code. Copying first also keeps
    // `println!` from taking references to `static mut` items.
    let (start_pos, end_pos) = unsafe { (START_INDEX_POS, END_INDEX_POS) };
    println!(
        "write done, dataBlocks: {}, IndexBlock: {}, indexPtr: ({}, {})",
        REG_MAP.lock().unwrap().len(),
        V_SEG.lock().unwrap().len(),
        start_pos,
        end_pos
    );

    let sec = now.elapsed().as_secs();
    println!("Done, elpsed: {}m{}s", sec / 60, sec % 60);
    Ok(())
}
Binary file added maker/rust/vimdiff.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.

0 comments on commit c82399f

Please sign in to comment.