From 01902a6792e97d869f80c63677a572f6bd29f118 Mon Sep 17 00:00:00 2001 From: KevinWang Date: Fri, 23 Feb 2024 06:33:21 +0000 Subject: [PATCH] feat: Implement the generation tool of Ip2region in the Rust . --- maker/rust/README.md | 14 ++ maker/rust/maker/Cargo.toml | 12 ++ maker/rust/maker/src/ip_value.rs | 61 +++++++++ maker/rust/maker/src/main.rs | 215 +++++++++++++++++++++++++++++++ 4 files changed, 302 insertions(+) create mode 100644 maker/rust/README.md create mode 100644 maker/rust/maker/Cargo.toml create mode 100644 maker/rust/maker/src/ip_value.rs create mode 100644 maker/rust/maker/src/main.rs diff --git a/maker/rust/README.md b/maker/rust/README.md new file mode 100644 index 00000000..0dd1aa19 --- /dev/null +++ b/maker/rust/README.md @@ -0,0 +1,14 @@ +# ip2region xdb rust 生成实现 + +## 使用方法 +* 当前目录下执行 `cargo build --release` 编译生成工具 +* 目标生成在targe/release 目录下 maker +* 使用方法: +``` + Usage: maker --in-file --out-file + Options: + -i, --in-file + -o, --out-file + -h, --help Print help + -V, --version Print version +``` diff --git a/maker/rust/maker/Cargo.toml b/maker/rust/maker/Cargo.toml new file mode 100644 index 00000000..19d60006 --- /dev/null +++ b/maker/rust/maker/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = "maker" +version = "0.1.0" +edition = "2021" +authors = ["Kevin Wang "] + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +clap = { version = "4.4.18", features = ["derive"] } +lazy_static = "1.4.0" +once_cell = "1.19.0" diff --git a/maker/rust/maker/src/ip_value.rs b/maker/rust/maker/src/ip_value.rs new file mode 100644 index 00000000..0a11f92d --- /dev/null +++ b/maker/rust/maker/src/ip_value.rs @@ -0,0 +1,61 @@ +use std::error::Error; +use std::net::Ipv4Addr; +use std::str::FromStr; + +pub trait ToUIntIP { + fn to_u32_ip(&self) -> Result>; +} + +impl ToUIntIP for u32 { + fn to_u32_ip(&self) -> Result> { + Ok(self.to_owned()) + } +} + +impl ToUIntIP for &str { + fn to_u32_ip(&self) -> Result> { + if let Ok(ip_addr) = Ipv4Addr::from_str(self) { + return Ok(u32::from(ip_addr)); + } + Ok(self.parse::()?) + } +} + +impl ToUIntIP for Ipv4Addr { + fn to_u32_ip(&self) -> Result> { + Ok(u32::from(*self)) + } +} + +#[cfg(test)] +mod test_ip { + use super::*; + + #[test] + fn test_ip_str_2_u32() { + let ip_str = "1.1.1.1"; + let result = ip_str.to_u32_ip().unwrap(); + assert_eq!(result, 1 << 24 | 1 << 16 | 1 << 8 | 1); + } + + #[test] + fn test_ip_u32_str() { + let ip = "12"; + let result = ip.to_u32_ip().unwrap(); + assert_eq!(result, 12); + } + + #[test] + fn test_ip_u32() { + let ip: u32 = 33; + let result = ip.to_u32_ip().unwrap(); + assert_eq!(result, 33); + } + + #[test] + fn test_ip_addr() { + let ip = Ipv4Addr::from_str("0.0.3.12").unwrap(); + let result = ip.to_u32_ip().unwrap(); + assert_eq!(result, 3 << 8 | 12) + } +} diff --git a/maker/rust/maker/src/main.rs b/maker/rust/maker/src/main.rs new file mode 100644 index 00000000..3318503f --- /dev/null +++ b/maker/rust/maker/src/main.rs @@ -0,0 +1,215 @@ +use clap::Parser; +use std::path::PathBuf; +use std::fs::File; +use std::io::{BufReader, BufRead, Error, Write, Seek, SeekFrom}; +use lazy_static::lazy_static; +use std::collections::HashMap; +use std::sync::Mutex; +mod ip_value; +pub use self::ip_value::ToUIntIP; +use std::time::{SystemTime, UNIX_EPOCH, Instant}; + +const HEADER_LEN:u32 = 256; +const VECTOR_INDEX_LEN:u32 = 256*256*8; +const SEGMENT_INDEX_BLOCK_SIZE:u32 = 14; +static mut START_INDEX_POS:u32 = 0; +static mut END_INDEX_POS:u32 = 0; +const PROTOCAL:u16 = 2; +const INDEX_POLICY:u16 = 1; + +lazy_static! { + static ref REG_MAP: Mutex> = Mutex::new(HashMap::new()); +} + +lazy_static! { + static ref V_SEG: Mutex> = Mutex::new(Vec::new()); +} + +#[derive(Parser)] +#[command(author="Kevin Wang ", version="2.0")] +#[command(help_template = " Author: {author-with-newline} {about-section}Version: {version} \n {usage-heading} {usage} \n {all-args} {tab}")] +struct Args { + #[arg(short, long)] + in_file: PathBuf, + #[arg(short, long)] + out_file: PathBuf, +} + +struct Segment { + sip: u32, + eip: u32, + reg: String, +} + +#[derive(Debug, Clone, Copy)] +struct IndexBlock { + first_pos: u32, + last_pos: u32, +} + +impl Segment { + fn new(sip: u32, eip: u32, reg: String) -> Segment { + Segment {sip, eip, reg} + } +} + +fn load_segments(in_file: PathBuf) -> std::io::Result { + let in_f = File::open(in_file)?; + let reader = BufReader::new(in_f); + let mut count = 0; + let last_eip = 0; + for line in reader.lines() { + let line = line?; + let v: Vec<&str> = line.splitn(3, '|').collect(); + if v.len() != 3 { + panic!("invalid ip segment line '{}'", line) + } + let sip = v[0].to_u32_ip().expect(&format!("invalid ip address '{}' in line {}", v[0], line)); + let eip = v[1].to_u32_ip().expect(&format!("invalid ip address '{}' in line {}", v[0], line)); + if sip > eip { + panic!("start ip({}) should not be greater than end ip({})", sip, eip); + } + if v[2].len() < 1 { + panic!("empty region info in segment line `{}`", line); + } + // Check the continuity of data segment + if last_eip != 0 && last_eip + 1 != sip { + panic!("discontinuous data segment: last.eip+1!=seg.sip in line {}", line); + } + let segment = Segment::new(sip, eip, v[2].to_string()); + V_SEG.lock().unwrap().push(segment); + + count += 1; + } + return Ok(count.to_string()); +} + +fn write_region(out_fd: &mut File) -> std::io::Result<()> { + out_fd.seek(SeekFrom::Start((HEADER_LEN + VECTOR_INDEX_LEN).into()))?; + let v_seg = V_SEG.lock().unwrap(); + for seg in v_seg.iter() { + if REG_MAP.lock().unwrap().get(&seg.reg) == None { + let pos = out_fd.stream_position()?; + REG_MAP.lock().unwrap().insert(seg.reg.to_string(), pos as u32); + out_fd.write(seg.reg.as_bytes())?; + } + } + return Ok(()); +} + +fn split_ip(sip: u32, eip: u32, reg: String) -> Vec { + let s1 = sip >> 24 & 0xFF; + let s2 = sip >> 16 & 0xFF; + //let s3 = sip >> 8 & 0xFF; + //let s4 = sip & 0xFF; + + let e1 = eip >> 24 & 0xFF; + let e2 = eip >> 16 & 0xFF; + //let e3 = eip >> 8 & 0xFF; + //let e4 = eip & 0xFF; + + let mut node_list: Vec = Vec::new(); + + // println!("split:{}.{}.{}.{}~{}.{}.{}.{}", s1,s2,s3,s4,e1,e2,e3,e4); + for i in s1..e1+1 { + for j in (if i == s1 {s2} else {0})..(if i == e1 {e2+1} else {256}){ + let s_ip = if i == s1 && j == s2 { + sip + }else { + ((i << 24) & 0xff000000) | (j << 16 & 0xff0000) + }; + let e_ip = if i == e1 && j == e2 { + eip + }else { + ((i << 24) & 0xff000000) | ((j << 16) &0xff0000) | 0xffff + }; + node_list.push(Segment{sip:s_ip, eip:e_ip, reg:reg.to_string()}); + } + } + + return node_list; +} + +fn set_vector_index(arr: &mut [[IndexBlock; 256]; 256], ip:u32, block_pos:u32) { + let row: usize = (ip >> 24 & 0xff) as usize; + let col: usize = (ip >> 16 & 0xff ) as usize; + let vi_block = &mut arr[row][col]; + if vi_block.first_pos == 0 { + vi_block.first_pos = block_pos; + } + vi_block.last_pos = block_pos + SEGMENT_INDEX_BLOCK_SIZE; +} + +fn write_index_block(out_fd: &mut File) -> std::io::Result<()> { + let v_seg = V_SEG.lock().unwrap(); + let mut index_arr :[[IndexBlock; 256]; 256] = [[IndexBlock{first_pos: 0, last_pos: 0}; 256]; 256]; + for seg in v_seg.iter() { + let pos = REG_MAP.lock().unwrap().get(&seg.reg).copied().unwrap(); + let node_list = split_ip(seg.sip, seg.eip, seg.reg.to_string()); + for node in node_list { + let block_pos = out_fd.stream_position()?; + out_fd.write(&node.sip.to_le_bytes())?; + out_fd.write(&node.eip.to_le_bytes())?; + out_fd.write(&(node.reg.len() as u16).to_le_bytes())?; + out_fd.write(&pos.to_le_bytes())?; + set_vector_index(&mut index_arr, node.sip, block_pos as u32); + unsafe { + if START_INDEX_POS == 0 { + START_INDEX_POS = block_pos as u32; + } + END_INDEX_POS = block_pos as u32; + } + } + } + println!("try to write the segment index ptr ..."); + + out_fd.seek(SeekFrom::Start(HEADER_LEN.into()))?; + for i in 0..256 { + for j in 0..256 { + let index = index_arr[i][j]; + out_fd.write(&index.first_pos.to_le_bytes())?; + out_fd.write(&index.last_pos.to_le_bytes())?; + } + } + return Ok(()); +} + +fn write_header(out_fd: &mut File) -> std::io::Result<()> { + out_fd.seek(SeekFrom::Start(0))?; + out_fd.write(&PROTOCAL.to_le_bytes())?; + out_fd.write(&INDEX_POLICY.to_le_bytes())?; + let now = SystemTime::now(); + let timestamp = now.duration_since(UNIX_EPOCH).expect("Time went backwards").as_secs() as u32; + out_fd.write(×tamp.to_le_bytes())?; + unsafe { + out_fd.write(&START_INDEX_POS.to_le_bytes())?; + out_fd.write(&END_INDEX_POS.to_le_bytes())?; + } + + return Ok(()) + +} + +fn main() -> Result<(), Error> { + let args = Args::parse(); + let now = Instant::now(); + match load_segments(args.in_file) { + Ok(result) => println!("load {} lines", result), + Err(err) => println!("{}", err), + } + let mut out_fd = File::create(args.out_file).unwrap(); + write_region(&mut out_fd)?; + write_index_block(&mut out_fd)?; + write_header(&mut out_fd)?; + unsafe { + println!("write done, dataBlocks: {}, IndexBlock: {}, indexPtr: ({}, {})", + REG_MAP.lock().unwrap().len(), + V_SEG.lock().unwrap().len(), + START_INDEX_POS, END_INDEX_POS + ); + } + let sec = now.elapsed().as_secs(); + + println!("Done, elpsed: {}m{}s", sec/60, sec%60); + return Ok(()); +}