diff --git a/application/apps/indexer/Cargo.toml b/application/apps/indexer/Cargo.toml index 6f6eb94b2..d613e1732 100644 --- a/application/apps/indexer/Cargo.toml +++ b/application/apps/indexer/Cargo.toml @@ -2,7 +2,8 @@ resolver = "2" members = [ - "addon", + "addons/dlt-tools", + "addons/file-tools", "indexer_base", "indexer_cli", "merging", diff --git a/application/apps/indexer/addon/Cargo.toml b/application/apps/indexer/addons/dlt-tools/Cargo.toml similarity index 70% rename from application/apps/indexer/addon/Cargo.toml rename to application/apps/indexer/addons/dlt-tools/Cargo.toml index f654a76ab..8b6ca9001 100644 --- a/application/apps/indexer/addon/Cargo.toml +++ b/application/apps/indexer/addons/dlt-tools/Cargo.toml @@ -1,5 +1,5 @@ [package] -name = "addon" +name = "dlt-tools" version = "0.1.0" edition = "2021" @@ -8,11 +8,11 @@ dlt-core = "0.14" env_logger = "0.10" futures = "0.3" chrono = { version = "0.4", features = ["serde"] } -indexer_base = { path = "../indexer_base" } +indexer_base = { path = "../../indexer_base" } log = "0.4.17" -parsers = { path = "../parsers" } -processor = { path = "../processor" } -sources = { path = "../sources" } +parsers = { path = "../../parsers" } +processor = { path = "../../processor" } +sources = { path = "../../sources" } thiserror = "1.0" tokio = { version = "1.17", features = ["full"] } tokio-util = {version = "0.7", features = ["codec", "net"] } diff --git a/application/apps/indexer/addon/src/lib.rs b/application/apps/indexer/addons/dlt-tools/src/lib.rs similarity index 99% rename from application/apps/indexer/addon/src/lib.rs rename to application/apps/indexer/addons/dlt-tools/src/lib.rs index 9da9ed61d..528ed0304 100644 --- a/application/apps/indexer/addon/src/lib.rs +++ b/application/apps/indexer/addons/dlt-tools/src/lib.rs @@ -116,7 +116,7 @@ mod tests { use super::*; use std::path::Path; - const DLT_FT_SAMPLE: &str = "../../../../application/developing/resources/attachments.dlt"; + const DLT_FT_SAMPLE: &str = 
/// Maximum number of leading bytes of a file that are inspected when
/// deciding whether the file is text or binary.
const BYTES_TO_READ: u64 = 10240;

/// Reports whether the file at `file_path` looks like a binary file.
///
/// The first [`BYTES_TO_READ`] bytes of the file are read and checked for
/// UTF-8 validity; a file whose inspected prefix is valid UTF-8 is treated as
/// text. Only UTF-8 validity is checked, so a file made purely of ASCII
/// control bytes is still classified as text.
///
/// An empty file is classified as text.
///
/// # Errors
///
/// Returns an error if the file cannot be opened, its metadata cannot be
/// queried, or reading from it fails.
pub fn is_binary(file_path: &Path) -> Result<bool> {
    let buffer = fetch_starting_chunk(file_path)?;
    match from_utf8(&buffer) {
        Ok(_) => Ok(false),
        Err(err) => {
            // `error_len() == None` means the input ended in the middle of a
            // multi-byte sequence. When the inspection window was completely
            // filled, that is merely our own cut through a character and says
            // nothing about the file, so it must not count as "binary".
            let window_full = buffer.len() as u64 == BYTES_TO_READ;
            let cut_mid_char = err.error_len().is_none();
            Ok(!(window_full && cut_mid_char))
        }
    }
}

/// Reads at most [`BYTES_TO_READ`] bytes from the start of `file_path`.
///
/// Files shorter than the limit are returned in full (including their last
/// byte); an empty file yields an empty buffer.
///
/// # Errors
///
/// Returns an error if the file cannot be opened, its metadata cannot be
/// queried, or reading from it fails.
fn fetch_starting_chunk(file_path: &Path) -> Result<Vec<u8>> {
    let file = File::open(file_path)?;
    // The reported length is only used to size the buffer; the actual read is
    // bounded by `take`, so no subtraction (and no underflow on empty files)
    // is involved. The value is capped at BYTES_TO_READ, so the cast to
    // `usize` cannot truncate.
    let expected = metadata(file_path)?.len().min(BYTES_TO_READ);
    let mut buffer = Vec::with_capacity(expected as usize);
    let mut reader = file.take(BYTES_TO_READ);
    reader.read_to_end(&mut buffer)?;
    Ok(buffer)
}
assert!(is_binary(Path::new( + "../../../../developing/resources/attachments.dlt" + ))?); + assert!(is_binary(Path::new( + "../../../../developing/resources/someip.pcap" + ))?); + assert!(is_binary(Path::new( + "../../../../developing/resources/someip.pcapng" + ))?); + + assert!(!is_binary(Path::new( + "../../../../developing/resources/chinese_poem.txt" + ))?); + assert!(!is_binary(Path::new( + "../../../../developing/resources/sample_utf_8.txt" + ))?); + assert!(!is_binary(Path::new( + "../../../../developing/resources/someip.xml" + ))?); + Ok(()) + } +} diff --git a/application/apps/indexer/indexer_cli/Cargo.toml b/application/apps/indexer/indexer_cli/Cargo.toml index 5bf18756a..3931f316b 100644 --- a/application/apps/indexer/indexer_cli/Cargo.toml +++ b/application/apps/indexer/indexer_cli/Cargo.toml @@ -18,7 +18,7 @@ indexer_base = { path = "../indexer_base" } indicatif = "0.17" lazy_static = "1.4" log = "0.4" -addon = { path = "../addon" } +dlt-tools = { path = "../addons/dlt-tools" } parsers = { path = "../parsers" } processor = { path = "../processor" } session = { path = "../session" } diff --git a/application/apps/indexer/indexer_cli/src/main.rs b/application/apps/indexer/indexer_cli/src/main.rs index dc267b788..c1cce1b97 100644 --- a/application/apps/indexer/indexer_cli/src/main.rs +++ b/application/apps/indexer/indexer_cli/src/main.rs @@ -21,7 +21,6 @@ extern crate lazy_static; mod interactive; use crate::interactive::handle_interactive_session; -use addon::{extract_dlt_ft, scan_dlt_ft}; use anyhow::{anyhow, Result}; use crossbeam_channel as cc; use crossbeam_channel::unbounded; @@ -31,6 +30,7 @@ use dlt_core::{ parse::DltParseError, statistics::{collect_dlt_stats, count_dlt_messages as count_dlt_messages_old}, }; +use dlt_tools::{extract_dlt_ft, scan_dlt_ft}; use env_logger::Env; use futures::{pin_mut, stream::StreamExt}; use indexer_base::{config::*, error_reporter::*, progress::IndexingResults}; diff --git 
a/application/developing/resources/chinese_poem.txt b/application/developing/resources/chinese_poem.txt new file mode 100644 index 000000000..6cc69d622 --- /dev/null +++ b/application/developing/resources/chinese_poem.txt @@ -0,0 +1,22 @@ + 和毛泽东 <<重上井冈山>>. 严永欣, 一九八八年. + + 久有归天愿 + 终过鬼门关 + 千里来寻归宿 + 春华变苍颜 + 到处群魔乱舞 + 更有妖雾盘绕 + 暗道入阴间 + 过了阎王殿 + 险处不须看 + + 风雷动 + 旌旗奋 + 忆人寰 + 八十三年过去 + 弹指一挥间 + 中原千军逐蒋 + 城楼万众检阅 + 褒贬满载还 + 世上无难事 + 只怕我癫痫 diff --git a/application/developing/resources/sample_utf_8.txt b/application/developing/resources/sample_utf_8.txt new file mode 100644 index 000000000..a159f58d6 --- /dev/null +++ b/application/developing/resources/sample_utf_8.txt @@ -0,0 +1,180 @@ +Original by Markus Kuhn, adapted for HTML by Martin Dürst. + +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] — 1999-08-20 + + +The ASCII compatible UTF-8 encoding of ISO 10646 and Unicode +plain-text files is defined in RFC 2279 and in ISO 10646-1 Annex R. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and Sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), + + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (A ⇔ B), + + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ 14.95 € │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ 
τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. 
+ Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' 
and means 'He said + that he lived in the northern land near the Western Sea.') + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ +