feat: add subcommands replace and transpose
Showing 8 changed files with 187 additions and 13 deletions.
Some generated files are not rendered by default.
@@ -1,6 +1,6 @@
 [package]
 name = "xtab"
-version = "0.0.4"
+version = "0.0.5"
 authors = ["sharkLoc <[email protected]>"]
 edition = "2021"
 homepage = "https://github.com/sharkLoc/xtab"
@@ -23,7 +23,7 @@ cargo b --release

 ```bash
 xtab -- CSV command line utilities
-Version: 0.0.4
+Version: 0.0.5

 Authors: sharkLoc <[email protected]>
 Source code: https://github.com/sharkLoc/xtab.git
@@ -46,30 +46,29 @@ Commands:
   flatten    flattened view of CSV records [aliases: flat]
   freq       Build frequency table of selected column in CSV data
   head       Print first N records from CSV file
-  pretty     Convert CSV to a readable aligned table
+  pretty     Convert CSV to a readable aligned table [aliases: prt]
+  replace    Replace data of matched fields
   reverse    Reverses rows of CSV data [aliases: rev]
   sample     Randomly select rows from CSV file using reservoir sampling
   search     Applies the regex to each field individually and shows only matching rows
   slice      Slice rows from a part of a CSV file
   tail       Print last N records from CSV file
+  transpose  Transpose CSV data [aliases: trans]
   uniq       Unique data with keys
   xlsx2csv   Convert XLSX to CSV format [aliases: x2c]
   view       Show CSV file content
   help       Print this message or the help of the given subcommand(s)

 Options:
-  -h, --help     Print help (see more with '--help')
   -V, --version  Print version
+  -h, --help     Print help (see more with '--help')

 Global Arguments:
   -o, --out <FILE>           Output file name, file ending in .gz/.bz2/.xz will be compressed automatically, if file not specified write data to stdout
-  -d, --delimiter <CHAR>     Set delimiter for input csv file, e.g., -d $'\t' for tab [default: ,]
-  -D, --out-delimite <CHAR>  Set delimiter for output CSV file, e.g., -D $'\t' for tab [default: ,]
+  -d, --delimiter <CHAR>     Set delimiter for input csv file, e.g., in linux -d $'\t' for tab, in powershell -d `t for tab [default: ,]
+  -D, --out-delimite <CHAR>  Set delimiter for output CSV file, e.g., in linux -D $'\t' for tab, in powershell -D `t for tab [default: ,]
       --log <FILE>           If file name specified, write log message to this file, or write to stderr
       --compress-level <INT> Set compression level 1 (compress faster) - 9 (compress better) for gzip/bzip2/xz output file, just work with option -o/--out [default: 6]
   -v, --verbosity <STR>      Control verbosity of logging, possible values: {error, warn, info, debug, trace} [default: debug]
-  [CSV]                      Input csv file name, if file not specified read data from stdin
+  [CSV]                      Input CSV file name, if file not specified read data from stdin

 Global FLAGS:
   -H, --no-header            If set, the first row is treated as a special header row, and the original header row excluded from output
@@ -0,0 +1,85 @@
use crate::utils::*;
use anyhow::{Error, Ok};
use csv::{ReaderBuilder, StringRecord, WriterBuilder};
use log::*;
use std::{path::PathBuf, time::Instant};

pub fn replace_csv(
    no_header: bool,
    delimiter: u8,
    out_delimiter: u8,
    index_str: &str,
    src: &str,
    dst: &str,
    all: bool,
    csv: Option<PathBuf>,
    csvo: Option<PathBuf>,
    compression_level: u32,
) -> Result<(), Error> {
    let start = Instant::now();

    let mut csv_reader = ReaderBuilder::new()
        .has_headers(no_header)
        .flexible(true)
        .delimiter(delimiter)
        .from_reader(file_reader(csv.as_ref())?);

    let mut col_index = vec![];
    for idx in index_str.split(',').collect::<Vec<&str>>() {
        let idx = idx.parse::<usize>()?;
        if col_index.contains(&idx) {
            warn!("duplicate columns index {}, keep first one", idx);
            continue;
        } else {
            col_index.push(idx);
        }
        if idx == 0 {
            error!("col_index error : {}, start from 1", idx);
            std::process::exit(1);
        }
    }

    match csv {
        Some(csv) => info!("read file from: {:?}", csv),
        None => info!("read file from stdin "),
    }

    let mut csv_writer = WriterBuilder::new()
        .has_headers(no_header)
        .delimiter(out_delimiter)
        .from_writer(file_writer(csvo.as_ref(), compression_level)?);

    let mut rec_new = StringRecord::new();
    let mut count = 0usize;
    for rec in csv_reader.records().flatten() {
        for (idx, each) in rec.iter().enumerate() {
            if all {
                if each == src {
                    rec_new.push_field(dst);
                    count += 1;
                } else {
                    rec_new.push_field(each);
                }
            } else {
                if col_index.contains(&(idx + 1)) {
                    if each == src {
                        rec_new.push_field(dst);
                        count += 1;
                    } else {
                        rec_new.push_field(each);
                    }
                } else {
                    rec_new.push_field(each);
                }
            }
        }
        csv_writer.write_record(&rec_new)?;
        rec_new.clear();
    }
    csv_writer.flush()?;

    info!("total replace cell count: {}", count);
    info!("time elapsed is: {:?}", start.elapsed());
    Ok(())
}
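For reference, here is a minimal standalone sketch of the per-field replacement idea that `replace_csv` implements, run on in-memory data with the `csv` crate instead of xtab's `file_reader`/`file_writer` helpers. The sample input, the column selection, and the `src`/`dst` values are invented for illustration; only the crate calls already used in the commit are assumed.

```rust
// Standalone sketch (not part of xtab): replace matching cells in selected
// 1-based columns, mirroring the inner loop of `replace_csv` above.
use csv::{ReaderBuilder, StringRecord, WriterBuilder};

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let input = "a,b,c\nNA,2,NA\n4,NA,6\n";
    let col_index = [1usize, 3]; // hypothetical column choice (1-based)
    let (src, dst) = ("NA", "0"); // hypothetical search/replace values

    let mut rdr = ReaderBuilder::new()
        .has_headers(false)
        .from_reader(input.as_bytes());
    let mut wtr = WriterBuilder::new().from_writer(std::io::stdout());

    let mut rec_new = StringRecord::new();
    for rec in rdr.records().flatten() {
        for (idx, each) in rec.iter().enumerate() {
            // enumerate() is 0-based, so compare against idx + 1,
            // matching the 1-based indices replace_csv expects.
            if col_index.contains(&(idx + 1)) && each == src {
                rec_new.push_field(dst);
            } else {
                rec_new.push_field(each);
            }
        }
        wtr.write_record(&rec_new)?;
        rec_new.clear();
    }
    wtr.flush()?;
    // prints:
    // a,b,c
    // 0,2,0
    // 4,NA,6
    Ok(())
}
```

When the `all` flag is set, `replace_csv` skips the column check and rewrites every field equal to `src`; the sketch shows only the column-restricted branch.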
@@ -0,0 +1,54 @@
use crate::utils::*;
use anyhow::{Error, Ok};
use csv::{ReaderBuilder, StringRecord, WriterBuilder};
use log::*;
use std::{collections::HashMap, path::PathBuf, time::Instant};

pub fn transpose_csv(
    no_header: bool,
    delimiter: u8,
    out_delimiter: u8,
    csv: Option<PathBuf>,
    csvo: Option<PathBuf>,
    compression_level: u32,
) -> Result<(), Error> {
    let start = Instant::now();

    let mut csv_reader = ReaderBuilder::new()
        .has_headers(no_header)
        .flexible(true)
        .delimiter(delimiter)
        .from_reader(file_reader(csv.as_ref())?);

    match csv {
        Some(csv) => info!("read file from: {:?}", csv),
        None => info!("read file from stdin "),
    }

    let mut df_hash: HashMap<usize, Vec<String>> = HashMap::new();
    for rec in csv_reader.records().flatten() {
        for (col, each) in rec.iter().enumerate() {
            df_hash.entry(col).or_default().push(each.to_string());
        }
    }

    let mut csv_writer = WriterBuilder::new()
        .has_headers(no_header)
        .delimiter(out_delimiter)
        .from_writer(file_writer(csvo.as_ref(), compression_level)?);

    let mut str_rec = StringRecord::new();
    for i in 0..df_hash.len() {
        let vec = df_hash.get(&i).unwrap();
        for v in vec.iter() {
            str_rec.push_field(v);
        }
        csv_writer.write_record(&str_rec)?;
        str_rec.clear();
    }
    csv_writer.flush()?;

    info!("time elapsed is: {:?}", start.elapsed());
    Ok(())
}
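Likewise, a minimal standalone sketch of the transpose-via-column-map approach used by `transpose_csv`, on invented in-memory data and assuming only the `csv` crate the commit already depends on. One consequence of this design worth noting: the whole table is buffered in the `HashMap` before the first transposed row can be written, so memory use grows with the input size.

```rust
// Standalone sketch (not part of xtab): transpose a small CSV by collecting
// cells into per-column vectors, as transpose_csv does above.
use csv::{ReaderBuilder, StringRecord, WriterBuilder};
use std::collections::HashMap;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let input = "a,b,c\n1,2,3\n4,5,6\n";

    let mut rdr = ReaderBuilder::new()
        .has_headers(false)
        .from_reader(input.as_bytes());

    // Key each cell by its column index; every key ends up holding one
    // row of the transposed output.
    let mut columns: HashMap<usize, Vec<String>> = HashMap::new();
    for rec in rdr.records().flatten() {
        for (col, each) in rec.iter().enumerate() {
            columns.entry(col).or_default().push(each.to_string());
        }
    }

    let mut wtr = WriterBuilder::new().from_writer(std::io::stdout());
    let mut row = StringRecord::new();
    for i in 0..columns.len() {
        for v in &columns[&i] {
            row.push_field(v);
        }
        wtr.write_record(&row)?;
        row.clear();
    }
    wtr.flush()?;
    // prints:
    // a,1,4
    // b,2,5
    // c,3,6
    Ok(())
}
```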