
Commit

feat: add subcli replace and transpose
sharkLoc committed Apr 26, 2024
1 parent 8343da6 commit bcfc1e5
Showing 8 changed files with 187 additions and 13 deletions.
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default.

2 changes: 1 addition & 1 deletion Cargo.toml
@@ -1,6 +1,6 @@
[package]
name = "xtab"
version = "0.0.4"
version = "0.0.5"
authors = ["sharkLoc <[email protected]>"]
edition = "2021"
homepage = "https://github.com/sharkLoc/xtab"
17 changes: 8 additions & 9 deletions README.md
@@ -23,7 +23,7 @@ cargo b --release

```bash
xtab -- CSV command line utilities
Version: 0.0.4
Version: 0.0.5

Authors: sharkLoc <[email protected]>
Source code: https://github.com/sharkLoc/xtab.git
@@ -46,30 +46,29 @@ Commands:
flatten flattened view of CSV records [aliases: flat]
freq Build frequency table of selected column in CSV data
head Print first N records from CSV file
pretty Convert CSV to a readable aligned table
pretty Convert CSV to a readable aligned table [aliases: prt]
replace Replace data of matched fields
reverse Reverses rows of CSV data [aliases: rev]
sample Randomly select rows from CSV file using reservoir sampling
search Applies the regex to each field individually and shows only matching rows
slice Slice rows from a part of a CSV file
tail Print last N records from CSV file
transpose Transpose CSV data [aliases: trans]
uniq Unique data with keys
xlsx2csv Convert XLSX to CSV format [aliases: x2c]
view Show CSV file content
help Print this message or the help of the given subcommand(s)


Options:
-h, --help Print help (see more with '--help')
-V, --version Print version
-h, --help Print help (see more with '--help')

Global Arguments:
-o, --out <FILE> Output file name, file ending in .gz/.bz2/.xz will be compressed automatically, if file not specified write data to stdout
-d, --delimiter <CHAR> Set delimiter for input csv file, e.g., -d $'\t' for tab [default: ,]
-D, --out-delimite <CHAR> Set delimiter for output CSV file, e.g., -D $'\t' for tab [default: ,]
-d, --delimiter <CHAR> Set delimiter for input csv file, e.g., in linux -d $'\t' for tab, in powershell -d `t for tab [default: ,]
-D, --out-delimite <CHAR> Set delimiter for output CSV file, e.g., in linux -D $'\t' for tab, in powershell -D `t for tab [default: ,]
--log <FILE> If file name specified, write log message to this file, or write to stderr
--compress-level <INT> Set compression level 1 (compress faster) - 9 (compress better) for gzip/bzip2/xz output file, just work with option -o/--out [default: 6]
-v, --verbosity <STR> Control verbosity of logging, possible values: {error, warn, info, debug, trace} [default: debug]
[CSV] Input csv file name, if file not specified read data from stdin
[CSV] Input CSV file name, if file not specified read data from stdin
Global FLAGS:
-H, --no-header If set, the first row is treated as a special header row, and the original header row excluded from output
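For orientation, the two new subcommands added in this commit might be driven like this (illustrative only: `input.csv`, `out.csv`, and `transposed.csv` are placeholder file names, data is read from stdin when no file is given, and the option names are taken from the `src/args.rs` hunks below):

```bash
# replace: swap cells in column 2 that exactly equal "NA" with "0"
cat input.csv | xtab replace --col-index 2 --src NA --dst 0 --out out.csv

# transpose: turn rows into columns (alias: trans)
cat input.csv | xtab transpose --out transposed.csv
```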
30 changes: 29 additions & 1 deletion src/args.rs
@@ -6,7 +6,7 @@ use clap::{value_parser, Parser};
#[command(
name = "xtab",
author = "sharkLoc",
version = "0.0.4",
version = "0.0.5",
next_line_help = false,
about = "CSV command line utilities",
long_about = "A simple and cross-platform program for CSV file manipulation"
@@ -152,6 +152,7 @@ pub enum Cmd {
},

/// Convert CSV to a readable aligned table
#[command(visible_alias = "prt")]
pretty {
/// Set the whole table width
#[arg(short = 'w', long = "width-table", value_name = "INT", value_parser = value_parser!(u16).range(0..=65535))]
@@ -167,6 +168,25 @@
header: bool,
},

/// Replace data of matched fields
replace {
/// Select columns index, e.g -c 2,3,5
#[arg(short = 'c', long = "col-index", value_name = "STR", default_value_t = String::from("1"))]
col_index: String,
/// Raw cell content
#[arg(short = 's', long = "src", value_name = "STR")]
src: String,
/// New cell content
#[arg(short = 'd', long = "dst", value_name = "STR")]
dst: String,
/// If set, replace data in whole CSV file, overwrite option -c
#[arg(short = 'a', long = "all", help_heading = Some("FLAGS"))]
all: bool,
/// Output file name, file ending in .gz/.bz2/.xz will be compressed automatically, if file not specified write data to stdout
#[arg(short = 'o', long = "out", value_name = "FILE")]
output: Option<PathBuf>,
},

/// Reverses rows of CSV data
#[command(visible_alias = "rev")]
reverse {
@@ -236,6 +256,14 @@
output: Option<PathBuf>,
},

/// Transpose CSV data
#[command(visible_alias = "trans")]
transpose {
/// Output file name, file ending in .gz/.bz2/.xz will be compressed automatically, if file not specified write data to stdout
#[arg(short = 'o', long = "out", value_name = "FILE")]
output: Option<PathBuf>,
},

/// Unique data with keys
uniq {
/// Select these fields as keys. e.g -k 2,3,5
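Besides the new `replace` and `transpose` variants, this hunk also gives `pretty` a visible alias. Assuming some CSV data on stdin (`data.csv` is a placeholder), the aliases declared above can be used in place of the full names:

```bash
cat data.csv | xtab prt      # same as: xtab pretty
cat data.csv | xtab trans    # same as: xtab transpose
```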
2 changes: 2 additions & 0 deletions src/command/mod.rs
@@ -6,6 +6,7 @@ pub mod flatten;
pub mod freq;
pub mod head;
pub mod pretty;
pub mod replace;
pub mod reverse;
pub mod sample;
pub mod search;
@@ -14,6 +15,7 @@ pub mod slice;
//pub mod split;
//pub mod stats;
pub mod tail;
pub mod transpose;
pub mod uniq;
pub mod view;
pub mod xlsx2csv;
85 changes: 85 additions & 0 deletions src/command/replace.rs
@@ -0,0 +1,85 @@
use crate::utils::*;
use anyhow::{Error, Ok};
use csv::{ReaderBuilder, StringRecord, WriterBuilder};
use log::*;
use std::{path::PathBuf, time::Instant};


pub fn replace_csv(
    no_header: bool,
    delimiter: u8,
    out_delimiter: u8,
    index_str: &str,
    src: &str,
    dst: &str,
    all: bool,
    csv: Option<PathBuf>,
    csvo: Option<PathBuf>,
    compression_level: u32,
) -> Result<(), Error> {
    let start = Instant::now();

    let mut csv_reader = ReaderBuilder::new()
        .has_headers(no_header)
        .flexible(true)
        .delimiter(delimiter)
        .from_reader(file_reader(csv.as_ref())?);

    let mut col_index = vec![];
    for idx in index_str.split(',').collect::<Vec<&str>>() {
        let idx = idx.parse::<usize>()?;
        if col_index.contains(&idx) {
            warn!("duplicate columns index {}, keep first one", idx);
            continue;
        } else {
            col_index.push(idx);
        }
        if idx == 0 {
            error!("col_index error : {}, start from 1", idx);
            std::process::exit(1);
        }
    }

    match csv {
        Some(csv) => info!("read file from: {:?}", csv),
        None => info!("read file from stdin "),
    }

    let mut csv_writer = WriterBuilder::new()
        .has_headers(no_header)
        .delimiter(out_delimiter)
        .from_writer(file_writer(csvo.as_ref(), compression_level)?);

    let mut rec_new = StringRecord::new();
    let mut count = 0usize;
    for rec in csv_reader.records().flatten() {
        for (idx, each) in rec.iter().enumerate() {
            if all {
                if each == src {
                    rec_new.push_field(dst);
                    count += 1;
                } else {
                    rec_new.push_field(each);
                }
            } else {
                if col_index.contains(&(idx + 1)) {
                    if each == src {
                        rec_new.push_field(dst);
                        count += 1;
                    } else {
                        rec_new.push_field(each);
                    }
                } else {
                    rec_new.push_field(each);
                }
            }
        }
        csv_writer.write_record(&rec_new)?;
        rec_new.clear();
    }
    csv_writer.flush()?;

    info!("total replace cell count: {}", count);
    info!("time elapsed is: {:?}", start.elapsed());
    Ok(())
}
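Note that the replacement above is an exact whole-cell comparison (`each == src`), not a substring or regex match, and column indices are 1-based. A rough illustration with made-up data, assuming the default comma delimiters and stdin input (log messages go to stderr):

```bash
$ printf '1,NA\n2,NAME\n' | xtab replace --col-index 2 --src NA --dst 0
1,0
2,NAME
```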
54 changes: 54 additions & 0 deletions src/command/transpose.rs
@@ -0,0 +1,54 @@
use crate::utils::*;
use anyhow::{Error, Ok};
use csv::{ReaderBuilder, StringRecord, WriterBuilder};
use log::*;
use std::{collections::HashMap, path::PathBuf, time::Instant};


pub fn transpose_csv(
    no_header: bool,
    delimiter: u8,
    out_delimiter: u8,
    csv: Option<PathBuf>,
    csvo: Option<PathBuf>,
    compression_level: u32,
) -> Result<(), Error> {
    let start = Instant::now();

    let mut csv_reader = ReaderBuilder::new()
        .has_headers(no_header)
        .flexible(true)
        .delimiter(delimiter)
        .from_reader(file_reader(csv.as_ref())?);

    match csv {
        Some(csv) => info!("read file from: {:?}", csv),
        None => info!("read file from stdin "),
    }

    let mut df_hash: HashMap<usize, Vec<String>> = HashMap::new();
    for rec in csv_reader.records().flatten() {
        for (col, each) in rec.iter().enumerate() {
            df_hash.entry(col).or_default().push(each.to_string());
        }
    }

    let mut csv_writer = WriterBuilder::new()
        .has_headers(no_header)
        .delimiter(out_delimiter)
        .from_writer(file_writer(csvo.as_ref(), compression_level)?);

    let mut str_rec = StringRecord::new();
    for i in 0..df_hash.len() {
        let vec = df_hash.get(&i).unwrap();
        for v in vec.iter() {
            str_rec.push_field(v);
        }
        csv_writer.write_record(&str_rec)?;
        str_rec.clear();
    }
    csv_writer.flush()?;

    info!("time elapsed is: {:?}", start.elapsed());
    Ok(())
}
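The transpose works by bucketing every value into `df_hash` keyed by its column index and then emitting each bucket as one output row, so the whole input is buffered in memory before anything is written (with `flexible(true)`, ragged input rows simply yield shorter transposed rows). A rough illustration with made-up data, assuming default delimiters and stdin input:

```bash
$ printf 'a,b,c\n1,2,3\n' | xtab transpose
a,1
b,2
c,3
```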
8 changes: 7 additions & 1 deletion src/main.rs
@@ -7,7 +7,7 @@ mod command;
mod utils;

use command::{
addheader::addheader_csv, csv2xlsx::csv_xlsx, dim::dim_csv, drop::drop_csv, flatten::flatten_csv, freq::freq_csv, head::head_csv, pretty::pretty_csv, reverse::reverse_csv, sample::sample_csv, search::search_csv, slice::slice_csv, tail::tail_csv, uniq::uniq_csv, view::view_csv, xlsx2csv::xlsx_csv
addheader::addheader_csv, csv2xlsx::csv_xlsx, dim::dim_csv, drop::drop_csv, flatten::flatten_csv, freq::freq_csv, head::head_csv, pretty::pretty_csv, replace::replace_csv, reverse::reverse_csv, sample::sample_csv, search::search_csv, slice::slice_csv, tail::tail_csv, transpose::transpose_csv, uniq::uniq_csv, view::view_csv, xlsx2csv::xlsx_csv
};


@@ -66,6 +66,12 @@ fn main() -> Result<(), Error>{
args::Cmd::search { pat, case, invert, output } => {
search_csv(cmd.no_header, cmd.delimiter as u8, cmd.out_delimite as u8, case, invert, &pat, cmd.input, output, cmd.compression_level)?;
}
args::Cmd::transpose { output } => {
transpose_csv(cmd.no_header, cmd.delimiter as u8, cmd.out_delimite as u8, cmd.input, output, cmd.compression_level)?;
}
args::Cmd::replace { col_index, src, dst, all, output } => {
replace_csv(cmd.no_header, cmd.delimiter as u8, cmd.out_delimite as u8, &col_index, &src, &dst, all, cmd.input, output, cmd.compression_level)?;
}
}

Ok(())
