Skip to content

Commit

Permalink
Replace old align -W mode with something much faster. (Keep --prLen
Browse files Browse the repository at this point in the history
as an option even though it is not very relevant here.)
  • Loading branch information
c-blake committed Jul 5, 2023
1 parent 5a71779 commit 001fe79
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 0 deletions.
2 changes: 2 additions & 0 deletions nio.nimble
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ when defined(windows):
"utils/lp2term", # length-prefix => terminated
"utils/nmatch", # print out lines prefixed by number of matches
"utils/transpose", # transpose a text matrix
"utils/nsv", # number of entries in SV data
]
else:
bin = @[
Expand All @@ -33,4 +34,5 @@ else:
"utils/lp2term", # length-prefix => terminated
"utils/nmatch", # print out lines prefixed by number of matches
"utils/transpose", # transpose a text matrix
"utils/nsv", # number of entries in SV data
]
40 changes: 40 additions & 0 deletions utils/nsv.nim
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
import cligen, cligen/[osUt, textUt, mslice, mfile]

proc nsv(input="-", delim="\t", maxCol=0, prLen=false, bulkSt=false,
         convert=false, sepOut="\t"): int =
  ## N)umber of S)eparated V)alues. This is useful in what people lately call
  ## ETL contexts -- in service of building pre-parsed binary files. You can
  ## almost generate a parsing schema for fixed width text fields by massaging:
  ## `paste <(head -n1 < $datSV | tr , \\\\n) <(tail -n+2 < $datSV | nsv -b)`
  ## `tail -n+2 < $datSV | nsv -c | bu/cstats ..` can give detailed width stats.
  # NOTE(review): the doc comment above is kept verbatim; presumably cligen's
  # `dispatch` renders it as the command's help text -- confirm before editing.
  if not (bulkSt or convert):           # Neither output mode was requested
    raise newException(HelpError, "No work! Full ${HELP}")
  let splitter = delim.initSep          # Splitter built from the delim string
  var
    nRows, nCols: int                   # Lines read; max fields in any line
    cells: seq[MSlice]                  # Fields of the current line (reused)
    widths: seq[int]                    # Largest width seen per column
  for row in input.mSlices:
    nRows += 1
    splitter.split row, cells, maxCol
    nCols = max(nCols, cells.len)
    if bulkSt and widths.len < nCols:   # Grow stats to cover new columns
      widths.setLen nCols
    for i, cell in cells:
      let wid =                         # Printed length only when prLen is set
        if prLen: printedLen(cell.toOpenArrayChar)
        else: cell.len
      if convert:                       # Emit the width in place of the field
        if i > 0: outu sepOut
        outu wid
      if bulkSt:                        # Track the running per-column maximum
        widths[i] = max(widths[i], wid)
    if convert: outu '\n'               # Terminate the converted row
  if bulkSt:
    for wid in widths: outu wid, '\n'   # One max width per line, column order
    erru nRows, " rows ", nCols, " cols\n"

# Command-line entry point, active only when this file is the main module.
# Includes cligen/mergeCfgEnv (presumably merging config-file & environment
# settings -- confirm against cligen) and exposes `nsv` via cligen `dispatch`.
# Each help key must match an `nsv` parameter name.
when isMainModule:
  include cligen/mergeCfgEnv
  dispatch nsv, help={
    "input"  : "path to mmap|read as input; \"-\" => stdin",
    "delim"  : "inp delim chars; Any repeats => foldable",
    # No aligning happens in this tool; the limit only bounds field splitting.
    "maxCol" : "max columns to form per row; 0=unlimited",
    "prLen"  : "adjust for ANSI SGR/utf8 in assessing width",
    # bulkSt writes per-column max widths to stdout as well as the summary.
    "bulkSt" : "max column widths to stdout; row & col counts to stderr",
    "convert": "convert every field into just its width",
    "sepOut" : "separator for converted output table"}

0 comments on commit 001fe79

Please sign in to comment.