Skip to content

Commit 117880f

Browse files
committed
v2
1 parent 0dc40ea commit 117880f

File tree

6 files changed

+158
-72
lines changed

6 files changed

+158
-72
lines changed

CHANGELOG.md

+3
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,9 @@
22

33
## Unreleased
44

5+
## v2.0.0 - 24 September 2024
6+
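- Breaking: the public API is now four functions: `to_lists`, `to_dicts`, `from_lists` and `from_dicts`. `to_lists_or_panic` and `to_lists_or_error` were removed; `to_lists` now returns a `String` error message instead of `Nil`, and unquoted fields are trimmed.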
7+
58
## v1.4.0 - 29 March 2024
69
- Fix bug where trailing comma was causing error
710

README.md

+10-5
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,8 @@
33
[![Package Version](https://img.shields.io/hexpm/v/gsv)](https://hex.pm/packages/gsv)
44
[![Hex Docs](https://img.shields.io/badge/hex-docs-ffaff3)](https://hexdocs.pm/gsv/)
55

6-
This is a simple csv parser and writer for gleam. It will get more performant in the future,
7-
but if you're looking for high performance now, I'd recommend doing ffi to an existing parser
8-
in your target runtime.
9-
10-
We are using the grammar from [rfc 4180](https://datatracker.ietf.org/doc/html/rfc4180#section-2)
6+
This is a simple CSV parser and writer for Gleam. It will become more performant and battle-tested in the future,
7+
but if you need that now, I'd recommend using FFI to call an existing parser in your target runtime.
118

129
#### Example
1310

@@ -23,6 +20,14 @@ pub fn main() {
2320
// Write a List(List(String)) to a CSV string
2421
let csv_str = records
2522
|> gsv.from_lists(separator: ",", line_ending: Windows)
23+
24+
// Parse a CSV string with headers to a List(Dict(String, String))
25+
let assert Ok(records) = gsv.to_dicts(csv_str)
26+
// => [ dict.from_list([ #("Hello", "Goodbye"), #("World", "Mars") ]) ]
27+
28+
// Write a List(Dict(String, String)) to a CSV string, treating the keys as the header row
29+
let csv_str = records
30+
|> gsv.from_dicts(separator: ",", line_ending: Windows)
2631
}
2732
```
2833

gleam.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
name = "gsv"
2-
version = "1.4.0"
2+
version = "2.0.0"
33
gleam = ">= 0.32.0"
44
description = "A simple csv parser and generator written in gleam "
55

src/gsv.gleam

+76-38
Original file line numberDiff line numberDiff line change
@@ -1,51 +1,18 @@
1+
import gleam/dict.{type Dict}
12
import gleam/int
23
import gleam/list
4+
import gleam/pair
35
import gleam/result
46
import gleam/string
57
import gsv/internal/ast.{ParseError}
68
import gsv/internal/token.{Location}
79

8-
/// Parses a csv string to a list of lists of strings.
9-
/// Automatically handles Windows and Unix line endings.
10-
pub fn to_lists(input: String) -> Result(List(List(String)), Nil) {
11-
input
12-
|> token.scan
13-
|> token.with_location
14-
|> ast.parse
15-
|> result.nil_error
16-
}
17-
18-
/// Parses a csv string to a list of lists of strings.
19-
/// Automatically handles Windows and Unix line endings.
20-
/// Panics with an error msg if the string is not valid csv.
21-
pub fn to_lists_or_panic(input: String) -> List(List(String)) {
22-
let res =
23-
input
24-
|> token.scan
25-
|> token.with_location
26-
|> ast.parse
27-
28-
case res {
29-
Ok(lol) -> lol
30-
Error(ParseError(Location(line, column), msg)) -> {
31-
panic as {
32-
"["
33-
<> "line "
34-
<> int.to_string(line)
35-
<> " column "
36-
<> int.to_string(column)
37-
<> "] of csv: "
38-
<> msg
39-
}
40-
[[]]
41-
}
42-
}
43-
}
44-
4510
/// Parses a csv string to a list of lists of strings.
4611
/// Automatically handles Windows and Unix line endings.
4712
/// Returns a string error msg if the string is not valid csv.
48-
pub fn to_lists_or_error(input: String) -> Result(List(List(String)), String) {
13+
/// Unquoted strings are trimmed, while quoted strings have leading and trailing
14+
/// whitespace preserved.
15+
pub fn to_lists(input: String) -> Result(List(List(String)), String) {
4916
input
5017
|> token.scan
5118
|> token.with_location
@@ -62,6 +29,43 @@ pub fn to_lists_or_error(input: String) -> Result(List(List(String)), String) {
6229
})
6330
}
6431

32+
/// Parses a csv string to a list of dicts.
33+
/// Automatically handles Windows and Unix line endings.
34+
/// Returns a string error msg if the string is not valid csv.
35+
/// Unquoted strings are trimmed, while quoted strings have leading and trailing
36+
/// whitespace preserved.
37+
/// Whitespace-only or empty strings are not valid headers; the columns under them are ignored.
38+
/// Whitespace only or empty strings are not considered "present" in the csv row and
39+
/// are not inserted into the row dict.
40+
pub fn to_dicts(input: String) -> Result(List(Dict(String, String)), String) {
41+
use lol <- result.try(to_lists(input))
42+
case lol {
43+
[] -> []
44+
[headers, ..rows] -> {
45+
let headers =
46+
list.index_fold(headers, dict.new(), fn(acc, x, i) {
47+
case string.trim(x) == "" {
48+
True -> acc
49+
False -> dict.insert(acc, i, x)
50+
}
51+
})
52+
53+
list.map(rows, fn(row) {
54+
use acc, x, i <- list.index_fold(row, dict.new())
55+
case dict.get(headers, i) {
56+
Error(Nil) -> acc
57+
Ok(h) ->
58+
case string.trim(x) {
59+
"" -> acc
60+
t -> dict.insert(acc, string.trim(h), t)
61+
}
62+
}
63+
})
64+
}
65+
}
66+
|> Ok
67+
}
68+
6569
/// Option for using "\n = LF = Unix" or "\r\n = CRLF = Windows"
6670
/// line endings. Use with the `from_lists` function when
6771
/// writing to a csv string.
@@ -107,3 +111,37 @@ pub fn from_lists(
107111
|> list.map(fn(row) { string.join(row, separator) })
108112
|> string.join(le_to_string(line_ending))
109113
}
114+
115+
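/// Takes a list of dicts and writes it to a csv string, using the sorted keys of the first dict as the header row.
/// Every dict is expected to have the same keys, since each row's values are written in that row's own sorted-key order.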
116+
/// Will automatically escape strings that contain double quotes or
117+
/// line endings with double quotes (in csv, double quotes get escaped by doing
118+
/// a double doublequote)
119+
/// The string `he"llo\n` becomes `"he""llo\n"`
120+
pub fn from_dicts(
121+
input: List(Dict(String, String)),
122+
separator separator: String,
123+
line_ending line_ending: LineEnding,
124+
) -> String {
125+
case input {
126+
[] -> ""
127+
[first_row, ..] -> {
128+
let headers =
129+
first_row
130+
|> dict.to_list
131+
|> list.map(pair.first)
132+
|> list.sort(string.compare)
133+
134+
let rows =
135+
list.map(input, fn(row) {
136+
row
137+
|> dict.to_list
138+
|> list.sort(fn(lhs, rhs) {
139+
string.compare(pair.first(lhs), pair.first(rhs))
140+
})
141+
|> list.map(pair.second)
142+
})
143+
144+
from_lists([headers, ..rows], separator, line_ending)
145+
}
146+
}
147+
}

src/gsv/internal/ast.gleam

+20-11
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010

1111
import gleam/list
1212
import gleam/result
13+
import gleam/string
1314
import gsv/internal/token.{
1415
type CsvToken, type Location, CR, Comma, Doublequote, LF, Location, Textdata,
1516
}
@@ -53,7 +54,7 @@ fn parse_p(
5354

5455
// File should begin with either Escaped or Nonescaped string
5556
[#(Textdata(str), _), ..remaining_tokens], Beginning, [] ->
56-
parse_p(remaining_tokens, JustParsedField, [[str]])
57+
parse_p(remaining_tokens, JustParsedField, [[string.trim(str)]])
5758

5859
[#(Doublequote, _), ..remaining_tokens], Beginning, [] ->
5960
parse_p(remaining_tokens, InsideEscapedString, [[""]])
@@ -94,39 +95,44 @@ fn parse_p(
9495
// (indicating an empty string)
9596
[#(Textdata(str), _), ..remaining_tokens],
9697
JustParsedComma,
97-
[curr_line, ..previously_parsed_lines] ->
98+
[curr_line, ..previously_parsed_lines]
99+
->
98100
parse_p(remaining_tokens, JustParsedField, [
99-
[str, ..curr_line],
101+
[string.trim(str), ..curr_line],
100102
..previously_parsed_lines
101103
])
102104

103105
[#(Doublequote, _), ..remaining_tokens],
104106
JustParsedComma,
105-
[curr_line, ..previously_parsed_lines] ->
107+
[curr_line, ..previously_parsed_lines]
108+
->
106109
parse_p(remaining_tokens, InsideEscapedString, [
107110
["", ..curr_line],
108111
..previously_parsed_lines
109112
])
110113

111114
[#(Comma, _), ..remaining_tokens],
112115
JustParsedComma,
113-
[curr_line, ..previously_parsed_lines] ->
116+
[curr_line, ..previously_parsed_lines]
117+
->
114118
parse_p(remaining_tokens, JustParsedComma, [
115119
["", ..curr_line],
116120
..previously_parsed_lines
117121
])
118122

119123
[#(CR, _), ..remaining_tokens],
120124
JustParsedComma,
121-
[curr_line, ..previously_parsed_lines] ->
125+
[curr_line, ..previously_parsed_lines]
126+
->
122127
parse_p(remaining_tokens, JustParsedCR, [
123128
["", ..curr_line],
124129
..previously_parsed_lines
125130
])
126131

127132
[#(LF, _), ..remaining_tokens],
128133
JustParsedComma,
129-
[curr_line, ..previously_parsed_lines] ->
134+
[curr_line, ..previously_parsed_lines]
135+
->
130136
parse_p(remaining_tokens, JustParsedNewline, [
131137
["", ..curr_line],
132138
..previously_parsed_lines
@@ -141,11 +147,12 @@ fn parse_p(
141147

142148
// If we just parsed a new line, we're expecting an escaped or non-escaped string
143149
[#(Textdata(str), _), ..remaining_tokens], JustParsedNewline, llf ->
144-
parse_p(remaining_tokens, JustParsedField, [[str], ..llf])
150+
parse_p(remaining_tokens, JustParsedField, [[string.trim(str)], ..llf])
145151

146152
[#(Doublequote, _), ..remaining_tokens],
147153
JustParsedNewline,
148-
[curr_line, ..previously_parsed_lines] ->
154+
[curr_line, ..previously_parsed_lines]
155+
->
149156
parse_p(remaining_tokens, InsideEscapedString, [
150157
["", ..curr_line],
151158
..previously_parsed_lines
@@ -162,7 +169,8 @@ fn parse_p(
162169
// but a double double quote "" escapes the double quote and we keep parsing
163170
[#(Doublequote, _), #(Doublequote, _), ..remaining_tokens],
164171
InsideEscapedString,
165-
[[str, ..rest_curr_line], ..previously_parsed_lines] ->
172+
[[str, ..rest_curr_line], ..previously_parsed_lines]
173+
->
166174
parse_p(remaining_tokens, InsideEscapedString, [
167175
[str <> "\"", ..rest_curr_line],
168176
..previously_parsed_lines
@@ -173,7 +181,8 @@ fn parse_p(
173181

174182
[#(other_token, _), ..remaining_tokens],
175183
InsideEscapedString,
176-
[[str, ..rest_curr_line], ..previously_parsed_lines] ->
184+
[[str, ..rest_curr_line], ..previously_parsed_lines]
185+
->
177186
parse_p(remaining_tokens, InsideEscapedString, [
178187
[str <> token.to_lexeme(other_token), ..rest_curr_line],
179188
..previously_parsed_lines

0 commit comments

Comments
 (0)