Skip to content

Commit 58506ae

Browse files
committed
fix: always extract fulliri in nt, add non-ascii test
1 parent ed00a1e commit 58506ae

5 files changed

Lines changed: 53 additions & 65 deletions

File tree

AGENTS.md

Lines changed: 5 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ pathrex/
2727
│ └── formats/
2828
│ ├── mod.rs # FormatError enum, re-exports
2929
│ ├── csv.rs # Csv<R> — CSV → Edge iterator (CsvConfig, ColumnSpec)
30-
│ └── nt.rs # NTriples<R> — N-Triples → Edge iterator (LabelExtraction)
30+
│ └── nt.rs # NTriples<R> — N-Triples → Edge iterator
3131
├── tests/
3232
│ └── inmemory_tests.rs # Integration tests for InMemoryBuilder / InMemoryGraph
3333
├── deps/
@@ -211,24 +211,18 @@ Name-based lookup requires `has_header: true`.
211211

212212
#### `NTriples<R>`
213213

214-
[`NTriples<R>`](src/formats/nt.rs:57) parses [W3C N-Triples](https://www.w3.org/TR/n-triples/)
214+
[`NTriples<R>`](src/formats/nt.rs:51) parses [W3C N-Triples](https://www.w3.org/TR/n-triples/)
215215
RDF files using `oxttl`. Each triple `(subject, predicate, object)` becomes an
216216
[`Edge`](src/graph/mod.rs:154) where:
217217

218218
- `source` — subject IRI or blank-node ID (`_:label`).
219219
- `target` — object IRI or blank-node ID; triples whose object is an RDF
220220
literal yield `Err(FormatError::LiteralAsNode)` (callers may filter these out).
221-
- `label` — predicate IRI, transformed by [`LabelExtraction`](src/formats/nt.rs:36):
221+
- `label` — full predicate IRI string.
222222

223-
| Variant | Behaviour |
224-
|---|---|
225-
| `LocalName` (default) | Fragment (`#name`) or last path segment of the predicate IRI |
226-
| `FullIri` | Full predicate IRI string |
227-
228-
Constructors:
223+
Constructor:
229224

230-
- [`NTriples::new(reader)`](src/formats/nt.rs:72) — uses `LabelExtraction::LocalName`.
231-
- [`NTriples::with_label_extraction(reader, strategy)`](src/formats/nt.rs:76) — explicit strategy.
225+
- [`NTriples::new(reader)`](src/formats/nt.rs:56) — parses the stream; each predicate becomes the edge label verbatim.
232226

233227
### FFI layer
234228

src/formats/csv.rs

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -230,6 +230,25 @@ mod tests {
230230
assert!(edges.is_empty());
231231
}
232232

233+
#[test]
234+
fn test_non_ascii() {
235+
let csv = "source,target,label\n\
236+
人甲,人乙,认识\n\
237+
Алиса,Боб,знает\n";
238+
let edges: Vec<_> = make_csv(csv).collect();
239+
assert_eq!(edges.len(), 2);
240+
241+
let e0 = edges[0].as_ref().unwrap();
242+
assert_eq!(e0.source, "人甲");
243+
assert_eq!(e0.target, "人乙");
244+
assert_eq!(e0.label, "认识");
245+
246+
let e1 = edges[1].as_ref().unwrap();
247+
assert_eq!(e1.source, "Алиса");
248+
assert_eq!(e1.target, "Боб");
249+
assert_eq!(e1.label, "знает");
250+
}
251+
233252
#[test]
234253
fn test_graph_source_impl() {
235254
use crate::graph::{GraphBuilder, GraphDecomposition, InMemoryBuilder};

src/formats/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,6 @@ pub enum FormatError {
4747

4848
/// An RDF literal appeared as a subject or object where a node IRI or
4949
/// blank node was expected.
50-
#[error("RDF literal cannot be used as a graph node (triple skipped)")]
50+
#[error("RDF literal cannot be used as a graph node")]
5151
LiteralAsNode,
5252
}

src/formats/nt.rs

Lines changed: 26 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -33,19 +33,6 @@ use oxttl::ntriples::ReaderNTriplesParser;
3333
use crate::formats::FormatError;
3434
use crate::graph::Edge;
3535

36-
/// Controls how predicate IRIs are converted to edge label strings.
37-
#[derive(Debug, Clone, Default)]
38-
pub enum LabelExtraction {
39-
/// Use only the local name: the fragment (`#name`) or last path segment.
40-
/// For example, `http://example.org/ns/knows` → `"knows"`.
41-
/// This is the default.
42-
#[default]
43-
LocalName,
44-
/// Use the full IRI string as the label.
45-
/// For example, `http://example.org/ns/knows` → `"http://example.org/ns/knows"`.
46-
FullIri,
47-
}
48-
4936
/// An iterator that reads N-Triples and yields `Result<Edge, FormatError>`.
5037
///
5138
/// # Example
@@ -63,18 +50,12 @@ pub enum LabelExtraction {
6350
/// ```
6451
pub struct NTriples<R: Read> {
6552
inner: ReaderNTriplesParser<R>,
66-
label_extraction: LabelExtraction,
6753
}
6854

6955
impl<R: Read> NTriples<R> {
7056
pub fn new(reader: R) -> Self {
71-
Self::with_label_extraction(reader, LabelExtraction::LocalName)
72-
}
73-
74-
pub fn with_label_extraction(reader: R, label_extraction: LabelExtraction) -> Self {
7557
Self {
7658
inner: NTriplesParser::new().for_reader(reader),
77-
label_extraction,
7859
}
7960
}
8061

@@ -92,22 +73,6 @@ impl<R: Read> NTriples<R> {
9273
Term::Literal(_) => Err(FormatError::LiteralAsNode),
9374
}
9475
}
95-
96-
fn extract_label(iri: &str, strategy: &LabelExtraction) -> String {
97-
match strategy {
98-
LabelExtraction::FullIri => iri.to_owned(),
99-
LabelExtraction::LocalName => {
100-
// Fragment takes priority, then last path segment.
101-
if let Some(pos) = iri.rfind('#') {
102-
iri[pos + 1..].to_owned()
103-
} else if let Some(pos) = iri.rfind('/') {
104-
iri[pos + 1..].to_owned()
105-
} else {
106-
iri.to_owned()
107-
}
108-
}
109-
}
110-
}
11176
}
11277

11378
impl<R: Read> Iterator for NTriples<R> {
@@ -120,7 +85,7 @@ impl<R: Read> Iterator for NTriples<R> {
12085
};
12186

12287
let source = Self::subject_to_node_id(triple.subject.into());
123-
let label = Self::extract_label(triple.predicate.as_str(), &self.label_extraction);
88+
let label = triple.predicate.as_str().to_owned();
12489
let target = match Self::object_to_node_id(triple.object) {
12590
Ok(t) => t,
12691
Err(e) => return Some(Err(e)),
@@ -152,23 +117,12 @@ mod tests {
152117
let e0 = edges[0].as_ref().unwrap();
153118
assert_eq!(e0.source, "http://example.org/Alice");
154119
assert_eq!(e0.target, "http://example.org/Bob");
155-
assert_eq!(e0.label, "knows");
120+
assert_eq!(e0.label, "http://example.org/knows");
156121

157122
let e1 = edges[1].as_ref().unwrap();
158123
assert_eq!(e1.source, "http://example.org/Bob");
159124
assert_eq!(e1.target, "http://example.org/Charlie");
160-
assert_eq!(e1.label, "likes");
161-
}
162-
163-
#[test]
164-
fn test_full_iri_label_extraction() {
165-
let nt =
166-
"<http://example.org/Alice> <http://example.org/knows> <http://example.org/Bob> .\n";
167-
let edges: Vec<_> =
168-
NTriples::with_label_extraction(nt.as_bytes(), LabelExtraction::FullIri).collect();
169-
170-
assert_eq!(edges.len(), 1);
171-
assert_eq!(edges[0].as_ref().unwrap().label, "http://example.org/knows");
125+
assert_eq!(e1.label, "http://example.org/likes");
172126
}
173127

174128
#[test]
@@ -210,11 +164,32 @@ mod tests {
210164
}
211165

212166
#[test]
213-
fn test_fragment_iri_local_name() {
167+
fn test_predicate_with_fragment_is_full_iri_string() {
214168
let nt =
215169
"<http://example.org/Alice> <http://example.org/ns#knows> <http://example.org/Bob> .\n";
216170
let edges = parse(nt);
217-
assert_eq!(edges[0].as_ref().unwrap().label, "knows");
171+
assert_eq!(
172+
edges[0].as_ref().unwrap().label,
173+
"http://example.org/ns#knows"
174+
);
175+
}
176+
177+
#[test]
178+
fn test_non_ascii_in_iris() {
179+
let nt = "<http://example.org/人甲> <http://example.org/关系/认识> <http://example.org/人乙> .\n\
180+
<http://example.org/Алиса> <http://example.org/знает> <http://example.org/Боб> .\n";
181+
let edges = parse(nt);
182+
assert_eq!(edges.len(), 2);
183+
184+
let e0 = edges[0].as_ref().unwrap();
185+
assert_eq!(e0.source, "http://example.org/人甲");
186+
assert_eq!(e0.target, "http://example.org/人乙");
187+
assert_eq!(e0.label, "http://example.org/关系/认识");
188+
189+
let e1 = edges[1].as_ref().unwrap();
190+
assert_eq!(e1.source, "http://example.org/Алиса");
191+
assert_eq!(e1.target, "http://example.org/Боб");
192+
assert_eq!(e1.label, "http://example.org/знает");
218193
}
219194

220195
#[test]

src/graph/inmemory.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -303,7 +303,7 @@ mod tests {
303303
.expect("build should succeed");
304304

305305
assert_eq!(graph.num_nodes(), 3);
306-
assert!(graph.get_graph("knows").is_ok());
307-
assert!(graph.get_graph("likes").is_ok());
306+
assert!(graph.get_graph("http://example.org/knows").is_ok());
307+
assert!(graph.get_graph("http://example.org/likes").is_ok());
308308
}
309309
}

0 commit comments

Comments
 (0)