Skip to content

Commit 76c3b3b

Browse files
committed
Remove TPF #529
1 parent eb026be commit 76c3b3b

14 files changed

+222
-304
lines changed

CHANGELOG.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ Changes to JS assets are not included here, but in [`atomic-data-browser`'s CHAN
66

77
## UNRELEASED
88

9-
- Improve query performance, refactor indexes #529
9+
- Improve query performance, refactor indexes. The `.tpf` API is deprecated in favor of the more powerful `.query`. #529
1010
- Improved error handling for HTTPS initialization #530
1111

1212
## [v0.34.0] - 2022-10-31

lib/src/db/query_index.rs

+2-5
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ use crate::{
55
atoms::IndexAtom,
66
errors::AtomicResult,
77
storelike::{Query, QueryResult},
8-
values::{query_value_compare, SortableValue},
8+
values::SortableValue,
99
Atom, Db, Resource, Storelike, Value,
1010
};
1111
use serde::{Deserialize, Serialize};
@@ -112,7 +112,6 @@ pub fn query_indexed(store: &Db, q: &Query) -> AtomicResult<QueryResult> {
112112
if in_selection {
113113
let (k, _v) = kv.map_err(|_e| "Unable to parse query_cached")?;
114114
let (_q_filter, _val, subject) = parse_collection_members_key(&k)?;
115-
println!("Found subject: {} with val :{_val}", subject);
116115

117116
// If no external resources should be included, skip this one if it's an external resource
118117
if !q.include_external && !subject.starts_with(&self_url) {
@@ -179,7 +178,7 @@ fn check_resource_query_filter_property(
179178
}
180179
} else if let Some(filter_val) = &q_filter.value {
181180
for (prop, val) in resource.get_propvals() {
182-
if query_value_compare(val, filter_val) {
181+
if val.contains_value(filter_val) {
183182
return Some(prop.to_string());
184183
}
185184
}
@@ -350,7 +349,6 @@ pub fn create_query_index_key(
350349
};
351350
value_bytes.push(SEPARATION_BIT);
352351

353-
println!("Create key subject {:?} value {:?}", subject, value);
354352
let subject_bytes = if let Some(sub) = subject {
355353
sub.as_bytes().to_vec()
356354
} else {
@@ -383,7 +381,6 @@ pub fn parse_collection_members_key(bytes: &[u8]) -> AtomicResult<(QueryFilter,
383381
} else {
384382
return Err("Can't parse subject in members_key".into());
385383
};
386-
println!("Parsed key: {:?} {:?} {:?}", q_filter, value, subject);
387384
Ok((q_filter, value, subject))
388385
}
389386

lib/src/hierarchy.rs

+3-11
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
55
use core::fmt;
66

7-
use crate::{errors::AtomicResult, urls, AtomicError, Resource, Storelike, Value};
7+
use crate::{errors::AtomicResult, storelike::Query, urls, AtomicError, Resource, Storelike};
88

99
#[derive(Debug)]
1010
pub enum Right {
@@ -32,16 +32,8 @@ impl fmt::Display for Right {
3232

3333
/// Looks for children relations, adds to the resource. Performs a query on the `parent` property, might be expensive.
3434
pub fn add_children(store: &impl Storelike, resource: &mut Resource) -> AtomicResult<Resource> {
35-
let atoms = store.tpf(
36-
None,
37-
Some(urls::PARENT),
38-
Some(&Value::AtomicUrl(resource.get_subject().into())),
39-
false,
40-
)?;
41-
let mut children: Vec<String> = Vec::new();
42-
for atom in atoms {
43-
children.push(atom.subject)
44-
}
35+
let results = store.query(&Query::new_prop_val(urls::PARENT, resource.get_subject()))?;
36+
let mut children = results.subjects;
4537
children.sort();
4638
resource.set_propval(urls::CHILDREN.into(), children.into(), store)?;
4739
Ok(resource.to_owned())

lib/src/plugins/bookmark.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,7 @@ impl Parser {
121121
pub fn serialize(node: NodeRef) -> Result<String, FromUtf8Error> {
122122
let mut stream = Vec::new();
123123
if let Err(e) = node.serialize(&mut stream) {
124-
println!("{}", e);
124+
tracing::error!("Serialize error: {}", e);
125125
}
126126

127127
String::from_utf8(stream)

lib/src/plugins/versioning.rs

+11-32
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
use tracing::warn;
22

33
use crate::{
4-
collections::CollectionBuilder, endpoints::Endpoint, errors::AtomicResult, urls, AtomicError,
5-
Commit, Resource, Storelike, Value,
4+
collections::CollectionBuilder, endpoints::Endpoint, errors::AtomicResult, storelike::Query,
5+
urls, AtomicError, Commit, Resource, Storelike,
66
};
77

88
pub fn version_endpoint() -> Endpoint {
@@ -91,37 +91,16 @@ fn handle_all_versions_request(
9191
/// Searches the local store for all commits with this subject, returns sorted from old to new.
9292
#[tracing::instrument(skip(store))]
9393
fn get_commits_for_resource(subject: &str, store: &impl Storelike) -> AtomicResult<Vec<Commit>> {
94-
let commit_atoms = store.tpf(
95-
None,
96-
Some(urls::SUBJECT),
97-
Some(&Value::AtomicUrl(subject.into())),
98-
false,
99-
)?;
100-
let mut commits = Vec::new();
101-
for atom in commit_atoms {
102-
let resource = if let Ok(r) = store.get_resource(&atom.subject) {
103-
r
104-
} else {
105-
// https://github.com/atomicdata-dev/atomic-data-rust/issues/488
106-
warn!("Could not find commit for {} , skipping", atom.subject);
107-
continue;
108-
};
109-
let mut is_commit = false;
110-
// If users use the `subject` field for a non-commit, we prevent using it as a commit here.
111-
for c in resource.get(urls::IS_A)?.to_subjects(None)?.iter() {
112-
if c == urls::COMMIT {
113-
is_commit = true
114-
}
115-
}
116-
if is_commit {
117-
let commit = crate::Commit::from_resource(resource)?;
118-
commits.push(commit)
119-
}
120-
}
121-
// Sort all commits by date
122-
commits.sort_by(|a, b| a.created_at.cmp(&b.created_at));
94+
let mut q = Query::new_prop_val(urls::SUBJECT, subject);
95+
q.sort_by = Some(urls::CREATED_AT.into());
96+
let result = store.query(&q)?;
97+
let filtered: Vec<Commit> = result
98+
.resources
99+
.iter()
100+
.filter_map(|r| crate::Commit::from_resource(r.clone()).ok())
101+
.collect();
123102

124-
Ok(commits)
103+
Ok(filtered)
125104
}
126105

127106
#[tracing::instrument(skip(store))]

lib/src/populate.rs

+6-10
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ use crate::{
88
errors::AtomicResult,
99
parse::ParseOpts,
1010
schema::{Class, Property},
11+
storelike::Query,
1112
urls, Resource, Storelike, Value,
1213
};
1314

@@ -212,18 +213,13 @@ pub fn populate_default_store(store: &impl Storelike) -> AtomicResult<()> {
212213
/// Generates some nice collections for classes, such as `/agent` and `/collection`.
213214
/// Requires a `self_url` to be set in the store.
214215
pub fn populate_collections(store: &impl Storelike) -> AtomicResult<()> {
215-
let classes_atoms = store.tpf(
216-
None,
217-
Some("https://atomicdata.dev/properties/isA"),
218-
Some(&Value::AtomicUrl(
219-
"https://atomicdata.dev/classes/Class".into(),
220-
)),
221-
true,
222-
)?;
216+
let mut query = Query::new_class(urls::CLASS);
217+
query.include_external = true;
218+
let result = store.query(&query)?;
223219

224-
for atom in classes_atoms {
220+
for subject in result.subjects {
225221
let mut collection =
226-
crate::collections::create_collection_resource_for_class(store, &atom.subject)?;
222+
crate::collections::create_collection_resource_for_class(store, &subject)?;
227223
collection.save_locally(store)?;
228224
}
229225

lib/src/store.rs

+149
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
//! In-memory store of Atomic data.
22
//! This provides many methods for finding, changing, serializing and parsing Atomic Data.
33
4+
use crate::storelike::QueryResult;
5+
use crate::Value;
46
use crate::{atoms::Atom, storelike::Storelike};
57
use crate::{errors::AtomicResult, Resource};
68
use std::{collections::HashMap, sync::Arc, sync::Mutex};
@@ -24,6 +26,97 @@ impl Store {
2426
crate::populate::populate_base_models(&store)?;
2527
Ok(store)
2628
}
29+
30+
/// Triple Pattern Fragments interface.
31+
/// Use this for most queries, e.g. finding all items with some property / value combination.
32+
/// Returns an empty array if nothing is found.
33+
///
34+
/// # Example
35+
///
36+
/// For example, if I want to view all Resources that are instances of the class "Class", I'd do:
37+
///
38+
/// ```
39+
/// use atomic_lib::Storelike;
40+
/// let mut store = atomic_lib::Store::init().unwrap();
41+
/// store.populate();
42+
/// let atoms = store.tpf(
43+
/// None,
44+
/// Some("https://atomicdata.dev/properties/isA"),
45+
/// Some(&atomic_lib::Value::AtomicUrl("https://atomicdata.dev/classes/Class".into())),
46+
/// true
47+
/// ).unwrap();
48+
/// assert!(atoms.len() > 11)
49+
/// ```
50+
// Very costly, slow implementation.
51+
// Does not assume any indexing.
52+
fn tpf(
53+
&self,
54+
q_subject: Option<&str>,
55+
q_property: Option<&str>,
56+
q_value: Option<&Value>,
57+
// Whether resources from outside the store should be searched through
58+
include_external: bool,
59+
) -> AtomicResult<Vec<Atom>> {
60+
let mut vec: Vec<Atom> = Vec::new();
61+
62+
let hassub = q_subject.is_some();
63+
let hasprop = q_property.is_some();
64+
let hasval = q_value.is_some();
65+
66+
// Simply return all the atoms
67+
if !hassub && !hasprop && !hasval {
68+
for resource in self.all_resources(include_external) {
69+
for (property, value) in resource.get_propvals() {
70+
vec.push(Atom::new(
71+
resource.get_subject().clone(),
72+
property.clone(),
73+
value.clone(),
74+
))
75+
}
76+
}
77+
return Ok(vec);
78+
}
79+
80+
// Find atoms matching the TPF query in a single resource
81+
let mut find_in_resource = |resource: &Resource| {
82+
let subj = resource.get_subject();
83+
for (prop, val) in resource.get_propvals().iter() {
84+
if hasprop && q_property.as_ref().unwrap() == prop {
85+
if hasval {
86+
if val.contains_value(q_value.unwrap()) {
87+
vec.push(Atom::new(subj.into(), prop.into(), val.clone()))
88+
}
89+
break;
90+
} else {
91+
vec.push(Atom::new(subj.into(), prop.into(), val.clone()))
92+
}
93+
break;
94+
} else if hasval && !hasprop && val.contains_value(q_value.unwrap()) {
95+
vec.push(Atom::new(subj.into(), prop.into(), val.clone()))
96+
}
97+
}
98+
};
99+
100+
match q_subject {
101+
Some(sub) => match self.get_resource(sub) {
102+
Ok(resource) => {
103+
if hasprop | hasval {
104+
find_in_resource(&resource);
105+
Ok(vec)
106+
} else {
107+
Ok(resource.to_atoms())
108+
}
109+
}
110+
Err(_) => Ok(vec),
111+
},
112+
None => {
113+
for resource in self.all_resources(include_external) {
114+
find_in_resource(&resource);
115+
}
116+
Ok(vec)
117+
}
118+
}
119+
}
27120
}
28121

29122
impl Storelike for Store {
@@ -126,6 +219,62 @@ impl Storelike for Store {
126219
fn set_default_agent(&self, agent: crate::agents::Agent) {
127220
self.default_agent.lock().unwrap().replace(agent);
128221
}
222+
223+
fn query(&self, q: &crate::storelike::Query) -> AtomicResult<crate::storelike::QueryResult> {
224+
let atoms = self.tpf(
225+
None,
226+
q.property.as_deref(),
227+
q.value.as_ref(),
228+
q.include_external,
229+
)?;
230+
231+
// Remove duplicate subjects
232+
let mut subjects_deduplicated: Vec<String> = atoms
233+
.iter()
234+
.map(|atom| atom.subject.clone())
235+
.collect::<std::collections::HashSet<String>>()
236+
.into_iter()
237+
.collect();
238+
239+
// Sort by subject, better than no sorting
240+
subjects_deduplicated.sort();
241+
242+
// WARNING: Entering expensive loop!
243+
// This is needed for sorting, authorization and including nested resources.
244+
// It could be skipped if there is no authorization and sorting requirement.
245+
let mut resources = Vec::new();
246+
for subject in subjects_deduplicated.iter() {
247+
// These nested resources are not fully calculated - they will be presented as-is
248+
match self.get_resource_extended(subject, true, q.for_agent.as_deref()) {
249+
Ok(resource) => {
250+
resources.push(resource);
251+
}
252+
Err(e) => match &e.error_type {
253+
crate::AtomicErrorType::NotFoundError => {}
254+
crate::AtomicErrorType::UnauthorizedError => {}
255+
_other => {
256+
return Err(
257+
format!("Error when getting resource in collection: {}", e).into()
258+
)
259+
}
260+
},
261+
}
262+
}
263+
264+
if let Some(sort) = &q.sort_by {
265+
resources = crate::collections::sort_resources(resources, sort, q.sort_desc);
266+
}
267+
let mut subjects = Vec::new();
268+
for r in resources.iter() {
269+
subjects.push(r.get_subject().clone())
270+
}
271+
272+
Ok(QueryResult {
273+
count: atoms.len(),
274+
subjects,
275+
resources,
276+
})
277+
}
129278
}
130279

131280
#[cfg(test)]

0 commit comments

Comments
 (0)