Skip to content

Commit d2e73fb

Browse files
committed
#529 WIP propvalsub index
1 parent 3b67266 commit d2e73fb

15 files changed

+455
-332
lines changed

cli/src/print.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ pub fn print_resource(
6464
Format::Json => resource.to_json(&context.store)?,
6565
Format::JsonLd => resource.to_json_ld(&context.store)?,
6666
Format::JsonAd => resource.to_json_ad()?,
67-
Format::NTriples => serialize::atoms_to_ntriples(resource.to_atoms()?, &context.store)?,
67+
Format::NTriples => serialize::atoms_to_ntriples(resource.to_atoms(), &context.store)?,
6868
Format::Pretty => pretty_print_resource(resource, &context.store)?,
6969
};
7070
println!("{}", out);

lib/src/atoms.rs

+34-4
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,13 @@
1-
//! The smallest units of data, consiting of a Subject, a Property and a Value
1+
//! The smallest units of data, consisting of a Subject, a Property and a Value
22
3-
use crate::{errors::AtomicResult, values::Value};
3+
use crate::{
4+
errors::AtomicResult,
5+
values::{ReferenceString, Value},
6+
};
47

5-
/// The Atom is the (non-validated) string representation of a piece of data.
6-
/// It's RichAtom sibling provides some extra methods.
8+
/// The Atom is the smallest meaningful piece of data.
9+
/// It describes how one value relates to a subject.
10+
/// A [Resource] can be converted into a bunch of Atoms.
711
#[derive(Clone, Debug)]
812
pub struct Atom {
913
/// The URL where the resource is located
@@ -27,6 +31,32 @@ impl Atom {
2731
let base_path = format!("{} {}", self.subject, self.property);
2832
self.value.to_subjects(Some(base_path))
2933
}
34+
35+
/// Converts one Atom to a series of stringified values that can be indexed.
36+
pub fn to_indexable_atoms(&self) -> Vec<IndexAtom> {
37+
let index_atoms = match &self.value.to_reference_index_strings() {
38+
Some(v) => v,
39+
None => return vec![],
40+
}
41+
.iter()
42+
.map(|v| IndexAtom {
43+
value: v.into(),
44+
subject: self.subject.clone(),
45+
property: self.property.clone(),
46+
})
47+
.collect();
48+
index_atoms
49+
}
50+
}
51+
52+
/// Differs from a regular [Atom], since the value here is always a string,
53+
/// and in the case of ResourceArrays, only a _single_ subject is used for each atom.
54+
/// One IndexAtom for every member of the ResourceArray is created.
55+
#[derive(Debug, Clone, PartialEq, Eq)]
56+
pub struct IndexAtom {
57+
pub subject: String,
58+
pub property: String,
59+
pub value: ReferenceString,
3060
}
3161

3262
impl std::fmt::Display for Atom {

lib/src/commit.rs

+7-3
Original file line numberDiff line numberDiff line change
@@ -344,18 +344,22 @@ impl Commit {
344344
// Remove all atoms from index if destroy
345345
if let Some(destroy) = self.destroy {
346346
if destroy {
347-
for atom in resource.to_atoms()?.into_iter() {
347+
for atom in resource.to_atoms().into_iter() {
348348
remove_atoms.push(atom);
349349
}
350350
}
351351
}
352352

353353
if update_index {
354354
for atom in remove_atoms {
355-
store.remove_atom_from_index(&atom, &resource_unedited)?;
355+
store
356+
.remove_atom_from_index(&atom, &resource_unedited)
357+
.map_err(|e| format!("Error removing atom from index: {e} Atom: {e}"))?
356358
}
357359
for atom in add_atoms {
358-
store.add_atom_to_index(&atom, &resource)?;
360+
store
361+
.add_atom_to_index(&atom, &resource)
362+
.map_err(|e| format!("Error adding atom to index: {e} Atom: {e}"))?;
359363
}
360364
}
361365
Ok(resource)

lib/src/db.rs

+65-127
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,24 @@
11
//! Persistent, ACID compliant, threadsafe to-disk store.
22
//! Powered by Sled - an embedded database.
33
4+
mod migrations;
5+
mod prop_val_sub_index;
6+
mod query_index;
7+
mod reference_index;
8+
#[cfg(test)]
9+
pub mod test;
10+
411
use std::{
512
collections::{HashMap, HashSet},
613
sync::{Arc, Mutex},
714
};
815

9-
use tracing::{instrument, trace};
16+
use tracing::{info, instrument, trace};
1017

1118
use crate::{
19+
atoms::IndexAtom,
1220
commit::CommitResponse,
21+
db::reference_index::key_to_atom,
1322
endpoints::{default_endpoints, Endpoint},
1423
errors::{AtomicError, AtomicResult},
1524
resources::PropVals,
@@ -19,20 +28,20 @@ use crate::{
1928

2029
use self::{
2130
migrations::migrate_maybe,
31+
prop_val_sub_index::{
32+
add_atom_to_prop_val_sub_index, find_in_prop_val_sub_index,
33+
remove_atom_from_prop_val_sub_index,
34+
},
2235
query_index::{
23-
atom_to_indexable_atoms, check_if_atom_matches_watched_query_filters, query_indexed,
24-
update_indexed_member, watch_collection, IndexAtom, QueryFilter, END_CHAR,
36+
check_if_atom_matches_watched_query_filters, query_indexed, update_indexed_member,
37+
watch_collection, IndexIterator, QueryFilter,
2538
},
39+
reference_index::{add_atom_to_reference_index, remove_atom_from_reference_index},
2640
};
2741

2842
// A function called by the Store when a Commit is accepted
2943
type HandleCommit = Box<dyn Fn(&CommitResponse) + Send + Sync>;
3044

31-
mod migrations;
32-
mod query_index;
33-
#[cfg(test)]
34-
pub mod test;
35-
3645
/// Inside the reference_index, each value is mapped to this type.
3746
/// The String on the left represents a Property URL, and the second one is the set of subjects.
3847
pub type PropSubjectMap = HashMap<String, HashSet<String>>;
@@ -54,13 +63,15 @@ pub struct Db {
5463
default_agent: Arc<Mutex<Option<crate::agents::Agent>>>,
5564
/// Stores all resources. The Key is the Subject as a `string.as_bytes()`, the value a [PropVals]. Propvals must be serialized using [bincode].
5665
resources: sled::Tree,
57-
/// Index for all AtomicURLs, indexed by their Value. Used to speed up TPF queries. See [key_for_reference_index]
66+
/// Index of all Atoms, sorted by {Value}-{Property}-{Subject}.
67+
/// See [reference_index]
5868
reference_index: sled::Tree,
69+
/// Index sorted by property + value.
70+
/// Used for TPF queries where the property is known.
71+
prop_val_sub_index: sled::Tree,
5972
/// Stores the members of Collections, easily sortable.
60-
/// See [collections_index]
61-
members_index: sled::Tree,
62-
/// A list of all the Collections currently being used. Is used to update `members_index`.
63-
/// See [collections_index]
73+
query_index: sled::Tree,
74+
/// A list of all the Collections currently being used. Is used to update `query_index`.
6475
watched_queries: sled::Tree,
6576
/// The address where the db will be hosted, e.g. http://localhost/
6677
server_url: String,
@@ -78,14 +89,16 @@ impl Db {
7889
let db = sled::open(path).map_err(|e|format!("Failed opening DB at this location: {:?} . Is another instance of Atomic Server running? {}", path, e))?;
7990
let resources = db.open_tree("resources_v1").map_err(|e|format!("Failed building resources. Your DB might be corrupt. Go back to a previous version and export your data. {}", e))?;
8091
let reference_index = db.open_tree("reference_index")?;
81-
let members_index = db.open_tree("members_index")?;
92+
let query_index = db.open_tree("members_index")?;
93+
let prop_val_sub_index = db.open_tree("prop_val_sub_index")?;
8294
let watched_queries = db.open_tree("watched_queries")?;
8395
let store = Db {
8496
db,
8597
default_agent: Arc::new(Mutex::new(None)),
8698
resources,
8799
reference_index,
88-
members_index,
100+
query_index,
101+
prop_val_sub_index,
89102
server_url,
90103
watched_queries,
91104
endpoints: default_endpoints(),
@@ -112,6 +125,22 @@ impl Db {
112125
Ok(store)
113126
}
114127

128+
#[instrument(skip(self))]
129+
fn all_index_atoms(&self, include_external: bool) -> IndexIterator {
130+
Box::new(
131+
self.all_resources(include_external)
132+
.flat_map(|resource| {
133+
let index_atoms: Vec<IndexAtom> = resource
134+
.to_atoms()
135+
.iter()
136+
.flat_map(|atom| atom.to_indexable_atoms())
137+
.collect();
138+
index_atoms
139+
})
140+
.map(Ok),
141+
)
142+
}
143+
115144
/// Internal method for fetching Resource data.
116145
#[instrument(skip(self))]
117146
fn set_propvals(&self, subject: &str, propvals: &PropVals) -> AtomicResult<()> {
@@ -152,15 +181,11 @@ impl Db {
152181
}
153182
}
154183

155-
/// Returns true if the index has been built.
156-
pub fn has_index(&self) -> bool {
157-
!self.reference_index.is_empty()
158-
}
159-
160184
/// Removes all values from the indexes.
161185
pub fn clear_index(&self) -> AtomicResult<()> {
162186
self.reference_index.clear()?;
163-
self.members_index.clear()?;
187+
self.prop_val_sub_index.clear()?;
188+
self.query_index.clear()?;
164189
self.watched_queries.clear()?;
165190
Ok(())
166191
}
@@ -216,9 +241,9 @@ impl Storelike for Db {
216241

217242
#[instrument(skip(self))]
218243
fn add_atom_to_index(&self, atom: &Atom, resource: &Resource) -> AtomicResult<()> {
219-
for index_atom in atom_to_indexable_atoms(atom)? {
220-
// It's OK if this overwrites a value
244+
for index_atom in atom.to_indexable_atoms() {
221245
add_atom_to_reference_index(&index_atom, self)?;
246+
add_atom_to_prop_val_sub_index(&index_atom, self)?;
222247
// Also update the query index to keep collections performant
223248
check_if_atom_matches_watched_query_filters(self, &index_atom, atom, false, resource)
224249
.map_err(|e| {
@@ -261,7 +286,7 @@ impl Storelike for Db {
261286
})?;
262287
}
263288
}
264-
for a in resource.to_atoms()? {
289+
for a in resource.to_atoms() {
265290
self.add_atom_to_index(&a, resource)
266291
.map_err(|e| format!("Failed to add atom to index {}. {}", a, e))?;
267292
}
@@ -271,8 +296,9 @@ impl Storelike for Db {
271296

272297
#[instrument(skip(self))]
273298
fn remove_atom_from_index(&self, atom: &Atom, resource: &Resource) -> AtomicResult<()> {
274-
for index_atom in atom_to_indexable_atoms(atom)? {
275-
delete_atom_from_reference_index(&index_atom, self)?;
299+
for index_atom in atom.to_indexable_atoms() {
300+
remove_atom_from_reference_index(&index_atom, self)?;
301+
remove_atom_from_prop_val_sub_index(&index_atom, self)?;
276302

277303
check_if_atom_matches_watched_query_filters(self, &index_atom, atom, true, resource)
278304
.map_err(|e| format!("Checking atom went wrong: {}", e))?;
@@ -455,75 +481,21 @@ impl Storelike for Db {
455481
}
456482
}
457483

458-
// No cache hit, perform the query
459-
let mut atoms = self.tpf(
460-
None,
461-
q.property.as_deref(),
462-
q.value.as_ref(),
463-
// We filter later on, not here
464-
true,
465-
)?;
466-
let count = atoms.len();
467-
468-
let mut subjects = Vec::new();
469-
let mut resources = Vec::new();
470-
for atom in atoms.iter() {
471-
// These nested resources are not fully calculated - they will be presented as -is
472-
subjects.push(atom.subject.clone());
473-
// We need the Resources if we want to sort by a non-subject value
474-
if q.include_nested || q.sort_by.is_some() {
475-
// We skip checking for Agent, because we don't return these results directly anyway
476-
match self.get_resource_extended(&atom.subject, true, None) {
477-
Ok(resource) => {
478-
resources.push(resource);
479-
}
480-
Err(e) => match &e.error_type {
481-
crate::AtomicErrorType::NotFoundError => {}
482-
crate::AtomicErrorType::UnauthorizedError => {}
483-
_err => {
484-
return Err(
485-
format!("Error when getting resource in collection: {}", e).into()
486-
)
487-
}
488-
},
489-
}
490-
}
491-
}
492-
493-
if atoms.is_empty() {
494-
return Ok(QueryResult {
495-
subjects: vec![],
496-
resources: vec![],
497-
count,
498-
});
499-
}
500-
501-
// If there is a sort value, we need to change the atoms to contain that sorted value, instead of the one matched in the TPF query
502-
if let Some(sort_prop) = &q.sort_by {
503-
// We don't use the existing array, we clear it.
504-
atoms = Vec::new();
505-
for r in &resources {
506-
// Users _can_ sort by optional properties! So we need a fallback defauil
507-
let fallback_default = crate::Value::String(END_CHAR.into());
508-
let sorted_val = r.get(sort_prop).unwrap_or(&fallback_default);
509-
let atom = Atom {
510-
subject: r.get_subject().to_string(),
511-
property: sort_prop.to_string(),
512-
value: sorted_val.to_owned(),
513-
};
514-
atoms.push(atom)
515-
}
516-
// Now we sort by the value that the user wants to sort by
517-
atoms.sort_by(|a, b| a.value.to_string().cmp(&b.value.to_string()));
518-
}
519-
520484
let q_filter: QueryFilter = q.into();
521485

522486
// Maybe make this optional?
523487
watch_collection(self, &q_filter)?;
524488

525-
// Add the atoms to the query_index
526-
for atom in atoms {
489+
info!(filter = ?q_filter, "Building query index");
490+
491+
// Choose the source of candidate atoms based on which parts of the query are set.
let atoms: IndexIterator = match (&q.property, q.value.as_ref()) {
    // Property known (value optional): look it up in the prop-val-sub index.
    (Some(prop), val) => find_in_prop_val_sub_index(self, prop, val),
    // No property and no value: every indexable atom is a candidate.
    (None, None) => self.all_index_atoms(q.include_external),
    // Value-only queries are not implemented yet. Return an error rather than
    // panicking via `todo!()`, so a single query cannot crash the caller.
    (None, Some(_)) => {
        return Err("Querying by value without a property is not yet supported".into())
    }
};
496+
497+
for a in atoms {
498+
let atom = a?;
527499
update_indexed_member(self, &q_filter, &atom.subject, &atom.value, false)?;
528500
}
529501

@@ -663,7 +635,7 @@ impl Storelike for Db {
663635
find_in_resource(&resource);
664636
Ok(vec)
665637
} else {
666-
resource.to_atoms()
638+
Ok(resource.to_atoms())
667639
}
668640
}
669641
Err(_) => Ok(vec),
@@ -697,42 +669,8 @@ impl Storelike for Db {
697669
}
698670
}
699671

700-
#[instrument(skip(store))]
701-
fn add_atom_to_reference_index(index_atom: &IndexAtom, store: &Db) -> AtomicResult<()> {
702-
let _existing = store
703-
.reference_index
704-
.insert(key_for_reference_index(index_atom).as_bytes(), b"")?;
705-
Ok(())
706-
}
707-
708-
#[instrument(skip(store))]
709-
fn delete_atom_from_reference_index(index_atom: &IndexAtom, store: &Db) -> AtomicResult<()> {
710-
store
711-
.reference_index
712-
.remove(key_for_reference_index(index_atom).as_bytes())?;
713-
Ok(())
714-
}
715-
716-
/// Constructs the Key for the index_value cache.
717-
fn key_for_reference_index(atom: &IndexAtom) -> String {
718-
format!("{}\n{}\n{}", atom.value, atom.property, atom.subject)
719-
}
720-
721-
/// Parses a Value index key string, converts it into an atom. Note that the Value of the atom will allways be a single AtomicURL here.
722-
fn key_to_atom(key: &str) -> AtomicResult<Atom> {
723-
let mut parts = key.split('\n');
724-
let val = parts.next().ok_or("Invalid key for value index")?;
725-
let prop = parts.next().ok_or("Invalid key for value index")?;
726-
let subj = parts.next().ok_or("Invalid key for value index")?;
727-
Ok(Atom::new(
728-
subj.into(),
729-
prop.into(),
730-
Value::AtomicUrl(val.into()),
731-
))
732-
}
733-
734672
fn corrupt_db_message(subject: &str) -> String {
735-
format!("Could not deserialize item {} from database. DB is possibly corrupt, could be due to an update or a lack of migrations. Restore to a previous version, export / serialize your data and import your data again.", subject)
673+
format!("Could not deserialize item {} from database. DB is possibly corrupt, could be due to an update or a lack of migrations. Restore to a previous version, export your data and import your data again.", subject)
736674
}
737675

738-
const DB_CORRUPT_MSG: &str = "Could not deserialize item from database. DB is possibly corrupt, could be due to an update or a lack of migrations. Restore to a previous version, export / serialize your data and import your data again.";
676+
const DB_CORRUPT_MSG: &str = "Could not deserialize item from database. DB is possibly corrupt, could be due to an update or a lack of migrations. Restore to a previous version, export your data and import your data again.";

0 commit comments

Comments
 (0)