Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -54,5 +54,5 @@ e2e/playwright-report
e2e/test-results
.aider*
/tools/server/.lwjgl/
/tools/server/.lwjgl/
.m2_repo/
.serena/
20 changes: 20 additions & 0 deletions AGENTS.md
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,24 @@ It is illegal to `-q` when running tests!

---

## PIOSEE Decision Model (Adopted)

Use PIOSEE on every task to structure thinking and execution. It complements the routines below and ties directly into the Traceability trio (Description, Evidence, Plan).

- Problem: restate the task in one sentence, note constraints/timebox, and identify the likely routine (A/B/C).
- Information: inspect modules and AGENTS.md, gather environment constraints, locate existing tests/reports, and search code to localize the work.
- Options: list 2–3 viable approaches (routine choice, test scope, fix location) and weigh them with the Proportionality Model.
- Select: choose one option and routine; update the Living Plan with exactly one `in_progress` step.
- Execute: follow the Working Loop and house rules; for Routine A add the smallest failing test first; capture an Evidence block after each grouped action.
- Evaluate: check against the Definition of Done; if gaps remain, adjust the plan or change routine; record final Evidence and a brief retrospective.

PIOSEE → Traceability trio mapping:
- P/I/O → Description
- S → Plan (one `in_progress`)
- E/E → Evidence and Verification

For documentation‑only edits and other Routine B cases, still run PIOSEE briefly to confirm neutrality and reversibility.

## Proportionality Model (Think before you test)

Score the change on these lenses. If any are **High**, prefer **Routine A**.
Expand Down Expand Up @@ -342,6 +360,7 @@ It is illegal to `-q` when running tests!

## Working Loop

* **PIOSEE first:** restate Problem, gather Information, list Options; then Select, Execute, Evaluate.
* **Plan:** small, verifiable steps; keep one `in_progress`.
* **Change:** minimal, surgical edits; keep style/structure consistent.
* **Format:** `mvn -o -Dmaven.repo.local=.m2_repo -q -T 2C formatter:format impsort:sort xml-format:xml-format`
Expand Down Expand Up @@ -506,6 +525,7 @@ Do **not** modify existing headers’ years.
* **Files touched:** list file paths.
* **Commands run:** key build/test commands.
* **Verification:** which tests passed, where you checked reports.
* **PIOSEE trace (concise):** P/I/O summary, selected option/routine, key evaluate outcomes.
* **Evidence:**
*Routine A:* failing output (pre‑fix) and passing output (post‑fix).
*Routine B:* pre‑ and post‑green snippets from the **same selection** + **Hit Proof**.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@
import org.eclipse.rdf4j.model.Value;
import org.eclipse.rdf4j.model.ValueFactory;
import org.eclipse.rdf4j.query.algebra.evaluation.impl.EvaluationStatistics;
import org.eclipse.rdf4j.sail.Sail;
import org.eclipse.rdf4j.sail.SailException;
import org.eclipse.rdf4j.sail.base.BackingSailSource;
import org.eclipse.rdf4j.sail.base.Changeset;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
import org.eclipse.rdf4j.model.vocabulary.XSD;
import org.eclipse.rdf4j.sail.SailException;
import org.eclipse.rdf4j.sail.nativerdf.datastore.DataStore;
import org.eclipse.rdf4j.sail.nativerdf.datastore.RecoveredDataException;
import org.eclipse.rdf4j.sail.nativerdf.model.CorruptIRI;
import org.eclipse.rdf4j.sail.nativerdf.model.CorruptIRIOrBNode;
import org.eclipse.rdf4j.sail.nativerdf.model.CorruptLiteral;
Expand Down Expand Up @@ -145,7 +146,7 @@ public ValueStore(File dataDir, boolean forceSync) throws IOException {
public ValueStore(File dataDir, boolean forceSync, int valueCacheSize, int valueIDCacheSize, int namespaceCacheSize,
int namespaceIDCacheSize) throws IOException {
super();
dataStore = new DataStore(dataDir, FILENAME_PREFIX, forceSync);
dataStore = new DataStore(dataDir, FILENAME_PREFIX, forceSync, this);

valueCache = new ConcurrentCache<>(valueCacheSize);
valueIDCache = new ConcurrentCache<>(valueIDCacheSize);
Expand Down Expand Up @@ -194,15 +195,31 @@ public NativeValue getValue(int id) throws IOException {
NativeValue resultValue = valueCache.get(cacheID);

if (resultValue == null) {
// Value not in cache, fetch it from file
byte[] data = dataStore.getData(id);

if (data != null) {
resultValue = data2value(id, data);

if (!(resultValue instanceof CorruptValue)) {
// Store value in cache
valueCache.put(cacheID, resultValue);
try {
// Value not in cache, fetch it from file
byte[] data = dataStore.getData(id);
if (data != null) {
resultValue = data2value(id, data);
if (!(resultValue instanceof CorruptValue)) {
// Store value in cache
valueCache.put(cacheID, resultValue);
}
}
} catch (RecoveredDataException rde) {
byte[] recovered = rde.getData();
if (recovered != null && recovered.length > 0) {
byte t = recovered[0];
if (t == URI_VALUE) {
resultValue = new CorruptIRI(revision, id, null, recovered);
} else if (t == BNODE_VALUE) {
resultValue = new CorruptIRIOrBNode(revision, id, recovered);
} else if (t == LITERAL_VALUE) {
resultValue = new CorruptLiteral(revision, id, recovered);
} else {
resultValue = new CorruptUnknownValue(revision, id, recovered);
}
} else {
resultValue = new CorruptUnknownValue(revision, id, recovered);
}
}
}
Expand Down Expand Up @@ -434,21 +451,30 @@ public void close() throws IOException {
public void checkConsistency() throws SailException, IOException {
int maxID = dataStore.getMaxID();
for (int id = 1; id <= maxID; id++) {
byte[] data = dataStore.getData(id);
if (isNamespaceData(data)) {
String namespace = data2namespace(data);
try {
if (id == getNamespaceID(namespace, false)
&& java.net.URI.create(namespace + "part").isAbsolute()) {
continue;
try {
byte[] data = dataStore.getData(id);
if (isNamespaceData(data)) {
String namespace = data2namespace(data);
try {
if (id == getNamespaceID(namespace, false)
&& java.net.URI.create(namespace + "part").isAbsolute()) {
continue;
}
} catch (IllegalArgumentException e) {
// throw SailException
}
throw new SailException(
"Store must be manually exported and imported to fix namespaces like " + namespace);
} else {
Value value = this.data2value(id, data);
if (id != this.getID(copy(value))) {
throw new SailException(
"Store must be manually exported and imported to merge values like " + value);
}
} catch (IllegalArgumentException e) {
// throw SailException
}
throw new SailException(
"Store must be manually exported and imported to fix namespaces like " + namespace);
} else {
Value value = this.data2value(id, data);
} catch (RecoveredDataException rde) {
// Treat as a corrupt unknown value during consistency check
Value value = new CorruptUnknownValue(revision, id, rde.getData());
if (id != this.getID(copy(value))) {
throw new SailException(
"Store must be manually exported and imported to merge values like " + value);
Expand Down Expand Up @@ -584,7 +610,8 @@ private boolean isNamespaceData(byte[] data) {
return data[0] != URI_VALUE && data[0] != BNODE_VALUE && data[0] != LITERAL_VALUE;
}

private NativeValue data2value(int id, byte[] data) throws IOException {
@InternalUseOnly
public NativeValue data2value(int id, byte[] data) throws IOException {
if (data.length == 0) {
if (SOFT_FAIL_ON_CORRUPT_DATA_AND_REPAIR_INDEXES) {
logger.error("Soft fail on corrupt data: Empty data array for value with id {}", id);
Expand Down Expand Up @@ -704,8 +731,12 @@ private String getNamespace(int id) throws IOException {
String namespace = namespaceCache.get(cacheID);

if (namespace == null) {
byte[] namespaceData = dataStore.getData(id);
namespace = data2namespace(namespaceData);
try {
byte[] namespaceData = dataStore.getData(id);
namespace = data2namespace(namespaceData);
} catch (RecoveredDataException rde) {
namespace = data2namespace(rde.getData());
}

namespaceCache.put(cacheID, namespace);
}
Expand Down Expand Up @@ -829,13 +860,18 @@ public static void main(String[] args) throws Exception {

int maxID = valueStore.dataStore.getMaxID();
for (int id = 1; id <= maxID; id++) {
byte[] data = valueStore.dataStore.getData(id);
if (valueStore.isNamespaceData(data)) {
String ns = valueStore.data2namespace(data);
System.out.println("[" + id + "] " + ns);
} else {
Value value = valueStore.data2value(id, data);
System.out.println("[" + id + "] " + value.toString());
try {
byte[] data = valueStore.dataStore.getData(id);
if (valueStore.isNamespaceData(data)) {
String ns = valueStore.data2namespace(data);
System.out.println("[" + id + "] " + ns);
} else {
Value value = valueStore.data2value(id, data);
System.out.println("[" + id + "] " + value.toString());
}
} catch (RecoveredDataException rde) {
System.out.println("[" + id + "] CorruptUnknownValue:"
+ new CorruptUnknownValue(valueStore.revision, id, rde.getData()));
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,30 @@ public File getFile() {
return nioFile.getFile();
}

/**
 * Reports the size of the data file as tracked by this object.
 * <p>
 * {@code flush()} is invoked before the size is read, so writes that were still pending are flushed first.
 *
 * @return the value of the tracked file-size field after the flush
 * @throws IOException if flushing fails
 */
public long getFileSize() throws IOException {
	// Flush first; the size is only read once pending writes have been handled.
	flush();
	final long currentSize = nioFileSize;
	return currentSize;
}

/**
 * Best-effort recovery of the payload bytes of an entry whose 4-byte length field at {@code startOffset} cannot be
 * trusted (for example because it reads as zero).
 * <p>
 * The bytes returned start directly after the length field (at {@code startOffset + 4}) and extend towards
 * {@code endOffset}. At most 32 MiB are read so that a bogus bound cannot trigger a huge allocation.
 *
 * @param startOffset offset of the (corrupt) 4-byte length field of the entry
 * @param endOffset   exclusive upper bound of the region to read, e.g. the offset of the next known entry
 * @return the bytes read from the file, or an empty array when the region leaves no room for payload
 * @throws IOException if flushing or reading fails
 */
public byte[] tryRecoverBetweenOffsets(long startOffset, long endOffset) throws IOException {
	// Pending writes are flushed before anything is read back.
	flush();

	// Payload begins right after the 4-byte length field.
	final long payloadStart = startOffset + 4L;
	if (endOffset <= payloadStart) {
		return new byte[0];
	}

	final int maxRecoveryBytes = 32 * 1024 * 1024; // 32MB cap for recovery
	final long span = endOffset - payloadStart;

	// Clamp the span into [0, maxRecoveryBytes] before narrowing to int.
	final int length;
	if (span <= 0L) {
		length = 0;
	} else if (span >= maxRecoveryBytes) {
		length = maxRecoveryBytes;
	} else {
		length = (int) span;
	}

	return nioFile.readBytes(payloadStart, length);
}

/**
* Stores the specified data and returns the byte-offset at which it has been stored.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,16 @@
import java.io.Closeable;
import java.io.File;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.zip.CRC32;

import org.eclipse.rdf4j.common.io.ByteArrayUtil;
import org.eclipse.rdf4j.sail.nativerdf.NativeStore;
import org.eclipse.rdf4j.sail.nativerdf.ValueStore;
import org.eclipse.rdf4j.sail.nativerdf.model.NativeValue;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
* Class that provides indexed storage and retrieval of arbitrary length data.
Expand All @@ -35,6 +41,9 @@ public class DataStore implements Closeable {

private final HashFile hashFile;

private static final Logger logger = LoggerFactory.getLogger(DataStore.class);
private ValueStore valueStore;

/*--------------*
* Constructors *
*--------------*/
Expand All @@ -49,6 +58,11 @@ public DataStore(File dataDir, String filePrefix, boolean forceSync) throws IOEx
hashFile = new HashFile(new File(dataDir, filePrefix + ".hash"), forceSync);
}

/**
 * Creates a data store by delegating to the three-argument constructor and then remembering the supplied
 * {@link ValueStore}.
 * <p>
 * NOTE(review): in {@code getData} the stored value store is only consulted (after a null check) to decode
 * neighbouring entries for log messages during recovery - confirm that no other use is intended.
 *
 * @param dataDir    passed through to the three-argument constructor
 * @param filePrefix passed through to the three-argument constructor
 * @param forceSync  passed through to the three-argument constructor
 * @param valueStore the value store to keep a reference to; presumably may be {@code null} since {@code getData}
 *                   null-checks it - TODO confirm
 * @throws IOException if the delegated constructor fails
 */
public DataStore(File dataDir, String filePrefix, boolean forceSync, ValueStore valueStore) throws IOException {
this(dataDir, filePrefix, forceSync);
// Assigned after delegation; the three-argument constructor does not receive the value store.
this.valueStore = valueStore;
}

/*---------*
* Methods *
*---------*/
Expand All @@ -67,7 +81,108 @@ public byte[] getData(int id) throws IOException {
long offset = idFile.getOffset(id);

if (offset != 0L) {
return dataFile.getData(offset);
byte[] data = dataFile.getData(offset);
if (data.length == 0 && NativeStore.SOFT_FAIL_ON_CORRUPT_DATA_AND_REPAIR_INDEXES) {
try {
long offsetNoCache = idFile.getOffsetNoCache(id);
if (offset != offsetNoCache) {
logger.error("IDFile cache mismatch for id {}: cached={}, raw={}. Using raw.", id, offset,
offsetNoCache);
offset = offsetNoCache;
data = dataFile.getData(offset);
}
} catch (IOException e) {
// If raw read fails, keep cached offset
}

// Attempt recovery by using neighboring offsets to infer the bounds
long startData = offset + 4; // default start if no previous valid entry
// Find previous entry end: prevOffset + 4 + prevLength
int prev = id - 1;
for (; prev >= 1; prev--) {
long po = idFile.getOffset(prev);
try {
long poRaw = idFile.getOffsetNoCache(prev);
if (po != poRaw) {
logger.error("IDFile cache mismatch for prev id {}: cached={}, raw={}. Using raw.", prev,
po, poRaw);
po = poRaw;
}
} catch (IOException e) {
// use cached po if raw read fails
}
if (po > 0L) {
try {
byte[] prevData = dataFile.getData(po);
if (prevData != null && prevData.length > 0) {
try {
if (valueStore != null && Thread.currentThread().getStackTrace().length < 512) {
NativeValue nativeValue = valueStore.data2value(prev, prevData);
logger.warn("Data in previous ID ({}) is: {}", prev, nativeValue);
} else {
logger.warn("Data in previous ID ({}) is: {}", prev,
new String(prevData, StandardCharsets.UTF_8));
}
} catch (Exception ignored) {
}
startData = po + 4L + prevData.length;
break;
}
} catch (Exception ignored) {
}
}
}

// Find next entry start as the end bound
long endOffset = 0L;
int maxId = idFile.getMaxID();
int next = id + 1;
for (; next <= maxId; next++) {
long no = idFile.getOffset(next);
try {
long noRaw = idFile.getOffsetNoCache(next);
if (no != noRaw) {
logger.error("IDFile cache mismatch for next id {}: cached={}, raw={}. Using raw.", next,
no, noRaw);
no = noRaw;
}
} catch (IOException e) {
// use cached value if raw read fails
}
if (no > 0L) {

try {
byte[] nextData = dataFile.getData(no);
if (nextData != null && nextData.length > 0) {
try {
if (valueStore != null && Thread.currentThread().getStackTrace().length < 512) {
NativeValue nativeValue = valueStore.data2value(next, nextData);
logger.warn("Data in next ID ({}) is: {}", next, nativeValue);
} else {
logger.warn("Data in next ID ({}) is: {}", next,
new String(nextData, StandardCharsets.UTF_8));
}
} catch (Exception ignored) {
}
endOffset = no;
break;
}
} catch (Exception e) {
}

}
}
if (endOffset == 0L) {
// Fallback: use current file size as end bound
endOffset = dataFile.getFileSize();
}
if (endOffset > startData) {
// tryRecoverBetweenOffsets expects an offset to a 4-byte length, so pass (startData - 4)
byte[] recovered = dataFile.tryRecoverBetweenOffsets(Math.max(0L, startData - 4L), endOffset);
throw new RecoveredDataException(id, recovered);
}
}
return data;
}

return null;
Expand Down
Loading
Loading