Skip to content

Commit

Permalink
Add Fury instance and mapdb serializer with tests
Browse files Browse the repository at this point in the history
  • Loading branch information
mdoering committed Jan 29, 2025
1 parent 858284d commit 587e4a2
Show file tree
Hide file tree
Showing 5 changed files with 604 additions and 0 deletions.
190 changes: 190 additions & 0 deletions api/src/main/java/life/catalogue/common/fury/FuryFactory.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,190 @@
package life.catalogue.common.fury;

import life.catalogue.api.model.*;
import life.catalogue.api.search.NameUsageWrapper;
import life.catalogue.api.search.SimpleDecision;
import life.catalogue.api.vocab.*;
import life.catalogue.api.vocab.terms.*;
import life.catalogue.coldp.ColdpTerm;
import life.catalogue.common.date.FuzzyDate;

import org.gbif.dwc.terms.BibTexTerm;
import org.gbif.dwc.terms.TermFactory;
import org.gbif.dwc.terms.UnknownTerm;
import org.gbif.nameparser.api.*;

import java.net.URI;
import java.time.LocalDate;
import java.time.LocalDateTime;
import java.util.*;

import org.apache.fury.Fury;
import org.apache.fury.ThreadLocalFury;
import org.apache.fury.ThreadSafeFury;

import de.undercouch.citeproc.csl.CSLType;
import it.unimi.dsi.fastutil.ints.IntOpenHashSet;
import it.unimi.dsi.fastutil.ints.IntSet;
import it.unimi.dsi.fastutil.objects.ObjectArrayList;


/**
* Creates a thread safe fury instance with pre registered API classes
*/
public class FuryFactory {

public static final ThreadSafeFury FURY = new ThreadLocalFury(classLoader -> {
Fury f = Fury.builder()
.withLanguage(org.apache.fury.config.Language.JAVA)
.withClassLoader(classLoader)
.requireClassRegistration(false) // some non public class like JumboEnumSet cannot be registered
.build();
return configure(f);
});

public static Fury configure(Fury fury) {
// clb core
fury.register(Agent.class);
fury.register(Authorship.class);
fury.register(BareName.class);
fury.register(Citation.class);
fury.register(Classification.class);
fury.register(Coordinate.class);
fury.register(DOI.class);
fury.register(Dataset.class);
fury.register(DatasetImport.class);
fury.register(Distribution.class);
fury.register(EditorialDecision.Mode.class);
fury.register(EditorialDecision.class);
fury.register(Identifier.class);
fury.register(IndexName.class);
fury.register(Media.class);
fury.register(Name.class);
fury.register(NameRelation.class);
fury.register(NameUsageWrapper.class);
fury.register(Page.class);
fury.register(ParsedName.State.class);
fury.register(ParsedName.class);
fury.register(ParsedNameUsage.class);
fury.register(Reference.class);
fury.register(Sector.Mode.class);
fury.register(Sector.class);
fury.register(SimpleDecision.class);
fury.register(SimpleName.class);
fury.register(SpeciesEstimate.class);
fury.register(SpeciesInteraction.class);
fury.register(Synonym.class);
fury.register(TaxGroup.class);
fury.register(Taxon.class);
fury.register(TaxonConceptRelation.class);
fury.register(TaxonProperty.class);
fury.register(Treatment.class);
fury.register(TypeMaterial.class);
fury.register(VerbatimRecord.class);
fury.register(VernacularName.class);

// search
fury.register(NameUsageWrapper.class);
fury.register(SimpleDecision.class);

// CSL classes & enums
fury.register(CslData.class);
fury.register(CslDate.class);
fury.register(CslName.class);
fury.register(CslName[].class);
fury.register(CSLType.class);
fury.register(int[][].class);
fury.register(String[].class);

// date/time
fury.register(FuzzyDate.class);
fury.register(LocalDate.class);
fury.register(LocalDateTime.class);

// java & commons
fury.register(int[].class);
fury.register(URI.class);
fury.register(UUID.class);
registerCollectionClasses(fury);

// areas
fury.register(Area.class);
fury.register(AreaImpl.class);
fury.register(LonghurstArea.class);
fury.register(TdwgArea.class);

// enums
fury.register(Country.class);
fury.register(DataFormat.class);
fury.register(DatasetOrigin.class);
fury.register(Setting.class);
fury.register(DatasetType.class);
fury.register(DistributionStatus.class);
fury.register(EnumMap.class);
fury.register(EnumSet.class);
fury.register(EstimateType.class);
fury.register(Frequency.class);
fury.register(Gender.class);
fury.register(Gazetteer.class);
fury.register(GeoTime.class);
fury.register(GeoTimeType.class);
fury.register(ImportState.class);
fury.register(Issue.class);
fury.register(JobStatus.class);
fury.register(License.class);
fury.register(Environment.class);
fury.register(MatchType.class);
fury.register(MediaType.class);
fury.register(NamePart.class);
fury.register(NameType.class);
fury.register(NomCode.class);
fury.register(NomRelType.class);
fury.register(NomStatus.class);
fury.register(Origin.class);
fury.register(Rank.class);
fury.register(Sex.class);
fury.register(SpeciesInteractionType.class);
fury.register(TaxonomicStatus.class);
fury.register(TaxonConceptRelType.class);
fury.register(TreatmentFormat.class);
fury.register(TypeStatus.class);
fury.register(InfoGroup.class);

// term enums
TermFactory.instance().registerTermEnum(BiboOntTerm.class);
TermFactory.instance().registerTermEnum(ColdpTerm.class);
TermFactory.instance().registerTermEnum(EolDocumentTerm.class);
TermFactory.instance().registerTermEnum(EolReferenceTerm.class);
TermFactory.instance().registerTermEnum(InatTerm.class);
TermFactory.instance().registerTermEnum(TxtTreeTerm.class);
TermFactory.instance().registerTermEnum(WfoTerm.class);
for (Class<?> cl : TermFactory.instance().listRegisteredTermEnums()) {
fury.register(cl);
}
fury.register(UnknownTerm.class);
fury.register(BibTexTerm.class);
return fury;
}

public static void registerCollectionClasses(Fury fury) {
fury.register(ArrayList.class);
fury.register(HashMap.class);
fury.register(HashSet.class);
fury.register(EnumMap.class);
fury.register(EnumSet.class);
fury.register(LinkedHashMap.class);
fury.register(LinkedList.class);
fury.register(Collections.emptyList().getClass());
// private class, special registration
try {
Class clazz = Class.forName("java.util.Arrays$ArrayList");
fury.register(clazz);
} catch (ClassNotFoundException e) {
throw new RuntimeException(e);
}
// fastutils
fury.register(IntSet.class);
fury.register(IntOpenHashSet.class);
fury.register(ObjectArrayList.class);
}
}
55 changes: 55 additions & 0 deletions api/src/main/java/life/catalogue/common/fury/MapDbSerializer.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
package life.catalogue.common.fury;

import com.esotericsoftware.kryo.Kryo;
import com.esotericsoftware.kryo.io.Input;
import com.esotericsoftware.kryo.io.Output;
import com.esotericsoftware.kryo.util.Pool;

import org.apache.commons.lang3.NotImplementedException;
import org.mapdb.DataIO;
import org.mapdb.DataInput2;
import org.mapdb.DataOutput2;
import org.mapdb.serializer.GroupSerializerObjectArray;

import java.io.ByteArrayOutputStream;
import java.io.IOException;

/**
 * MapDB serializer which delegates the object encoding to the shared {@link FuryFactory#FURY} instance.
 * Every value is written as a packed int holding the byte length, followed by the fury bytes themselves.
 *
 * @param <T> the class to serialize
 */
public class MapDbSerializer<T> extends GroupSerializerObjectArray<T> {
  private final Class<T> clazz;

  /**
   * @param clazz the class handed to fury when reading values back
   */
  public MapDbSerializer(Class<T> clazz) {
    this.clazz = clazz;
  }

  /**
   * Writes the length prefixed fury bytes of the given value to the mapdb output.
   */
  @Override
  public void serialize(DataOutput2 out, T value) throws IOException {
    final byte[] payload = FuryFactory.FURY.serializeJavaObject(value);
    DataIO.packInt(out, payload.length);
    out.write(payload);
  }

  /**
   * Reads a length prefixed fury payload from the mapdb input and turns it into an instance again.
   *
   * @return the deserialized object or null if nothing is available
   */
  @Override
  public T deserialize(DataInput2 in, int available) throws IOException {
    if (available != 0) {
      final byte[] payload = new byte[DataIO.unpackInt(in)];
      in.readFully(payload);
      return FuryFactory.FURY.deserializeJavaObject(payload, clazz);
    }
    return null;
  }

  @Override
  public boolean isTrusted() {
    return true;
  }

  /**
   * Ordering is not supported by this serializer.
   *
   * @throws NotImplementedException always
   */
  @Override
  public int compare(T first, T second) {
    throw new NotImplementedException("compare should not be needed for our mapdb use");
  }

}
Loading

0 comments on commit 587e4a2

Please sign in to comment.