Skip to content

Commit

Permalink
Testing UniquenessChecker
Browse files Browse the repository at this point in the history
  • Loading branch information
pkiraly committed Nov 20, 2022
1 parent bd7e66c commit fb97d33
Show file tree
Hide file tree
Showing 6 changed files with 94 additions and 12 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import de.gwdg.metadataqa.api.rule.RuleCatalog;
import de.gwdg.metadataqa.api.rule.RuleChecker;
import de.gwdg.metadataqa.api.rule.logical.LogicalChecker;
import de.gwdg.metadataqa.api.rule.singlefieldchecker.UniqunessChecker;
import de.gwdg.metadataqa.api.rule.singlefieldchecker.UniquenessChecker;
import de.gwdg.metadataqa.api.schema.Schema;
import de.gwdg.metadataqa.api.schema.edm.EdmSchema;
import de.gwdg.metadataqa.api.uniqueness.DefaultSolrClient;
Expand Down Expand Up @@ -109,9 +109,9 @@ private void addRuleCatalogMeasurement() {

private void injectSolr(List<RuleChecker> ruleCheckers) {
for (RuleChecker ruleChecker : ruleCheckers) {
if (ruleChecker instanceof UniqunessChecker) {
if (ruleChecker instanceof UniquenessChecker) {
initializeSolrConfiguration();
((UniqunessChecker)ruleChecker).setSolrClient(configuration.getSolrClient());
((UniquenessChecker)ruleChecker).setSolrClient(configuration.getSolrClient());
} else if (ruleChecker instanceof LogicalChecker) {
injectSolr(((LogicalChecker)ruleChecker).getCheckers());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
import java.util.List;

/**
* Uniquness calculator
* Uniqueness calculator
*
* @author Péter Király <peter.kiraly at gwdg.de>
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,22 +10,20 @@
import de.gwdg.metadataqa.api.uniqueness.SolrClient;
import de.gwdg.metadataqa.api.uniqueness.UniquenessExtractor;

import java.util.ArrayList;
import java.util.List;
import java.util.logging.Level;

public class UniqunessChecker extends SingleFieldChecker {
public class UniquenessChecker extends SingleFieldChecker {

private static final long serialVersionUID = -1432138574479246596L;
public static final String PREFIX = "uniquness";
public static final String PREFIX = "uniqueness";
protected String solrField;
private SolrClient solrClient;

public UniqunessChecker(JsonBranch field) {
public UniquenessChecker(JsonBranch field) {
this(field, field.getLabel());
}

public UniqunessChecker(JsonBranch field, String header) {
public UniquenessChecker(JsonBranch field, String header) {
super(field, header + ":" + PREFIX);
this.solrField = field.getLabel().equals("recordId") ? "id" : field.getIndexField() + "_ss";
}
Expand Down
4 changes: 2 additions & 2 deletions src/main/java/de/gwdg/metadataqa/api/schema/SchemaUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
import de.gwdg.metadataqa.api.rule.singlefieldchecker.NumericValueChecker;
import de.gwdg.metadataqa.api.rule.singlefieldchecker.PatternChecker;
import de.gwdg.metadataqa.api.rule.RuleChecker;
import de.gwdg.metadataqa.api.rule.singlefieldchecker.UniqunessChecker;
import de.gwdg.metadataqa.api.rule.singlefieldchecker.UniquenessChecker;
import org.apache.commons.lang3.StringUtils;

import java.util.ArrayList;
Expand Down Expand Up @@ -123,7 +123,7 @@ private static List<RuleChecker> processRule(Schema schema, JsonBranch branch, R
ruleCheckers.add(new DependencyChecker(branch, rule.getDependencies()));

if (rule.getUnique() != null && rule.getUnique().equals(Boolean.TRUE))
ruleCheckers.add(new UniqunessChecker(branch));
ruleCheckers.add(new UniquenessChecker(branch));

if (rule.getLessThan() != null)
pair(schema, ruleCheckers, branch, rule.getLessThan(), "LessThan");
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
package de.gwdg.metadataqa.api.rule.singlefieldchecker;

import de.gwdg.metadataqa.api.calculator.Indexer;
import de.gwdg.metadataqa.api.counter.FieldCounter;
import de.gwdg.metadataqa.api.json.JsonBranch;
import de.gwdg.metadataqa.api.model.PathCacheFactory;
import de.gwdg.metadataqa.api.model.pathcache.CsvPathCache;
import de.gwdg.metadataqa.api.rule.RuleCheckerOutput;
import de.gwdg.metadataqa.api.rule.RuleCheckingOutputStatus;
import de.gwdg.metadataqa.api.rule.RuleCheckingOutputType;
import de.gwdg.metadataqa.api.schema.BaseSchema;
import de.gwdg.metadataqa.api.schema.CsvAwareSchema;
import de.gwdg.metadataqa.api.schema.Format;
import de.gwdg.metadataqa.api.schema.Schema;
import de.gwdg.metadataqa.api.uniqueness.SolrClientMock;
import de.gwdg.metadataqa.api.uniqueness.SolrConfiguration;
import de.gwdg.metadataqa.api.util.CsvReader;
import org.junit.Before;
import org.junit.Test;

import java.util.regex.Pattern;

import static org.junit.Assert.*;

public class UniquenessCheckerTest {

CsvPathCache cache;
SolrClientMock solrClient;
Schema schema;

@Before
public void setUp() throws Exception {
SolrConfiguration solrConfiguration = new SolrConfiguration("localhost", "8983", "solr");
schema = getSchema(Format.CSV);
solrClient = new SolrClientMock(solrConfiguration);

cache = (CsvPathCache) PathCacheFactory.getInstance(schema.getFormat(), "URL,two three");
cache.setCsvReader(new CsvReader().setHeader( ((CsvAwareSchema) schema).getHeader() ));
}

@Test
public void success() {
UniquenessChecker checker = new UniquenessChecker(schema.getPathByLabel("name"));
checker.setSolrClient(solrClient);

FieldCounter<RuleCheckerOutput> fieldCounter = new FieldCounter<>();
checker.update(cache, fieldCounter, RuleCheckingOutputType.BOTH);

assertEquals(2, fieldCounter.size());
assertEquals("name:uniqueness", checker.getHeaderWithoutId());
assertEquals("name:uniqueness:0", checker.getHeader());
assertTrue(Pattern.compile("^name:uniqueness:\\d+$").matcher(checker.getHeader()).matches());
assertEquals(RuleCheckingOutputStatus.PASSED, fieldCounter.get(checker.getHeader(RuleCheckingOutputType.STATUS)).getStatus());
}

@Test
public void failure() {
UniquenessChecker checker = new UniquenessChecker(schema.getPathByLabel("url"));
checker.setSolrClient(solrClient);

FieldCounter<RuleCheckerOutput> fieldCounter = new FieldCounter<>();
checker.update(cache, fieldCounter, RuleCheckingOutputType.BOTH);

assertEquals(2, fieldCounter.size());
assertEquals("url:uniqueness", checker.getHeaderWithoutId());
assertEquals("url:uniqueness:0", checker.getHeader());
assertTrue(Pattern.compile("^url:uniqueness:\\d+$").matcher(checker.getHeader()).matches());
assertEquals(RuleCheckingOutputStatus.FAILED, fieldCounter.get(checker.getHeader(RuleCheckingOutputType.STATUS)).getStatus());
}

private Schema getSchema(Format format) {
BaseSchema schema = new BaseSchema()
.setFormat(format)
.addField(new JsonBranch("url").setExtractable().setIndexField("url"))
.addField(new JsonBranch("name").setExtractable().setIndexField("name"));
schema.setRecordId(schema.getPathByLabel("url"));
return schema;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,10 @@ public String getSolrSearchResponse(String solrField, String value) {
} else if (solrField.equals("dc_title_ss")
&& value.equals("Pyrker-Oberwart, Johann Ladislaus")) {
return "{\"response\":{\"numFound\":3}}";
} else if (solrField.equals("url_ss") && value.equals("URL")) {
return "{\"response\":{\"numFound\":3}}";
} else if (solrField.equals("name_ss") && value.equals("two three")) {
return "{\"response\":{\"numFound\":1}}";
} else {
System.err.printf("solrField: %s, value: %s\n", solrField, value);
}
Expand Down

0 comments on commit fb97d33

Please sign in to comment.