From fb97d3341a252b16243a62c16b29856d4a4a9eab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C3=A9ter=20Kir=C3=A1ly?= Date: Sun, 20 Nov 2022 21:36:01 +0100 Subject: [PATCH] Testing UniquenessChecker --- .../api/calculator/CalculatorFactory.java | 6 +- .../api/calculator/UniquenessCalculator.java | 2 +- ...essChecker.java => UniquenessChecker.java} | 10 +-- .../metadataqa/api/schema/SchemaUtils.java | 4 +- .../UniquenessCheckerTest.java | 80 +++++++++++++++++++ .../api/uniqueness/SolrClientMock.java | 4 + 6 files changed, 94 insertions(+), 12 deletions(-) rename src/main/java/de/gwdg/metadataqa/api/rule/singlefieldchecker/{UniqunessChecker.java => UniquenessChecker.java} (88%) create mode 100644 src/test/java/de/gwdg/metadataqa/api/rule/singlefieldchecker/UniquenessCheckerTest.java diff --git a/src/main/java/de/gwdg/metadataqa/api/calculator/CalculatorFactory.java b/src/main/java/de/gwdg/metadataqa/api/calculator/CalculatorFactory.java index d4b37e00..ab309517 100644 --- a/src/main/java/de/gwdg/metadataqa/api/calculator/CalculatorFactory.java +++ b/src/main/java/de/gwdg/metadataqa/api/calculator/CalculatorFactory.java @@ -9,7 +9,7 @@ import de.gwdg.metadataqa.api.rule.RuleCatalog; import de.gwdg.metadataqa.api.rule.RuleChecker; import de.gwdg.metadataqa.api.rule.logical.LogicalChecker; -import de.gwdg.metadataqa.api.rule.singlefieldchecker.UniqunessChecker; +import de.gwdg.metadataqa.api.rule.singlefieldchecker.UniquenessChecker; import de.gwdg.metadataqa.api.schema.Schema; import de.gwdg.metadataqa.api.schema.edm.EdmSchema; import de.gwdg.metadataqa.api.uniqueness.DefaultSolrClient; @@ -109,9 +109,9 @@ private void addRuleCatalogMeasurement() { private void injectSolr(List ruleCheckers) { for (RuleChecker ruleChecker : ruleCheckers) { - if (ruleChecker instanceof UniqunessChecker) { + if (ruleChecker instanceof UniquenessChecker) { initializeSolrConfiguration(); - ((UniqunessChecker)ruleChecker).setSolrClient(configuration.getSolrClient()); + ((UniquenessChecker)ruleChecker).setSolrClient(configuration.getSolrClient()); } else if (ruleChecker instanceof LogicalChecker) { injectSolr(((LogicalChecker)ruleChecker).getCheckers()); } diff --git a/src/main/java/de/gwdg/metadataqa/api/calculator/UniquenessCalculator.java b/src/main/java/de/gwdg/metadataqa/api/calculator/UniquenessCalculator.java index 60d3d55f..652b3d17 100644 --- a/src/main/java/de/gwdg/metadataqa/api/calculator/UniquenessCalculator.java +++ b/src/main/java/de/gwdg/metadataqa/api/calculator/UniquenessCalculator.java @@ -17,7 +17,7 @@ import java.util.List; /** - * Uniquness calculator + * Uniqueness calculator * * @author Péter Király */ diff --git a/src/main/java/de/gwdg/metadataqa/api/rule/singlefieldchecker/UniqunessChecker.java b/src/main/java/de/gwdg/metadataqa/api/rule/singlefieldchecker/UniquenessChecker.java similarity index 88% rename from src/main/java/de/gwdg/metadataqa/api/rule/singlefieldchecker/UniqunessChecker.java rename to src/main/java/de/gwdg/metadataqa/api/rule/singlefieldchecker/UniquenessChecker.java index aae2eb50..a9c1d0e8 100644 --- a/src/main/java/de/gwdg/metadataqa/api/rule/singlefieldchecker/UniqunessChecker.java +++ b/src/main/java/de/gwdg/metadataqa/api/rule/singlefieldchecker/UniquenessChecker.java @@ -10,22 +10,20 @@ import de.gwdg.metadataqa.api.uniqueness.SolrClient; import de.gwdg.metadataqa.api.uniqueness.UniquenessExtractor; -import java.util.ArrayList; import java.util.List; -import java.util.logging.Level; -public class UniqunessChecker extends SingleFieldChecker { +public class UniquenessChecker extends SingleFieldChecker { private static final long serialVersionUID = -1432138574479246596L; - public static final String PREFIX = "uniquness"; + public static final String PREFIX = "uniqueness"; protected String solrField; private SolrClient solrClient; - public UniqunessChecker(JsonBranch field) { + public UniquenessChecker(JsonBranch field) { this(field, field.getLabel()); } - public UniqunessChecker(JsonBranch field, String header) { + public UniquenessChecker(JsonBranch field, String header) { super(field, header + ":" + PREFIX); this.solrField = field.getLabel().equals("recordId") ? "id" : field.getIndexField() + "_ss"; } diff --git a/src/main/java/de/gwdg/metadataqa/api/schema/SchemaUtils.java b/src/main/java/de/gwdg/metadataqa/api/schema/SchemaUtils.java index 00390e15..ec359e75 100644 --- a/src/main/java/de/gwdg/metadataqa/api/schema/SchemaUtils.java +++ b/src/main/java/de/gwdg/metadataqa/api/schema/SchemaUtils.java @@ -23,7 +23,7 @@ import de.gwdg.metadataqa.api.rule.singlefieldchecker.NumericValueChecker; import de.gwdg.metadataqa.api.rule.singlefieldchecker.PatternChecker; import de.gwdg.metadataqa.api.rule.RuleChecker; -import de.gwdg.metadataqa.api.rule.singlefieldchecker.UniqunessChecker; +import de.gwdg.metadataqa.api.rule.singlefieldchecker.UniquenessChecker; import org.apache.commons.lang3.StringUtils; import java.util.ArrayList; @@ -123,7 +123,7 @@ private static List processRule(Schema schema, JsonBranch branch, R ruleCheckers.add(new DependencyChecker(branch, rule.getDependencies())); if (rule.getUnique() != null && rule.getUnique().equals(Boolean.TRUE)) - ruleCheckers.add(new UniqunessChecker(branch)); + ruleCheckers.add(new UniquenessChecker(branch)); if (rule.getLessThan() != null) pair(schema, ruleCheckers, branch, rule.getLessThan(), "LessThan"); diff --git a/src/test/java/de/gwdg/metadataqa/api/rule/singlefieldchecker/UniquenessCheckerTest.java b/src/test/java/de/gwdg/metadataqa/api/rule/singlefieldchecker/UniquenessCheckerTest.java new file mode 100644 index 00000000..b7e8f216 --- /dev/null +++ b/src/test/java/de/gwdg/metadataqa/api/rule/singlefieldchecker/UniquenessCheckerTest.java @@ -0,0 +1,80 @@ +package de.gwdg.metadataqa.api.rule.singlefieldchecker; + +import de.gwdg.metadataqa.api.calculator.Indexer; +import de.gwdg.metadataqa.api.counter.FieldCounter; +import de.gwdg.metadataqa.api.json.JsonBranch; +import de.gwdg.metadataqa.api.model.PathCacheFactory; +import de.gwdg.metadataqa.api.model.pathcache.CsvPathCache; +import de.gwdg.metadataqa.api.rule.RuleCheckerOutput; +import de.gwdg.metadataqa.api.rule.RuleCheckingOutputStatus; +import de.gwdg.metadataqa.api.rule.RuleCheckingOutputType; +import de.gwdg.metadataqa.api.schema.BaseSchema; +import de.gwdg.metadataqa.api.schema.CsvAwareSchema; +import de.gwdg.metadataqa.api.schema.Format; +import de.gwdg.metadataqa.api.schema.Schema; +import de.gwdg.metadataqa.api.uniqueness.SolrClientMock; +import de.gwdg.metadataqa.api.uniqueness.SolrConfiguration; +import de.gwdg.metadataqa.api.util.CsvReader; +import org.junit.Before; +import org.junit.Test; + +import java.util.regex.Pattern; + +import static org.junit.Assert.*; + +public class UniquenessCheckerTest { + + CsvPathCache cache; + SolrClientMock solrClient; + Schema schema; + + @Before + public void setUp() throws Exception { + SolrConfiguration solrConfiguration = new SolrConfiguration("localhost", "8983", "solr"); + schema = getSchema(Format.CSV); + solrClient = new SolrClientMock(solrConfiguration); + + cache = (CsvPathCache) PathCacheFactory.getInstance(schema.getFormat(), "URL,two three"); + cache.setCsvReader(new CsvReader().setHeader( ((CsvAwareSchema) schema).getHeader() )); + } + + @Test + public void success() { + UniquenessChecker checker = new UniquenessChecker(schema.getPathByLabel("name")); + checker.setSolrClient(solrClient); + + FieldCounter fieldCounter = new FieldCounter<>(); + checker.update(cache, fieldCounter, RuleCheckingOutputType.BOTH); + + assertEquals(2, fieldCounter.size()); + assertEquals("name:uniqueness", checker.getHeaderWithoutId()); + assertEquals("name:uniqueness:0", checker.getHeader()); + assertTrue(Pattern.compile("^name:uniqueness:\\d+$").matcher(checker.getHeader()).matches()); + assertEquals(RuleCheckingOutputStatus.PASSED, fieldCounter.get(checker.getHeader(RuleCheckingOutputType.STATUS)).getStatus()); + } + + @Test + public void failure() { + UniquenessChecker checker = new UniquenessChecker(schema.getPathByLabel("url")); + checker.setSolrClient(solrClient); + + FieldCounter fieldCounter = new FieldCounter<>(); + checker.update(cache, fieldCounter, RuleCheckingOutputType.BOTH); + + assertEquals(2, fieldCounter.size()); + assertEquals("url:uniqueness", checker.getHeaderWithoutId()); + assertEquals("url:uniqueness:0", checker.getHeader()); + assertTrue(Pattern.compile("^url:uniqueness:\\d+$").matcher(checker.getHeader()).matches()); + assertEquals(RuleCheckingOutputStatus.FAILED, fieldCounter.get(checker.getHeader(RuleCheckingOutputType.STATUS)).getStatus()); + } + + private Schema getSchema(Format format) { + BaseSchema schema = new BaseSchema() + .setFormat(format) + .addField(new JsonBranch("url").setExtractable().setIndexField("url")) + .addField(new JsonBranch("name").setExtractable().setIndexField("name")); + schema.setRecordId(schema.getPathByLabel("url")); + return schema; + } + +} \ No newline at end of file diff --git a/src/test/java/de/gwdg/metadataqa/api/uniqueness/SolrClientMock.java b/src/test/java/de/gwdg/metadataqa/api/uniqueness/SolrClientMock.java index 5ea80e0e..bb64ef8c 100644 --- a/src/test/java/de/gwdg/metadataqa/api/uniqueness/SolrClientMock.java +++ b/src/test/java/de/gwdg/metadataqa/api/uniqueness/SolrClientMock.java @@ -31,6 +31,10 @@ public String getSolrSearchResponse(String solrField, String value) { } else if (solrField.equals("dc_title_ss") && value.equals("Pyrker-Oberwart, Johann Ladislaus")) { return "{\"response\":{\"numFound\":3}}"; + } else if (solrField.equals("url_ss") && value.equals("URL")) { + return "{\"response\":{\"numFound\":3}}"; + } else if (solrField.equals("name_ss") && value.equals("two three")) { + return "{\"response\":{\"numFound\":1}}"; } else { System.err.printf("solrField: %s, value: %s\n", solrField, value); }