From df63ff003aa33aaacebb169a3d2d1f2b52939dea Mon Sep 17 00:00:00 2001 From: Pascal Christoph Date: Tue, 13 Dec 2022 17:49:57 +0100 Subject: [PATCH] Tweak rdf lookup (metafacture-core#415) - update README - integrate lookup_rdf() into lookup() - rename target_language to select_language (complements b49445d82179101d90e8d7f17704dafd3c571f84) - remove comments in integrations test.fix for these are accounted to --- README.md | 27 +++++----- .../org/metafacture/metafix/FixMethod.java | 32 +++++++----- .../metafix/MetafixLookupTest.java | 52 +++++++++++++------ .../test.fix | 5 +- .../toJson/lookupRdfObjectToSubject/test.fix | 2 +- .../test.fix | 5 +- .../lookupRdfPropertyToProperty/test.fix | 2 +- .../test.fix | 2 +- 8 files changed, 72 insertions(+), 55 deletions(-) diff --git a/README.md b/README.md index 360ef8d4..740e54d1 100644 --- a/README.md +++ b/README.md @@ -190,7 +190,6 @@ The separator (`sep_char`) will vary depending on the source file, e.g.: | CSV | `,` or `;` | | TSV | `\t` | -<<<<<<< HEAD Options: - `allow_empty_values`: Sets whether to allow empty values in the filemap or to ignore these entries. (Default: `false`) @@ -209,7 +208,7 @@ The targeted RDF property can optional be bound by an RDF language tag. ```perl put_rdfmap("", "", target: "") -put_rdfmap("", "", target: ", target_language: """) +put_rdfmap("", "", target: "", select_language: "") ``` ##### `put_map` @@ -232,7 +231,7 @@ The targeted RDF property can optionally be bound by an RDF language tag. ```perl put_rdfmap("", "", target: "") -put_rdfmap("", "", target: "", target_language: "") +put_rdfmap("", "", target: "", select_language: "") ``` ##### `put_var` @@ -576,7 +575,7 @@ join_field("", "") ##### `lookup` -Looks up matching values in a map and replaces the field value with this match. [External files](#put_filemap) as well as [internal maps](#put_map) can be used. +Looks up matching values in a map and replaces the field value with this match. 
[External files](#put_filemap), [internal maps](#put_map) as well as [RDF resources](#put_rdfmap) can be used. Parameters: @@ -627,19 +626,19 @@ lookup("path.to.field", "map-name", __default: "NA") # with printing unknown values to a file lookup("path.to.field", "map-name", print_unknown: "true", destination: "unknown.txt") -``` - -##### `lookup_rdf` -Looks up matching values in an RDF resource and replaces the field value with a match defined by a targeted RDF property. External files or HTTP(S) resources as well as internal RDF maps can be used. +# rdf: Looks up matching values in an RDF resource and replaces the field value with a match defined by a targeted RDF property. External files or HTTP(S) resources as well as internal RDF maps can be used. The targeted RDF property can optionally be bound by an RDF language tag. -```perl -lookup_rdf("", "", target: "") -lookup_rdf("", "", target: "") -lookup_rdf("", "", target: "") -lookup_rdf("", "", target: "", target_language: "") -lookup_rdf("", "", target: "", __default: "NA") +## rdf map (explicit) +put_rdfmap("path/to/file", "rdf-map", target: "") +lookup("path.to.field", "rdf-map") + +## rdf with mandatory "target" (implicit) +lookup("path.to.field", "path/to/file|URL", target: "") + +## rdf with mandatory "target" and "select_language" (implicit) +lookup("path.to.field", "path/to/file|URL", target: "", select_language: "") ``` ##### `prepend` diff --git a/metafix/src/main/java/org/metafacture/metafix/FixMethod.java b/metafix/src/main/java/org/metafacture/metafix/FixMethod.java index 87bb5680..d4576901 100644 --- a/metafix/src/main/java/org/metafacture/metafix/FixMethod.java +++ b/metafix/src/main/java/org/metafacture/metafix/FixMethod.java @@ -97,7 +97,7 @@ public void apply(final Metafix metafix, final Record record, final List put_rdfmap { @Override public void apply(final Metafix metafix, final Record record, final List params, final Map options) { - final String rdfMapName = params.size() == 1 ? 
params.get(0) : params.get(1) + options.get(RdfMap.TARGET) + options.getOrDefault(RdfMap.TARGET_LANGUAGE, ""); + final String rdfMapName = params.size() == 1 ? params.get(0) + options.get(RdfMap.TARGET) + options.getOrDefault(RdfMap.TARGET_LANGUAGE, "") : params.get(1); final String replaceTargets = options.get(RdfMap.TARGET) + options.getOrDefault(RdfMap.TARGET_LANGUAGE, ""); final String resourceName = Optional.ofNullable(params.get(0)) .map(str -> str.replaceAll(replaceTargets + "$", "")) @@ -501,15 +501,10 @@ public void apply(final Metafix metafix, final Record record, final List map = options; } else { - final String mapName = params.get(1); + String mapName = params.get(1); if (!metafix.getMapNames().contains(mapName)) { - if (mapName.contains(".") || mapName.contains(File.separator)) { - put_filemap.apply(metafix, record, Arrays.asList(mapName), options); - } - else { - // Probably an unknown internal map? Log a warning? - } + mapName = putMapAndGetMapName(metafix, record, params, options, mapName); } map = metafix.getMap(mapName); @@ -541,11 +536,22 @@ public void apply(final Metafix metafix, final Record record, final List consumer.accept(null); } } - }, - lookup_rdf { - @Override - public void apply(final Metafix metafix, final Record record, final List params, final Map options) { - lookup(metafix, record, params, options, put_rdfmap); + + private String putMapAndGetMapName(final Metafix metafix, final Record record, final List params, final Map options, final String mapName) { + String newMapName = mapName; + if (options.containsKey(RdfMap.TARGET)) { + put_rdfmap.apply(metafix, record, Arrays.asList(params.get(1)), options); + newMapName = params.get(1) + options.get(RdfMap.TARGET) + options.getOrDefault(RdfMap.TARGET_LANGUAGE, ""); + } + else { + if (mapName.contains(".") || mapName.contains(File.separator)) { + put_filemap.apply(metafix, record, Arrays.asList(mapName), options); + } + else { + // Probably an unknown internal map? Log a warning? 
+ } + } + return newMapName; } }, prepend { diff --git a/metafix/src/test/java/org/metafacture/metafix/MetafixLookupTest.java b/metafix/src/test/java/org/metafacture/metafix/MetafixLookupTest.java index dfbd1be6..080ef06c 100644 --- a/metafix/src/test/java/org/metafacture/metafix/MetafixLookupTest.java +++ b/metafix/src/test/java/org/metafacture/metafix/MetafixLookupTest.java @@ -35,7 +35,6 @@ */ @ExtendWith(MockitoExtension.class) public class MetafixLookupTest { - private static final String CSV_MAP = "src/test/resources/org/metafacture/metafix/maps/test.csv"; private static final String RDF_MAP = "src/test/resources/org/metafacture/metafix/maps/test.ttl"; @@ -964,7 +963,7 @@ public void shouldPrintUnknownToFileWithoutAppend() throws IOException { public void shouldLookupInSeparateExternalRdfFileMapWithName() { assertRdfMap( "put_rdfmap('" + RDF_MAP + "', 'testMapSkosNotation', target: 'skos:notation')", - "lookup_rdf('notation', 'testMapSkosNotation', target: 'skos:notation')" + "lookup('notation', 'testMapSkosNotation', target: 'skos:notation')" ); } @@ -973,21 +972,21 @@ public void shouldLookupInSeparateExternalRdfFileMapWithDifferentTargets() { assertRdfMapWithDifferentTargets( "put_rdfmap('" + RDF_MAP + "', 'testRdfMapSkosNotation', target: 'skos:notation')", "put_rdfmap('" + RDF_MAP + "', 'testRdfMapCreated', target: 'created', __default: '__default')", - "lookup_rdf('notation', 'testRdfMapSkosNotation', target: 'skos:notation')", - "lookup_rdf('created', 'testRdfMapCreated', target: 'created')"); + "lookup('notation', 'testRdfMapSkosNotation', target: 'skos:notation')", + "lookup('created', 'testRdfMapCreated', target: 'created')"); } @Test public void shouldLookupInExternalRdfWithDifferentTargets() { assertRdfMapWithDifferentTargets( - "lookup_rdf('notation', '" + RDF_MAP + "', target: 'skos:notation')", - "lookup_rdf('created', '" + RDF_MAP + "', target: 'created', __default: '__default')"); + "lookup('notation', '" + RDF_MAP + "', target: 
'skos:notation')", + "lookup('created', '" + RDF_MAP + "', target: 'created', __default: '__default')"); } @Test public void shouldLookupInExternalRdfUseDefinedDefaultValueIfNotFound() { MetafixTestHelpers.assertFix(streamReceiver, Arrays.asList( - "lookup_rdf('created', '" + RDF_MAP + "', target: 'created', __default: '0000-01-01')" + "lookup('created', '" + RDF_MAP + "', target: 'created', __default: '0000-01-01')" ), i -> { i.startRecord("1"); @@ -1005,7 +1004,7 @@ public void shouldLookupInExternalRdfUseDefinedDefaultValueIfNotFound() { @Test public void shouldLookupInExternalRdfUseDefaultValueIfNotFound() { MetafixTestHelpers.assertFix(streamReceiver, Arrays.asList( - "lookup_rdf('created', '" + RDF_MAP + "', target: 'created', __default: '__default')" + "lookup('created', '" + RDF_MAP + "', target: 'created', __default: '__default')" ), i -> { i.startRecord("1"); @@ -1023,14 +1022,33 @@ public void shouldLookupInExternalRdfUseDefaultValueIfNotFound() { @Test public void shouldLookupInExternalRdfMapGetObjectOfSubjectWithTargetedPredicate() { assertRdfMap( - "lookup_rdf('notation', '" + RDF_MAP + "', target: 'skos:notation')" + "lookup('notation', '" + RDF_MAP + "', target: 'skos:notation')" + ); + } + + @Test + public void shouldExplicitLookupRdfUrlWithRedirection() { + MetafixTestHelpers.assertFix(streamReceiver, Arrays.asList( + "put_rdfmap('" + RDF_URL + "', 'testMapSkosNotation', target: 'skos:prefLabel')", + "lookup('prefLabel', 'testMapSkosNotation' , target: 'skos:prefLabel')" + ), + i -> { + i.startRecord("1"); + i.literal("prefLabel", "http://purl.org/lobid/rpb#n882022"); + i.endRecord(); + }, + o -> { + o.get().startRecord("1"); + o.get().literal("prefLabel", "Presserecht"); + o.get().endRecord(); + } ); } @Test public void shouldLookupRdfUrlWithRedirection() { MetafixTestHelpers.assertFix(streamReceiver, Arrays.asList( - "lookup_rdf('prefLabel', '" + RDF_URL + "', target: 'skos:prefLabel')" + "lookup('prefLabel', '" + RDF_URL + "', target: 
'skos:prefLabel')" ), i -> { i.startRecord("1"); @@ -1049,7 +1067,7 @@ public void shouldLookupRdfUrlWithRedirection() { public void shouldLookupInExternalRdfMapGetObjectOfSubjectWithTargetedPredicateOfSpecificLanguage() { MetafixTestHelpers.assertFix(streamReceiver, Arrays.asList( "set_array('prefLabel', 'https://w3id.org/kim/hochschulfaechersystematik/n4')", - "lookup_rdf('prefLabel.*', '" + RDF_MAP + "', target: 'skos:prefLabel', select_language: 'de')" + "lookup('prefLabel.*', '" + RDF_MAP + "', target: 'skos:prefLabel', select_language: 'de')" ), i -> { i.startRecord("1"); @@ -1067,7 +1085,7 @@ public void shouldLookupInExternalRdfMapGetObjectOfSubjectWithTargetedPredicateO public void shouldLookupInExternalRdfMapGetSubjectWithTargetedPredicateOfSpecificLanguage() { MetafixTestHelpers.assertFix(streamReceiver, Arrays.asList( "set_array('id', 'Mathematics, Natural Sciences')", - "lookup_rdf('id.*', '" + RDF_MAP + "', target: 'skos:prefLabel', select_language: 'en')" + "lookup('id.*', '" + RDF_MAP + "', target: 'skos:prefLabel', select_language: 'en')" ), i -> { i.startRecord("1"); @@ -1086,7 +1104,7 @@ public void shouldLookupInExternalRdfMapGetSubjectWithTargetedPredicateOfSpecifi @Test // Scenario lookupRdfPropertyToProperty public void shouldLookupInExternalRdfMapGetPropertyOfSpecificLanguageWithTargetedPredicate() { MetafixTestHelpers.assertFix(streamReceiver, Arrays.asList( - "lookup_rdf('a', '" + HCRT_RDF_MAP + "', target: 'skos:prefLabel', select_language: 'en')" + "lookup('a', '" + HCRT_RDF_MAP + "', target: 'skos:prefLabel', select_language: 'en')" ), i -> { i.startRecord("1"); @@ -1104,7 +1122,7 @@ public void shouldLookupInExternalRdfMapGetPropertyOfSpecificLanguageWithTargete @Test // Scenario lookupRdfPropertyToSubject public void shouldLookupInExternalRdfMapGetSubjectOfPropertyWithTargetedPredicate() { MetafixTestHelpers.assertFix(streamReceiver, Arrays.asList( - "lookup_rdf('a', '" + HCRT_RDF_MAP + "', target: 'skos:prefLabel')" + "lookup('a', '" 
+ HCRT_RDF_MAP + "', target: 'skos:prefLabel')" ), i -> { i.startRecord("1"); @@ -1122,7 +1140,7 @@ public void shouldLookupInExternalRdfMapGetSubjectOfPropertyWithTargetedPredicat @Test public void lookupRdfDefinedPropertyToSubjectNonDefault() { MetafixTestHelpers.assertFix(streamReceiver, Arrays.asList( - "lookup_rdf('a', '" + HCRT_RDF_MAP + "', target: 'skos:prefLabel', select_language: 'de')" + "lookup('a', '" + HCRT_RDF_MAP + "', target: 'skos:prefLabel', select_language: 'de')" ), i -> { i.startRecord("1"); @@ -1160,7 +1178,7 @@ public void shouldLookupInExternalRdfMapGetObjectWithTargetedPredicateOfSpecific @Test public void shouldLookupRdfDefinedPropertyToSubject() { MetafixTestHelpers.assertFix(streamReceiver, Arrays.asList( - "lookup_rdf('a', '" + HCRT_RDF_MAP + "', target: 'skos:prefLabel', select_language: 'de', select: 'subject')" + "lookup('a', '" + HCRT_RDF_MAP + "', target: 'skos:prefLabel', select_language: 'de', select:: 'subject')" ), i -> { i.startRecord("1"); @@ -1204,7 +1222,7 @@ public void shouldLookupRdfDefinedPropertyToSubject() { private void shouldLookupInExternalRdfMapGetObjectWithTargetedPredicateOfSpecificLanguage(final String target) { MetafixTestHelpers.assertFix(streamReceiver, Arrays.asList( "set_array('prefLabel', 'Mathematics, Natural Sciences')", - "lookup_rdf('prefLabel.*', '" + RDF_MAP + "', target: '" + target + "', select_language: 'de')" + "lookup('prefLabel.*', '" + RDF_MAP + "', target: '" + target + "', select_language: 'de')" ), i -> { i.startRecord("1"); diff --git a/metafix/src/test/resources/org/metafacture/metafix/integration/lookup/fromJson/toJson/lookupRdfObjectToLanguageVariantOfTheSameObject/test.fix b/metafix/src/test/resources/org/metafacture/metafix/integration/lookup/fromJson/toJson/lookupRdfObjectToLanguageVariantOfTheSameObject/test.fix index ad18850e..97b430b1 100644 --- 
a/metafix/src/test/resources/org/metafacture/metafix/integration/lookup/fromJson/toJson/lookupRdfObjectToLanguageVariantOfTheSameObject/test.fix +++ b/metafix/src/test/resources/org/metafacture/metafix/integration/lookup/fromJson/toJson/lookupRdfObjectToLanguageVariantOfTheSameObject/test.fix @@ -1,4 +1 @@ -lookup_rdf("a", "../../../../../maps/hcrt.ttl", target: "http://www.w3.org/2004/02/skos/core#prefLabel", select_language: "en") -# Cant define specific lookup-match fields, would expect something like this: -# lookup_rdf("a", "./hcrt.ttl", match="http://www.w3.org/2004/02/skos/core#prefLabel", match_language: "de", target: "http://www.w3.org/2004/02/skos/core#prefLabel", select_language: "en") - +lookup("a", "../../../../../maps/hcrt.ttl", target: "http://www.w3.org/2004/02/skos/core#prefLabel", select_language: "en") diff --git a/metafix/src/test/resources/org/metafacture/metafix/integration/lookup/fromJson/toJson/lookupRdfObjectToSubject/test.fix b/metafix/src/test/resources/org/metafacture/metafix/integration/lookup/fromJson/toJson/lookupRdfObjectToSubject/test.fix index 6f6a5498..d12cd18d 100644 --- a/metafix/src/test/resources/org/metafacture/metafix/integration/lookup/fromJson/toJson/lookupRdfObjectToSubject/test.fix +++ b/metafix/src/test/resources/org/metafacture/metafix/integration/lookup/fromJson/toJson/lookupRdfObjectToSubject/test.fix @@ -1 +1 @@ -lookup_rdf("a", "../../../../../maps/hcrt.ttl", target: "skos:prefLabel") +lookup("a", "../../../../../maps/hcrt.ttl", target: "skos:prefLabel") diff --git a/metafix/src/test/resources/org/metafacture/metafix/integration/lookup/fromJson/toJson/lookupRdfObjectWithSpecificLanguageToSubject/test.fix b/metafix/src/test/resources/org/metafacture/metafix/integration/lookup/fromJson/toJson/lookupRdfObjectWithSpecificLanguageToSubject/test.fix index 4890cd65..bc695389 100644 --- 
a/metafix/src/test/resources/org/metafacture/metafix/integration/lookup/fromJson/toJson/lookupRdfObjectWithSpecificLanguageToSubject/test.fix +++ b/metafix/src/test/resources/org/metafacture/metafix/integration/lookup/fromJson/toJson/lookupRdfObjectWithSpecificLanguageToSubject/test.fix @@ -1,4 +1 @@ -lookup_rdf('a', '../../../../../maps/hcrt.ttl', target: 'http://www.w3.org/2004/02/skos/core#prefLabel', select_language: 'de', select: 'subject') -# Cant define specific lookup-match fields, would expect something like this: -# lookup_rdf("a", "./hcrt.ttl", match="http://www.w3.org/2004/02/skos/core#prefLabel", match_language: "de") - +lookup('a', '../../../../../maps/hcrt.ttl', target: 'http://www.w3.org/2004/02/skos/core#prefLabel', select_language: 'de', select: 'subject') diff --git a/metafix/src/test/resources/org/metafacture/metafix/integration/lookup/fromJson/toJson/lookupRdfPropertyToProperty/test.fix b/metafix/src/test/resources/org/metafacture/metafix/integration/lookup/fromJson/toJson/lookupRdfPropertyToProperty/test.fix index dd18b144..c6defbcd 100644 --- a/metafix/src/test/resources/org/metafacture/metafix/integration/lookup/fromJson/toJson/lookupRdfPropertyToProperty/test.fix +++ b/metafix/src/test/resources/org/metafacture/metafix/integration/lookup/fromJson/toJson/lookupRdfPropertyToProperty/test.fix @@ -1 +1 @@ -lookup_rdf("a", "../../../../../maps/hcrt.ttl", target: "http://www.w3.org/2004/02/skos/core#prefLabel", select_language: 'en') +lookup("a", "../../../../../maps/hcrt.ttl", target: "http://www.w3.org/2004/02/skos/core#prefLabel", select_language: 'en') diff --git a/metafix/src/test/resources/org/metafacture/metafix/integration/lookup/fromJson/toJson/lookupRdfSubjectToObjectWithSpecificLanguage/test.fix b/metafix/src/test/resources/org/metafacture/metafix/integration/lookup/fromJson/toJson/lookupRdfSubjectToObjectWithSpecificLanguage/test.fix index ee40f8c0..11aaa3b6 100644 --- 
a/metafix/src/test/resources/org/metafacture/metafix/integration/lookup/fromJson/toJson/lookupRdfSubjectToObjectWithSpecificLanguage/test.fix +++ b/metafix/src/test/resources/org/metafacture/metafix/integration/lookup/fromJson/toJson/lookupRdfSubjectToObjectWithSpecificLanguage/test.fix @@ -1,2 +1,2 @@ # Lookup replaces with default value by default. Should be an option. -lookup_rdf("a", "../../../../../maps/hcrt.ttl", target: "http://www.w3.org/2004/02/skos/core#prefLabel", select_language: "de") +lookup("a", "../../../../../maps/hcrt.ttl", target: "http://www.w3.org/2004/02/skos/core#prefLabel", select_language: "de")