Skip to content

Commit

Permalink
Tweak rdf lookup (metafacture-core#415)
Browse files Browse the repository at this point in the history
- update README
- integrate lookup_rdf() into lookup()
- rename target_language to select_language (complements b49445d)
- remove comments in integrations test.fix for these are accounted to
  • Loading branch information
dr0i committed Dec 13, 2022
1 parent 9251ca3 commit df63ff0
Show file tree
Hide file tree
Showing 8 changed files with 72 additions and 55 deletions.
27 changes: 13 additions & 14 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,6 @@ The separator (`sep_char`) will vary depending on the source file, e.g.:
| CSV | `,` or `;` |
| TSV | `\t` |

<<<<<<< HEAD
Options:

- `allow_empty_values`: Sets whether to allow empty values in the filemap or to ignore these entries. (Default: `false`)
Expand All @@ -209,7 +208,7 @@ The targeted RDF property can optional be bound by an RDF language tag.

```perl
put_rdfmap("<rdfResource>", "<rdfMapName>", target: "<rdfProperty>")
put_rdfmap("<rdfResource>", "<rdfMapName>", target: "<rdfProperty>, target_language: "<rdfLanguageTag>"")
put_rdfmap("<rdfResource>", "<rdfMapName>", target: "<rdfProperty>", select_language: "<rdfLanguageTag>")
```

##### `put_map`
Expand All @@ -232,7 +231,7 @@ The targeted RDF property can optionally be bound by an RDF language tag.

```perl
put_rdfmap("<rdfResource>", "<rdfMapName>", target: "<rdfProperty>")
put_rdfmap("<rdfResource>", "<rdfMapName>", target: "<rdfProperty>", target_language: "<rdfLanguageTag>")
put_rdfmap("<rdfResource>", "<rdfMapName>", target: "<rdfProperty>", select_language: "<rdfLanguageTag>")
```

##### `put_var`
Expand Down Expand Up @@ -576,7 +575,7 @@ join_field("<sourceField>", "<separator>")

##### `lookup`

Looks up matching values in a map and replaces the field value with this match. [External files](#put_filemap) as well as [internal maps](#put_map) can be used.
Looks up matching values in a map and replaces the field value with this match. [External files](#put_filemap), [internal maps](#put_map) as well as [RDF resources](#put_rdfmap) can be used.

Parameters:

Expand Down Expand Up @@ -627,19 +626,19 @@ lookup("path.to.field", "map-name", __default: "NA")

# with printing unknown values to a file
lookup("path.to.field", "map-name", print_unknown: "true", destination: "unknown.txt")
```

##### `lookup_rdf`

Looks up matching values in an RDF resource and replaces the field value with a match defined by a targeted RDF property. External files or HTTP(S) resources as well as internal RDF maps can be used.
# rdf: Looks up matching values in an RDF resource and replaces the field value with a match defined by a targeted RDF property. External files or HTTP(S) resources as well as internal RDF maps can be used.
The targeted RDF property can optionally be bound by an RDF language tag.

```perl
lookup_rdf("<sourceField>", "<rdfFile>", target: "<rdfProperty>")
lookup_rdf("<sourceField>", "<rdfHttpUri>", target: "<rdfProperty>")
lookup_rdf("<sourceField>", "<rdfMapName>", target: "<rdfProperty>")
lookup_rdf("<sourceField>", "<rdfMapName>", target: "<rdfProperty>", target_language: "<rdfLanguageTag>")
lookup_rdf("<sourceField>", "<rdfMapName>", target: "<rdfProperty>", __default: "NA")
## rdf map (explicit)
put_rdfmap("path/to/file", "rdf-map", target: "<rdfProperty>")
lookup("path.to.field", "rdf-map")

## rdf with mandatory "target" (implicit)
lookup("path.to.field", "path/to/file|URL", target: "<rdfProperty>")

## rdf with mandatory "target" and "select_language" (implicit)
lookup("path.to.field", "path/to/file|URL", target: "<rdfProperty>", select_language: "<rdfLanguageTag>")
```

##### `prepend`
Expand Down
32 changes: 19 additions & 13 deletions metafix/src/main/java/org/metafacture/metafix/FixMethod.java
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ public void apply(final Metafix metafix, final Record record, final List<String>
put_rdfmap {
@Override
public void apply(final Metafix metafix, final Record record, final List<String> params, final Map<String, String> options) {
final String rdfMapName = params.size() == 1 ? params.get(0) : params.get(1) + options.get(RdfMap.TARGET) + options.getOrDefault(RdfMap.TARGET_LANGUAGE, "");
final String rdfMapName = params.size() == 1 ? params.get(0) + options.get(RdfMap.TARGET) + options.getOrDefault(RdfMap.TARGET_LANGUAGE, "") : params.get(1);
final String replaceTargets = options.get(RdfMap.TARGET) + options.getOrDefault(RdfMap.TARGET_LANGUAGE, "");
final String resourceName = Optional.ofNullable(params.get(0))
.map(str -> str.replaceAll(replaceTargets + "$", ""))
Expand Down Expand Up @@ -501,15 +501,10 @@ public void apply(final Metafix metafix, final Record record, final List<String>
map = options;
}
else {
final String mapName = params.get(1);
String mapName = params.get(1);

if (!metafix.getMapNames().contains(mapName)) {
if (mapName.contains(".") || mapName.contains(File.separator)) {
put_filemap.apply(metafix, record, Arrays.asList(mapName), options);
}
else {
// Probably an unknown internal map? Log a warning?
}
mapName = putMapAndGetMapName(metafix, record, params, options, mapName);
}

map = metafix.getMap(mapName);
Expand Down Expand Up @@ -541,11 +536,22 @@ public void apply(final Metafix metafix, final Record record, final List<String>
consumer.accept(null);
}
}
},
lookup_rdf {
@Override
public void apply(final Metafix metafix, final Record record, final List<String> params, final Map<String, String> options) {
lookup(metafix, record, params, options, put_rdfmap);

/**
 * Registers a map on demand for a lookup whose map name is not yet known
 * and returns the name under which the map should subsequently be fetched.
 *
 * <p>If the options carry an RDF target, the second parameter is handed to
 * {@code put_rdfmap} and the returned name is that parameter concatenated with
 * the target and the (optional) target language. Otherwise, a map name that
 * contains a dot or the platform file separator is handed to
 * {@code put_filemap}; the name itself is returned unchanged.
 *
 * @param metafix the Metafix instance passed through to the map functions
 * @param record  the current record passed through to the map functions
 * @param params  the lookup parameters; index 1 is read as the map/resource name
 * @param options the lookup options, forwarded unchanged to the map functions
 * @param mapName the map name originally requested by the lookup
 * @return the name to use when retrieving the map
 */
private String putMapAndGetMapName(final Metafix metafix, final Record record, final List<String> params, final Map<String, String> options, final String mapName) {
    if (options.containsKey(RdfMap.TARGET)) {
        final String rdfResource = params.get(1);
        put_rdfmap.apply(metafix, record, Arrays.asList(rdfResource), options);
        return rdfResource + options.get(RdfMap.TARGET) + options.getOrDefault(RdfMap.TARGET_LANGUAGE, "");
    }

    final boolean looksLikeFile = mapName.contains(".") || mapName.contains(File.separator);
    if (looksLikeFile) {
        put_filemap.apply(metafix, record, Arrays.asList(mapName), options);
    }
    // Otherwise: probably an unknown internal map? Log a warning?

    return mapName;
}
},
prepend {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@
*/
@ExtendWith(MockitoExtension.class)
public class MetafixLookupTest {

private static final String CSV_MAP = "src/test/resources/org/metafacture/metafix/maps/test.csv";
private static final String RDF_MAP = "src/test/resources/org/metafacture/metafix/maps/test.ttl";

Expand Down Expand Up @@ -964,7 +963,7 @@ public void shouldPrintUnknownToFileWithoutAppend() throws IOException {
public void shouldLookupInSeparateExternalRdfFileMapWithName() {
assertRdfMap(
"put_rdfmap('" + RDF_MAP + "', 'testMapSkosNotation', target: 'skos:notation')",
"lookup_rdf('notation', 'testMapSkosNotation', target: 'skos:notation')"
"lookup('notation', 'testMapSkosNotation', target: 'skos:notation')"
);
}

Expand All @@ -973,21 +972,21 @@ public void shouldLookupInSeparateExternalRdfFileMapWithDifferentTargets() {
assertRdfMapWithDifferentTargets(
"put_rdfmap('" + RDF_MAP + "', 'testRdfMapSkosNotation', target: 'skos:notation')",
"put_rdfmap('" + RDF_MAP + "', 'testRdfMapCreated', target: 'created', __default: '__default')",
"lookup_rdf('notation', 'testRdfMapSkosNotation', target: 'skos:notation')",
"lookup_rdf('created', 'testRdfMapCreated', target: 'created')");
"lookup('notation', 'testRdfMapSkosNotation', target: 'skos:notation')",
"lookup('created', 'testRdfMapCreated', target: 'created')");
}

@Test
public void shouldLookupInExternalRdfWithDifferentTargets() {
assertRdfMapWithDifferentTargets(
"lookup_rdf('notation', '" + RDF_MAP + "', target: 'skos:notation')",
"lookup_rdf('created', '" + RDF_MAP + "', target: 'created', __default: '__default')");
"lookup('notation', '" + RDF_MAP + "', target: 'skos:notation')",
"lookup('created', '" + RDF_MAP + "', target: 'created', __default: '__default')");
}

@Test
public void shouldLookupInExternalRdfUseDefinedDefaultValueIfNotFound() {
MetafixTestHelpers.assertFix(streamReceiver, Arrays.asList(
"lookup_rdf('created', '" + RDF_MAP + "', target: 'created', __default: '0000-01-01')"
"lookup('created', '" + RDF_MAP + "', target: 'created', __default: '0000-01-01')"
),
i -> {
i.startRecord("1");
Expand All @@ -1005,7 +1004,7 @@ public void shouldLookupInExternalRdfUseDefinedDefaultValueIfNotFound() {
@Test
public void shouldLookupInExternalRdfUseDefaultValueIfNotFound() {
MetafixTestHelpers.assertFix(streamReceiver, Arrays.asList(
"lookup_rdf('created', '" + RDF_MAP + "', target: 'created', __default: '__default')"
"lookup('created', '" + RDF_MAP + "', target: 'created', __default: '__default')"
),
i -> {
i.startRecord("1");
Expand All @@ -1023,14 +1022,33 @@ public void shouldLookupInExternalRdfUseDefaultValueIfNotFound() {
@Test
public void shouldLookupInExternalRdfMapGetObjectOfSubjectWithTargetedPredicate() {
assertRdfMap(
"lookup_rdf('notation', '" + RDF_MAP + "', target: 'skos:notation')"
"lookup('notation', '" + RDF_MAP + "', target: 'skos:notation')"
);
}

/**
 * Explicit variant of the RDF URL lookup: the RDF map is first registered
 * from {@code RDF_URL} via {@code put_rdfmap} under a map name, and
 * {@code lookup} then resolves the {@code prefLabel} field through that name.
 * The input URI is expected to be replaced by the label "Presserecht".
 *
 * NOTE(review): the map is named 'testMapSkosNotation' although its target is
 * 'skos:prefLabel' -- the name looks carried over from another test; confirm.
 * NOTE(review): "WithRedirection" presumably means RDF_URL answers with an
 * HTTP redirect; the constant's definition is not visible here -- confirm.
 */
@Test
public void shouldExplicitLookupRdfUrlWithRedirection() {
MetafixTestHelpers.assertFix(streamReceiver, Arrays.asList(
// Register the remote RDF resource under an explicit map name ...
"put_rdfmap('" + RDF_URL + "', 'testMapSkosNotation', target: 'skos:prefLabel')",
// ... and reference that name (not the URL) in the lookup.
"lookup('prefLabel', 'testMapSkosNotation' , target: 'skos:prefLabel')"
),
i -> {
i.startRecord("1");
i.literal("prefLabel", "http://purl.org/lobid/rpb#n882022");
i.endRecord();
},
o -> {
o.get().startRecord("1");
o.get().literal("prefLabel", "Presserecht");
o.get().endRecord();
}
);
}

@Test
public void shouldLookupRdfUrlWithRedirection() {
MetafixTestHelpers.assertFix(streamReceiver, Arrays.asList(
"lookup_rdf('prefLabel', '" + RDF_URL + "', target: 'skos:prefLabel')"
"lookup('prefLabel', '" + RDF_URL + "', target: 'skos:prefLabel')"
),
i -> {
i.startRecord("1");
Expand All @@ -1049,7 +1067,7 @@ public void shouldLookupRdfUrlWithRedirection() {
public void shouldLookupInExternalRdfMapGetObjectOfSubjectWithTargetedPredicateOfSpecificLanguage() {
MetafixTestHelpers.assertFix(streamReceiver, Arrays.asList(
"set_array('prefLabel', 'https://w3id.org/kim/hochschulfaechersystematik/n4')",
"lookup_rdf('prefLabel.*', '" + RDF_MAP + "', target: 'skos:prefLabel', select_language: 'de')"
"lookup('prefLabel.*', '" + RDF_MAP + "', target: 'skos:prefLabel', select_language: 'de')"
),
i -> {
i.startRecord("1");
Expand All @@ -1067,7 +1085,7 @@ public void shouldLookupInExternalRdfMapGetObjectOfSubjectWithTargetedPredicateO
public void shouldLookupInExternalRdfMapGetSubjectWithTargetedPredicateOfSpecificLanguage() {
MetafixTestHelpers.assertFix(streamReceiver, Arrays.asList(
"set_array('id', 'Mathematics, Natural Sciences')",
"lookup_rdf('id.*', '" + RDF_MAP + "', target: 'skos:prefLabel', select_language: 'en')"
"lookup('id.*', '" + RDF_MAP + "', target: 'skos:prefLabel', select_language: 'en')"
),
i -> {
i.startRecord("1");
Expand All @@ -1086,7 +1104,7 @@ public void shouldLookupInExternalRdfMapGetSubjectWithTargetedPredicateOfSpecifi
@Test // Scenario lookupRdfPropertyToProperty
public void shouldLookupInExternalRdfMapGetPropertyOfSpecificLanguageWithTargetedPredicate() {
MetafixTestHelpers.assertFix(streamReceiver, Arrays.asList(
"lookup_rdf('a', '" + HCRT_RDF_MAP + "', target: 'skos:prefLabel', select_language: 'en')"
"lookup('a', '" + HCRT_RDF_MAP + "', target: 'skos:prefLabel', select_language: 'en')"
),
i -> {
i.startRecord("1");
Expand All @@ -1104,7 +1122,7 @@ public void shouldLookupInExternalRdfMapGetPropertyOfSpecificLanguageWithTargete
@Test // Scenario lookupRdfPropertyToSubject
public void shouldLookupInExternalRdfMapGetSubjectOfPropertyWithTargetedPredicate() {
MetafixTestHelpers.assertFix(streamReceiver, Arrays.asList(
"lookup_rdf('a', '" + HCRT_RDF_MAP + "', target: 'skos:prefLabel')"
"lookup('a', '" + HCRT_RDF_MAP + "', target: 'skos:prefLabel')"
),
i -> {
i.startRecord("1");
Expand All @@ -1122,7 +1140,7 @@ public void shouldLookupInExternalRdfMapGetSubjectOfPropertyWithTargetedPredicat
@Test
public void lookupRdfDefinedPropertyToSubjectNonDefault() {
MetafixTestHelpers.assertFix(streamReceiver, Arrays.asList(
"lookup_rdf('a', '" + HCRT_RDF_MAP + "', target: 'skos:prefLabel', select_language: 'de')"
"lookup('a', '" + HCRT_RDF_MAP + "', target: 'skos:prefLabel', select_language: 'de')"
),
i -> {
i.startRecord("1");
Expand Down Expand Up @@ -1160,7 +1178,7 @@ public void shouldLookupInExternalRdfMapGetObjectWithTargetedPredicateOfSpecific
@Test
public void shouldLookupRdfDefinedPropertyToSubject() {
MetafixTestHelpers.assertFix(streamReceiver, Arrays.asList(
"lookup_rdf('a', '" + HCRT_RDF_MAP + "', target: 'skos:prefLabel', select_language: 'de', select: 'subject')"
"lookup('a', '" + HCRT_RDF_MAP + "', target: 'skos:prefLabel', select_language: 'de', select:: 'subject')"
),
i -> {
i.startRecord("1");
Expand Down Expand Up @@ -1204,7 +1222,7 @@ public void shouldLookupRdfDefinedPropertyToSubject() {
private void shouldLookupInExternalRdfMapGetObjectWithTargetedPredicateOfSpecificLanguage(final String target) {
MetafixTestHelpers.assertFix(streamReceiver, Arrays.asList(
"set_array('prefLabel', 'Mathematics, Natural Sciences')",
"lookup_rdf('prefLabel.*', '" + RDF_MAP + "', target: '" + target + "', select_language: 'de')"
"lookup('prefLabel.*', '" + RDF_MAP + "', target: '" + target + "', select_language: 'de')"
),
i -> {
i.startRecord("1");
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1 @@
lookup_rdf("a", "../../../../../maps/hcrt.ttl", target: "http://www.w3.org/2004/02/skos/core#prefLabel", select_language: "en")
# Cant define specific lookup-match fields, would expect something like this:
# lookup_rdf("a", "./hcrt.ttl", match="http://www.w3.org/2004/02/skos/core#prefLabel", match_language: "de", target: "http://www.w3.org/2004/02/skos/core#prefLabel", select_language: "en")

lookup("a", "../../../../../maps/hcrt.ttl", target: "http://www.w3.org/2004/02/skos/core#prefLabel", select_language: "en")
Original file line number Diff line number Diff line change
@@ -1 +1 @@
lookup_rdf("a", "../../../../../maps/hcrt.ttl", target: "skos:prefLabel")
lookup("a", "../../../../../maps/hcrt.ttl", target: "skos:prefLabel")
Original file line number Diff line number Diff line change
@@ -1,4 +1 @@
lookup_rdf('a', '../../../../../maps/hcrt.ttl', target: 'http://www.w3.org/2004/02/skos/core#prefLabel', select_language: 'de', select: 'subject')
# Cant define specific lookup-match fields, would expect something like this:
# lookup_rdf("a", "./hcrt.ttl", match="http://www.w3.org/2004/02/skos/core#prefLabel", match_language: "de")

lookup('a', '../../../../../maps/hcrt.ttl', target: 'http://www.w3.org/2004/02/skos/core#prefLabel', select_language: 'de', select: 'subject')
Original file line number Diff line number Diff line change
@@ -1 +1 @@
lookup_rdf("a", "../../../../../maps/hcrt.ttl", target: "http://www.w3.org/2004/02/skos/core#prefLabel", select_language: 'en')
lookup("a", "../../../../../maps/hcrt.ttl", target: "http://www.w3.org/2004/02/skos/core#prefLabel", select_language: 'en')
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
# Lookup replaces with default value by default. Should be an option.
lookup_rdf("a", "../../../../../maps/hcrt.ttl", target: "http://www.w3.org/2004/02/skos/core#prefLabel", select_language: "de")
lookup("a", "../../../../../maps/hcrt.ttl", target: "http://www.w3.org/2004/02/skos/core#prefLabel", select_language: "de")

0 comments on commit df63ff0

Please sign in to comment.