Skip to content

Commit 8828182

Browse files
adammljyucsiromarqh
authored
Adding DatasetDownload to Schema.org export (#99)
* Addressing Schema.org of opengeospatial/netcdf-ld#34 * Add distribution ENUM * Resolving conflicts * Trying to import Enum * Added SchemaOrg class to __init__.py With distribution method * enabling running of nc2rdf with schemaOrg code * remove print stmt * test schemOrg class * Updating Schema.org output * Working on Schema.org output * Working on Schema.org output * Working on Schema.org output * Working on Schema.org output * Working on Schema.org output * Working on Schema.org output * Working on Schema.org output * Edited Schema.org test TTL * Changed Schema.org test TTL * Editing Schema.org test TTL * Working on Schema.org test * Updating Schema.org output for tests * update results Co-authored-by: Jonathan Yu <[email protected]> Co-authored-by: marqh <[email protected]>
1 parent dd88fa4 commit 8828182

File tree

8 files changed

+126
-24
lines changed

8 files changed

+126
-24
lines changed

lib/bald/__init__.py

+64-20
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import contextlib
33
import copy
44
from difflib import SequenceMatcher
5+
56
import json
67
import operator
78
import os
@@ -19,11 +20,10 @@
1920
import requests
2021
import six
2122

22-
from bald import datetime
23+
from bald import datetime, distribution
2324
import bald.validation as bv
2425

25-
__version__ = '0.3'
26-
26+
__version__ = '0.3.1'
2727

2828
def _graph_html():
2929
return('''<html>
@@ -216,7 +216,7 @@ def _network_js_close():
216216
return(''' joint.layout.DirectedGraph.layout(graph, { setLinkVertices: false,
217217
nodeSep: 150, rankSep: 100,
218218
marginX: 100, marginY: 100,
219-
rankDir: 'LR' });
219+
rankDir: 'LR' });
220220
221221
222222
for (var i = 0; i < instance_list.length; i++) {
@@ -542,17 +542,17 @@ def _dcat_location(self, graph, selfnode):
542542
graph.bind('dcat', 'http://www.w3.org/ns/dcat#')
543543
graph.bind('dct', 'http://purl.org/dc/terms/')
544544
# template = ('dcat:distribution [
545-
# a dcat:Distribution;
546-
# dcat:downloadURL <{}>;
547-
# dcat:mediaType [
548-
# a dct:MediaType;
549-
# dct:identifier "application/x-netcdf"
550-
# ];
551-
# dct:format [
552-
# a dct:MediaType;
553-
# dct:identifier <http://vocab.nerc.ac.uk/collection/M01/current/NC/>
554-
# ]
555-
# ].')
545+
# a dcat:Distribution;
546+
# dcat:downloadURL <{}>;
547+
# dcat:mediaType [
548+
# a dct:MediaType;
549+
# dct:identifier "application/x-netcdf"
550+
# ];
551+
# dct:format [
552+
# a dct:MediaType;
553+
# dct:identifier <http://vocab.nerc.ac.uk/collection/M01/current/NC/>
554+
# ]
555+
# ].')
556556
dcatnode = rdflib.BNode()
557557
dcfnode = rdflib.BNode()
558558
graph.add((selfnode, rdflib.URIRef('http://www.w3.org/ns/dcat#distribution'), dcatnode))
@@ -561,12 +561,12 @@ def _dcat_location(self, graph, selfnode):
561561
graph.add((dcatnode, rdflib.URIRef('http://www.w3.org/ns/dcat#downloadURL'), rdflib.URIRef(self.file_locator)))
562562
dcatmednode = rdflib.BNode()
563563
graph.add((dcatmednode, rdflib.namespace.RDF.type, rdflib.URIRef('http://www.w3.org/ns/dcat#MediaType')))
564-
graph.add((dcatmednode, rdflib.URIRef('http://purl.org/dc/terms/identifier'), rdflib.Literal('application/x-netcdf')))
564+
graph.add((dcatmednode, rdflib.URIRef('http://purl.org/dc/terms/identifier'), rdflib.Literal(distribution.BaldDistributionEnum.MIME_TYPE.value)))
565565
graph.add((dcatnode, rdflib.URIRef('http://www.w3.org/ns/dcat#mediaType'), dcatmednode))
566566

567567
graph.add((dcfnode, rdflib.namespace.RDF.type, rdflib.URIRef('http://purl.org/dc/terms/MediaType')))
568568
graph.add((dcfnode, rdflib.URIRef('http://purl.org/dc/terms/identifier'),
569-
rdflib.URIRef('http://vocab.nerc.ac.uk/collection/M01/current/NC/')))
569+
rdflib.URIRef(distribution.BaldDistributionEnum.LINKED_DATA_RESOURCE_DEFINING_NETCDF.value)))
570570
graph.add((selfnode, rdflib.URIRef('http://purl.org/dc/terms/format'), dcfnode))
571571

572572

@@ -1456,7 +1456,51 @@ def _hdf_references(fhandle, root_container, file_variables):
14561456
if isinstance(member, Container):
14571457
_hdf_references(fhandle, member, file_variables)
14581458

1459-
1460-
1461-
1459+
class schemaOrg:
    """
    Build a Schema.org description (Dataset plus DataDownload distribution)
    to accompany a BALD graph.

    Each instance owns its own rdflib graph, so building several exports in
    one process does not mix their triples together.
    """

    # Schema.org vocabulary namespace; a constant, so sharing it between
    # instances is safe.
    __so = rdflib.Namespace("http://schema.org/")

    def __init__(self, graph, path=None, baseuri=None):
        """
        Export a Schema.org graph for a BALD graph

        Required inputs -
            graph    the BALD graph being described; it is stored on the
                     instance and not otherwise read by this class
            path     a URI string locating the file content, or None
            baseuri  a URI string identifying the dataset, or None
                     (a blank node is used when None)

        The resulting rdflib graph of Schema.org content is obtained with
        getSchemaOrgGraph()
        """
        # Created here rather than as a class attribute: a class-level
        # rdflib.Graph() would be one object shared by every instance, and
        # triples from earlier exports would leak into later ones.
        self.__schemaGraph = rdflib.Graph()
        self.__baldGraph = graph

        if baseuri is not None:
            container = rdflib.URIRef(baseuri)
        else:
            container = rdflib.BNode()

        self.__schemaGraph.add((container, rdflib.RDF.type, self.__so.Dataset))
        self.__distribution(container, path)

    def __distribution(self, container, path):
        """
        Export a Schema.org distribution

        Required inputs -
            container  the node (URIRef or BNode) representing the dataset
            path       a URI string or None; when given it is recorded as
                       the contentUrl of the distribution
        """
        distributionNode = rdflib.BNode()
        so = self.__so
        triples = [
            (container, so.distribution, distributionNode),
            (distributionNode, rdflib.RDF.type, so.DataDownload),
            # encodingFormat is stated twice: once as a MIME type literal
            # and once as a URI.
            (distributionNode, so.encodingFormat,
             rdflib.Literal(distribution.BaldDistributionEnum.MIME_TYPE.value)),
            (distributionNode, so.encodingFormat,
             rdflib.URIRef(
                 distribution.BaldDistributionEnum.LINKED_DATA_RESOURCE_DEFINING_NETCDF.value)),
        ]
        if path is not None:
            triples.append((distributionNode, so.contentUrl, rdflib.URIRef(path)))
        for triple in triples:
            self.__schemaGraph.add(triple)
        return None

    def getSchemaOrgGraph(self):
        """Return the rdflib graph holding the Schema.org triples."""
        return self.__schemaGraph

lib/bald/distribution.py

+8
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
from enum import Enum
2+
"""
3+
Adds an Enum for MIME_TYPE and LINKED_DATA_RESOURCE_DEFINING_NETCDF for use
4+
in Schema.org and DCAT outputs
5+
"""
6+
class BaldDistributionEnum(Enum):
    """Constants identifying the netCDF format in distribution metadata."""

    # MIME type string for netCDF content.
    MIME_TYPE = "application/x-netcdf"

    # URI of the linked data resource defining netCDF.
    LINKED_DATA_RESOURCE_DEFINING_NETCDF = (
        "http://vocab.nerc.ac.uk/collection/M01/current/NC/"
    )

lib/bald/tests/integration/HTML/array_reference.html

+1-1
Original file line numberDiff line numberDiff line change
@@ -183,7 +183,7 @@
183183
joint.layout.DirectedGraph.layout(graph, { setLinkVertices: false,
184184
nodeSep: 150, rankSep: 100,
185185
marginX: 100, marginY: 100,
186-
rankDir: 'LR' });
186+
rankDir: 'LR' });
187187

188188

189189
for (var i = 0; i < instance_list.length; i++) {

lib/bald/tests/integration/HTML/multi_array_reference.html

+1-1
Original file line numberDiff line numberDiff line change
@@ -197,7 +197,7 @@
197197
joint.layout.DirectedGraph.layout(graph, { setLinkVertices: false,
198198
nodeSep: 150, rankSep: 100,
199199
marginX: 100, marginY: 100,
200-
rankDir: 'LR' });
200+
rankDir: 'LR' });
201201

202202

203203
for (var i = 0; i < instance_list.length; i++) {
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
@prefix ns1: <http://schema.org/> .
2+
@prefix this: <https://www.unidata.ucar.edu/software/netcdf/examples/test_hgroups.cdl/> .
3+
4+
this: a ns1:Dataset ;
5+
ns1:distribution [ a ns1:DataDownload ;
6+
ns1:contentUrl <https://www.unidata.ucar.edu/software/netcdf/examples/test_hgroups.cdl> ;
7+
ns1:encodingFormat <http://vocab.nerc.ac.uk/collection/M01/current/NC/>, "application/x-netcdf" ] .
8+

lib/bald/tests/integration/test_cdl_rdfgraph.py

+33
Original file line numberDiff line numberDiff line change
@@ -246,6 +246,39 @@ def test_hgroups(self):
246246
expected_rdfgraph.parse(sf, format='n3')
247247
self.check_result(rdfgraph, expected_rdfgraph)
248248

249+
def test_hgroups_schema_dot_org(self):
    """
    Check the Schema.org export for the Unidata test_hgroups example.

    The CDL is downloaded, converted to netCDF with ncgen, loaded through
    bald, and the Schema.org graph is compared with the stored TTL file.
    When the environment variable bald_update_results is set, the stored
    TTL file is rewritten from the current output before the comparison.
    """
    name = 'hgroups_schema.org'
    hgurl = 'https://www.unidata.ucar.edu/software/netcdf/examples/test_hgroups.cdl'
    baseuri = hgurl+'/'
    alias_dict = {'NetCDF': 'http://def.scitools.org.uk/NetCDF',
                  'CFTerms': 'http://def.scitools.org.uk/CFTerms',
                  'cf_sname': 'http://vocab.nerc.ac.uk/standard_name/'
                  }
    with self.temp_filename('.nc') as tfile:
        res = requests.get(hgurl)
        if res.status_code != 200:
            raise ValueError('{} failed to download: {}'.format(hgurl, res.status_code))
        with self.temp_filename('.cdl.') as cdlfile:
            with open(cdlfile, 'w') as fh:
                fh.write(res.text)
            # ncgen is run once the CDL file has been closed, while the
            # temporary CDL file is still in scope.
            subprocess.check_call(['ncgen', '-o', tfile, cdlfile])

        root_container = bald.load_netcdf(tfile, baseuri=baseuri,
                                          alias_dict=alias_dict, cache=self.acache,
                                          file_locator=hgurl)
        rdfgraph = root_container.rdfgraph()
        schema_graph = bald.schemaOrg(rdfgraph, hgurl, baseuri).getSchemaOrgGraph()

        # serialize() returns bytes on older rdflib releases and str on
        # newer ones; only decode when bytes were actually returned.
        ttl = schema_graph.serialize(format='n3')
        if isinstance(ttl, bytes):
            ttl = ttl.decode("utf-8")

        expected_path = os.path.join(self.ttl_path, '{}.ttl'.format(name))
        if os.environ.get('bald_update_results') is not None:
            with open(expected_path, 'w') as sf:
                sf.write(ttl)
        with open(expected_path, 'r') as sf:
            expected_rdfgraph = rdflib.Graph()
            expected_rdfgraph.parse(sf, format='n3')
        self.check_result(schema_graph, expected_rdfgraph)
281+
249282
def test_group_array_geo(self):
250283
with self.temp_filename('.nc') as tfile:
251284
name = 'group_array_geo'

nc2rdf/nc2rdf.py

+10-1
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,10 @@ def baldgraph2schemaorg(graph, path=None, baseuri=None):
3535
# HACK: The following mappings ignore prefixes as well as prefixes in nc file
3636
# TODO: Fix references to prefixes/aliases proper
3737

38+
#encoding formats to use - one as Text, one as URL
39+
encodingFormats = ["application/x-netcdf",
40+
"http://vocab.nerc.ac.uk/collection/M01/current/NC/"]
41+
3842
#load mappings
3943
mapping_idx = {}
4044
mapping_data = []
@@ -83,6 +87,11 @@ def baldgraph2schemaorg(graph, path=None, baseuri=None):
8387
#print('schemaorg:' + mapping_idx[currField], "\t", row[1])
8488
lit = Literal(row[1])
8589
schema_g.add( (container, predUri, lit) )
90+
#
91+
# Add some distribution details
92+
#
93+
schema_org_inst = bald.schemaOrg()
94+
schema_g = schema_org_inst.distribution(container, schema_g, baseuri)
8695
return schema_g
8796

8897
def nc2schemaorg(ncfilename, outformat, baseuri=None):
@@ -92,7 +101,7 @@ def nc2schemaorg(ncfilename, outformat, baseuri=None):
92101

93102
if(outformat == 'json-ld'):
94103
context = "http://schema.org/"
95-
s = schema_g.serialize(format=outformat, context=context, indent=4).decode("utf-8")
104+
s = schema_g.serialize(format=outformat, context=context, indent=4)
96105
else:
97106
s = schema_g.serialize(format=outformat).decode("utf-8")
98107
print(s)

requirements.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
numpy
22
h5py
3-
netCDF4=1.3.1
3+
netCDF4==1.3.1
44
requests
55
rdflib
66
jinja2

0 commit comments

Comments
 (0)