Skip to content
This repository was archived by the owner on Jan 25, 2018. It is now read-only.

[WIP - No Merge] Use json-ld dcat schema #99

Open
wants to merge 9 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
144 changes: 144 additions & 0 deletions v2/gbl1_to_jsonld.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
require 'json'

##
# Convert a Geoblacklight 1.0 schema record into a 2.0 JSON-LD (DCAT) record.
#
# Note: this is a LOSSY transform from GBL 1.0 -> 2.0 DCAT; only the 1.0
# fields read below are carried over.
#
# Each entry of the JSON-encoded 'dct_references_s' field becomes either a
# dcat:Distribution hash appended to :distribution, the :landingPage, or the
# :describedBy link. Unrecognised reference keys are reported on stdout and
# skipped. A missing or blank 'dct_references_s' is treated as "no
# references" and yields an empty :distribution array.
#
# @param record [Hash] GBL 1.0 record keyed by field-name strings
# @return [Hash] DCAT record keyed by symbols, with nil-valued entries removed
# @raise [RuntimeError] if dc_identifier_s, dc_rights_s, solr_geom,
#   dct_provenance_s, layer_slug_s or dc_title_s is missing
# @raise [JSON::ParserError] if 'dct_references_s' is present but not valid JSON
def transform_record(record)
  ## TODO: array / multivalue checking
  ## TODO: simple conversions of datetime to ISO 8601 (no '/' for separation of date components)

  # Keys are assigned in a fixed order (placeholders included) so that the
  # serialized output keeps a stable property order.
  dcat_record = {}
  dcat_record[:@context] = "https://raw.githubusercontent.com/geoblacklight/geoblacklight-schema/json-ld-schema/v2/schema/context.jsonld"
  dcat_record[:@id] = record.fetch('dc_identifier_s', nil)
  dcat_record[:@type] = "dcat:Dataset"
  dcat_record[:accessLevel] = record.fetch('dc_rights_s', nil)
  dcat_record[:creator] = record.fetch('dc_creator_sm', nil)
  dcat_record[:description] = record.fetch('dc_description_s', nil)
  dcat_record[:distribution] = []
  dcat_record[:geom] = record.fetch('solr_geom', nil)
  dcat_record[:geomType] = record.fetch('layer_geom_type_s', nil)
  dcat_record[:isPartOf] = record.fetch('dct_isPartOf_sm', nil)
  dcat_record[:issued] = record.fetch('dct_issued_s', nil)
  dcat_record[:landingPage] = nil # filled in from the schema.org/url reference, if any
  dcat_record[:language] = record.fetch('dc_language_s', nil)
  dcat_record[:license] = nil # no GBL 1.0 source field is mapped here
  dcat_record[:modified] = record.fetch('layer_modified_dt', nil)
  dcat_record[:provenance] = record.fetch('dct_provenance_s', nil)
  dcat_record[:publisher] = record.fetch('dc_publisher_s', nil)
  dcat_record[:resourceType] = record.fetch('dc_type_s', nil)
  dcat_record[:rights] = nil # no GBL 1.0 source field is mapped here
  dcat_record[:slug] = record.fetch('layer_slug_s', nil)
  dcat_record[:source] = record.fetch('dc_source_sm', nil)
  dcat_record[:spatial] = record.fetch('dct_spatial_sm', nil)
  dcat_record[:subject] = record.fetch('dc_subject_sm', nil)
  dcat_record[:temporal] = record.fetch('dct_temporal_sm', nil)
  dcat_record[:title] = record.fetch('dc_title_s', nil)

  # A record without references must still convert: JSON.parse(nil) would
  # raise TypeError, so a missing or blank value means "no references".
  references_json = record['dct_references_s']
  parsed_references = references_json.to_s.strip.empty? ? {} : JSON.parse(references_json)

  data_format = record['dc_format_s']
  layer_id = record['layer_id_s']
  distributions = dcat_record[:distribution]

  parsed_references.each do |k, v|
    case k
    when "http://schema.org/downloadUrl" ## Direct-download URL
      # The format name doubles as the title; no conformsTo is recorded.
      distributions << generate_distribution_download(data_format, data_format, "application/octet-stream", v)
    when "http://schema.org/url" ## URL
      dcat_record[:landingPage] = v
    when "http://www.opengis.net/cat/csw/csdgm" ## FGDC metadata
      distributions << generate_distribution_download("FGDC Metadata", "FGDC", "application/xml", v, k)
    when "http://www.w3.org/1999/xhtml" ## HTML metadata
      distributions << generate_distribution_download("HTML Metadata", "HTML", "application/html", v, k)
    when "http://iiif.io/api/image" ## IIIF image
      distributions << generate_distribution_service(v, k, nil, ["JPG"])
    when "http://www.isotc211.org/schemas/2005/gmd/" ## ISO 19139 metadata
      distributions << generate_distribution_download("ISO 19139 Metadata", "ISO19139", "application/xml", v, k)
    when "http://www.loc.gov/mods/v3" ## MODS metadata
      distributions << generate_distribution_download("MODS Metadata", "MODS", "application/mods+xml", v, k)
    when "http://www.esri.com/library/whitepapers/pdfs/shapefile.pdf" ## Shapefile (download?)
      distributions << generate_distribution_download("Shapefile", "Shapefile", "application/octet-stream", v, k)
    when "http://www.opengis.net/def/serviceType/ogc/wcs" ## WCS web service
      distributions << generate_distribution_service(v, k, layer_id, [])
    when "http://www.opengis.net/def/serviceType/ogc/wfs", ## WFS web service
         "http://www.opengis.net/def/serviceType/ogc/wms"  ## WMS web service
      distributions << generate_distribution_service(v, k, layer_id, downloadable_formats(data_format, k))
    when "http://schema.org/DownloadAction" ## Harvard downloader
      distributions << generate_distribution_service(v, k, layer_id)
    ## when "http://schema.org/UserDownloads" ## (Is this being used?)
    when "http://iiif.io/api/presentation#manifest",         ## IIIF manifest
         "urn:x-esri:serviceType:ArcGIS#FeatureLayer",       ## ESRI feature layer
         "urn:x-esri:serviceType:ArcGIS#TiledMapLayer",
         "urn:x-esri:serviceType:ArcGIS#DynamicMapLayer",
         "urn:x-esri:serviceType:ArcGIS#ImageMapLayer"
      # Plain service endpoints: no layer id or download formats are recorded.
      distributions << generate_distribution_service(v, k)
    when "http://lccn.loc.gov/sh85035852" ## Data dictionary / codebook
      dcat_record[:describedBy] = v
    else
      puts "** Unknown key: #{k} **"
    end
  end

  # Required properties, checked in order; each message names the GBL 1.0
  # source field the value is read from.
  {
    :@id => 'dc_identifier_s',
    :accessLevel => 'dc_rights_s',
    :geom => 'solr_geom',
    :provenance => 'dct_provenance_s',
    :slug => 'layer_slug_s',
    :title => 'dc_title_s'
  }.each do |property, source_field|
    raise "Missing #{source_field}" unless dcat_record[property]
  end

  # Drop properties that never received a value.
  dcat_record.delete_if { |_k, v| v.nil? }
end

##
# Default download formats offered through a web service, chosen from the
# record's format field and the service-type URI.
#
# Always returns an array; any combination not listed below (including a
# Shapefile served through something other than WMS or WFS) yields [].
# A fresh array is returned on every call, so callers may modify it.
#
# @param dc_format_s [String, nil] value of the record's dc_format_s field
# @param webservice_uri [String] service-type URI of the reference
# @return [Array<String>] names of the downloadable formats
def downloadable_formats(dc_format_s, webservice_uri)
  wms = "http://www.opengis.net/def/serviceType/ogc/wms"
  wfs = "http://www.opengis.net/def/serviceType/ogc/wfs"

  case [dc_format_s, webservice_uri]
  when ["Shapefile", wms]
    ["KMZ"]
  when ["Shapefile", wfs]
    ["Shapefile", "GeoJSON"]
  when ["GeoTIFF", wms], ["ArcGRID", wms]
    ["GeoTIFF"]
  else
    []
  end
end

##
# Build the dcat:Distribution hash describing a downloadable file.
#
# Properties whose value is nil are left out of the result; the remaining
# keys appear in the order @type, title, format, downloadURL, mediaType,
# conformsTo.
#
# @param title [String, nil] display title of the download
# @param format [String, nil] format name
# @param mediaType [String, nil] media type of the download
# @param value [String, nil] URL stored as downloadURL
# @param conformsTo [String, nil] URI stored as conformsTo
# @return [Hash] distribution keyed by symbols
def generate_distribution_download(title,format,mediaType,value,conformsTo = nil)
  distribution = { :@type => 'dcat:Distribution' }
  [
    [:title, title],
    [:format, format],
    [:downloadURL, value],
    [:mediaType, mediaType],
    [:conformsTo, conformsTo]
  ].each do |property, setting|
    # Only nil is skipped; every other value is stored as given.
    distribution[property] = setting unless setting.nil?
  end
  distribution
end

##
# Build the dcat:Distribution hash describing a web service endpoint.
#
# Properties whose value is nil are removed from the result; an empty
# downloadableAs array is not nil and is therefore kept.
#
# @param value [String, nil] URL stored as accessURL
# @param conformsTo [String, nil] URI stored as conformsTo
# @param layerId [String, nil] layer identifier, if any
# @param downloadableAs [Array<String>, nil] download format names, if any
# @return [Hash] distribution keyed by symbols
def generate_distribution_service(value,conformsTo,layerId = nil,downloadableAs = nil)
  service = {}
  service[:@type] = 'dcat:Distribution'
  service[:accessURL] = value
  service[:conformsTo] = conformsTo
  service[:layerId] = layerId
  service[:downloadableAs] = downloadableAs
  service.reject { |_property, setting| setting.nil? }
end
115 changes: 115 additions & 0 deletions v2/schema/context.jsonld
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
{
"@context": {
"@vocab": "http://www.w3.org/ns/dcat#",
"cql": "http://www.opengis.net/def/serviceType/ogc/csw/2.0.2/cql#envelope#",
"dc": "http://purl.org/dc/terms/",
"dcat": "http://www.w3.org/ns/dcat#",
"dctypes": "http://purl.org/dc/dcmitype/",
"edm": "http://www.europeana.eu/schemas/edm/",
"gbl": "http://geoblacklight.org/schema/2.0#",
"pod": "https://project-open-data.cio.gov/v1.1/schema#",
"xsd": "http://www.w3.org/2001/XMLSchema#",
"accrualPeriodicity": {
"@type": "@id",
"@id": "dc:accrualPeriodicity"
},
"accessLevel": {
"@id": "pod:accessLevel"
},
"accessURL": {
"@type": "@id",
"@id": "dcat:accessURL"
},
"conformsTo": {
"@type": "@id",
"@id": "dc:conformsTo"
},
"creator": {
"@id": "dc:creator"
},
"describedBy": {
"@id": "http://www.w3.org/2007/05/powder#describedby",
"@type": "@id"
},
"describedByType": {
"@type": "@id",
"@id": "pod:describedByType"
},
"description": {
"@id": "dc:description"
},
"downloadableAs": {
"@id": "gbl:downloadableAs"
},
"downloadURL": {
"@type": "@id",
"@id": "dcat:downloadURL"
},
"format": {
"@type": "gbl:format",
"@id": "dctypes:format"
},
"geom": {
"@id": "cql:envelope"
},
"geomType": {
"@id": "gbl:geomType"
},
"isPartOf": {
"@type": "@id",
"@id": "dc:isPartOf"
},
"issued": {
"@id": "pod:issued"
},
"language": {
"@id": "dc:language"
},
"layerId": {
"@id": "dc:identifier"
},
"license": {
"@id": "edm:rights"
},
"mediaType": {
"@id": "dcat:mediaType"
},
"modified": {
"@id": "pod:modified"
},
"publisher": {
"@id": "dc:publisher"
},
"provenance": {
"@id": "dc:provenance"
},
"resourceType": {
"@type": "http://purl.org/dc/dcmitype/",
"@id": "dc:type"
},
"rights": {
"@id": "dc:rights"
},
"slug": {
"@id": "gbl:slug"
},
"spatial": {
"@id": "dc:spatial"
},
"subject": {
"@id": "dc:subject"
},
"temporal": {
"@id": "dc:temporal"
},
"theme": {
"@id": "dcat:theme"
},
"title": {
"@id": "dc:title"
},
"year": {
"@id": "xsd:gYear"
}
}
}
56 changes: 56 additions & 0 deletions v2/schema/example.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
{
"accessLevel": "public",
"creator": [
"GIS Lab, Newman Library, Baruch CUNY"
],
"description": "The subway complexes layer was created to represent ridership data for the NYC subway system (Metropolitan Transportation Authority, or MTA).",
"distribution": [
"{\"@type\":\"dcat:Distribution\",\"conformsTo\":\"http://www.opengis.net/def/serviceType/ogc/wms\",\"accessURL\":\"https://maps-public.geo.nyu.edu/geoserver/sdr/wms\",\"layerId\":\"sdr:nyu_2451_34502\",\"downloadableAs\":[\"KMZ\"]}",
"{\"@type\":\"dcat:Distribution\",\"conformsTo\":\"http://www.opengis.net/def/serviceType/ogc/wfs\",\"accessURL\":\"https://maps-public.geo.nyu.edu/geoserver/sdr/wfs\",\"layerId\":\"sdr:nyu_2451_34502\",\"downloadableAs\":[\"Shapefile\",\"GeoJSON\"]}",
"{\"@type\":\"dcat:Distribution\",\"downloadURL\":\"https://archive.nyu.edu/retrieve/74701/nyu_2451_34502.zip\",\"format\":\"Shapefile\",\"mediaType\":\"application/zip\",\"title\":\"Original Shapefile\"}",
"{\"@type\":\"dcat:Distribution\",\"conformsTo\":\"http://www.isotc211.org/schemas/2005/gmd/\",\"downloadURL\":\"http://metadata.geo.nyu.edu/records/edu.nyu/handle/2451/34502/iso19139.xml\",\"format\":\"ISO19139\",\"mediaType\":\"application/xml\",\"title\":\"ISO19139 Metadata\"}"
],
"geom": "ENVELOPE(-74.030876, -73.755405, 40.9031249999998, 40.5761269999998)",
"geomType": "Point",
"isPartOf": "NYC Geodatabase (version jan2016)",
"issued": "2016-1-15",
"identifier": "http://hdl.handle.net/2451/34502",
"language": "en",
"landingPage": "http://hdl.handle.net/2451/34502",
"license": "https://creativecommons.org/licenses/by/4.0/",
"modified": "2016-5-2T18:21:5Z",
"provenance": "Baruch CUNY",
"publisher": [
"Newman Library (Bernard M. Baruch College)"
],
"resourceType": "Dataset",
"rights": "The database and associated documentation are licensed under a Creative Commons Attribution-NonCommercial- ShareAlike license CC BY-NC-SA http://creativecommons.org/licenses/by-nc-sa/4.0/. You are free to share and to adapt the work as long as you cite the source, do not use it for commercial purposes, and release adaptations under the same license.",
"slug": "nyu_2451_34502",
"source": [
"nyu_2451_34635",
"nyu_2451_34636"
],
"spatial": [
"New York, New York, United States",
"Bronx County, New York, United States",
"Kings County, New York, United States",
"New York County, New York, United States",
"Queens County, New York, United States",
"Borough of Bronx, New York, United States",
"Borough of Brooklyn, New York, United States",
"Borough of Manhattan, New York, United States",
"Borough of Queens, New York, United States"
],
"subject": [
"Subway stations",
"Transportation",
"Urban transportation",
"Local transit",
"Commuting"
],
"temporal": [
"2015"
],
"title": "2015 New York City Subway Complexes and Ridership",
"year": 2015
}
Loading