Skip to content
This repository was archived by the owner on Dec 2, 2021. It is now read-only.

Commit 30b40ce

Browse files
committed
Add webpages and parent/child relationships
Fixes #12
1 parent dfde2ca commit 30b40ce

File tree

5 files changed

+68
-10
lines changed

5 files changed

+68
-10
lines changed

lib/rialto/etl/configs/stanford_organizations.rb

+21 -5
Original file line number | Diff line number | Diff line change
@@ -12,8 +12,24 @@
1212
provide 'reader_class_name', 'Rialto::Etl::Readers::StanfordOrganizationsJsonReader'
1313
end
1414

15-
to_field '@id', extract_json('$.alias'), transform: transform(prepend: 'http://authorities.stanford.edu/orgs#'), single: true
16-
to_field '@type', extract_json('$.type', translation_map: 'stanford_organizations_to_vivo_types'), single: true
17-
to_field 'http://www.w3.org/2000/01/rdf-schema#label', extract_json('$.name'), single: true
18-
to_field 'http://www.w3.org/2000/01/rdf-schema#seeAlso', extract_json('$.url'), single: true
19-
to_field 'http://vivoweb.org/ontology/core#abbreviation', extract_json('$.orgCodes'), single: true
15+
to_field '@webpage', extract_json('$.url'), single: true
16+
to_field '@id',
17+
extract_json('$.alias'),
18+
transform: transform(prepend: 'http://rialto.stanford.edu/organizations/'),
19+
single: true
20+
to_field '@type',
21+
extract_json('$.type', translation_map: 'stanford_organizations_to_vivo_types'),
22+
single: true
23+
to_field '@parent',
24+
extract_json('$.parent'),
25+
transform: transform(prepend: 'http://rialto.stanford.edu/organizations/'),
26+
single: true
27+
to_field 'http://www.w3.org/2000/01/rdf-schema#label',
28+
extract_json('$.name'),
29+
single: true
30+
to_field 'http://vivoweb.org/ontology/core#abbreviation',
31+
extract_json('$.orgCodes'),
32+
single: true
33+
to_field 'http://dbpedia.org/ontology/alias',
34+
extract_json('$.alias'),
35+
single: true

lib/rialto/etl/readers/stanford_organizations_json_reader.rb

+4 -3
Original file line number | Diff line number | Diff line change
@@ -13,17 +13,18 @@ class StanfordOrganizationsJsonReader < TrajectPlus::JsonReader
1313
# @param block [#call] a block that is executed on each organization
1414
# @return [String] JSON representation of an organization
1515
def each(&block)
16-
yield_children(json, block)
16+
yield_children(hash: json, block: block)
1717
end
1818

1919
private
2020

21-
def yield_children(hash, block)
21+
def yield_children(hash:, block:, parent: nil)
22+
hash['parent'] = parent if parent
2223
block.call(hash)
2324
children = children_path(hash)
2425
return if children.blank?
2526
children.each do |child|
26-
yield_children(child, block)
27+
yield_children(hash: child, block: block, parent: hash['alias'])
2728
end
2829
end
2930

lib/rialto/etl/writers/ntriples_writer.rb

+25
Original file line number | Diff line number | Diff line change
@@ -2,13 +2,17 @@
22

33
require 'rdf'
44
require 'traject'
5+
require 'uuid'
56

67
module Rialto
78
module Etl
89
# Holds writers for use in Traject mappings
910
module Writers
1011
# Write NTriples records
1112
class NtriplesWriter < Traject::LineWriter
13+
# rubocop:disable Metrics/AbcSize
14+
# rubocop:disable Metrics/MethodLength
15+
# rubocop:disable Metrics/LineLength
1216
# Overrides the serialization routine from superclass
1317
#
1418
# @param context [Traject::Indexer::Context] a Traject context
@@ -19,13 +23,34 @@ def serialize(context)
1923
subject = RDF::URI.new(hash.delete('@id'))
2024
type = RDF::URI.new(hash.delete('@type'))
2125
graph = RDF::Graph.new << [subject, RDF.type, type]
26+
if hash.key?('@parent')
27+
parent = RDF::URI.new(hash.delete('@parent'))
28+
graph << [subject, RDF::URI.new('http://purl.obolibrary.org/obo/BFO_0000050'), RDF::URI.new(parent)]
29+
end
30+
if hash.key?('@webpage')
31+
webpage = RDF::Literal.new(hash.delete('@webpage'), datatype: RDF::XSD.anyURI)
32+
vcard_kind = RDF::URI.new("http://rialto.stanford.edu/cards/#{UUID.generate}")
33+
vcard_url = RDF::URI.new("http://rialto.stanford.edu/cards/#{UUID.generate}")
34+
graph << [subject, RDF::URI.new('http://purl.obolibrary.org/obo/ARG_2000028'), vcard_kind]
35+
graph << [vcard_kind, RDF.type, RDF::URI.new('http://www.w3.org/2006/vcard/ns#Kind')]
36+
graph << [vcard_kind, RDF.type, RDF::URI.new('http://www.w3.org/2006/vcard/ns#Individual')]
37+
graph << [vcard_kind, RDF::URI.new('http://purl.obolibrary.org/obo/ARG_2000029'), subject]
38+
graph << [vcard_kind, RDF::URI.new('http://www.w3.org/2006/vcard/ns#hasURL'), vcard_url]
39+
graph << [vcard_url, RDF.type, RDF::URI.new('http://www.w3.org/2006/vcard/ns#URL')]
40+
graph << [vcard_url, RDF::RDFS.label, 'Website']
41+
graph << [vcard_url, RDF::URI.new('http://vivoweb.org/ontology/core#rank'), RDF::Literal.new('1', datatype: RDF::XSD.int)]
42+
graph << [vcard_url, RDF::URI.new('http://www.w3.org/2006/vcard/ns#url'), webpage]
43+
end
2244
hash.each_pair do |field, values|
2345
Array(values).each do |value|
2446
graph << [subject, RDF::URI.new(field), value]
2547
end
2648
end
2749
graph.dump(:ntriples)
2850
end
51+
# rubocop:enable Metrics/AbcSize
52+
# rubocop:enable Metrics/MethodLength
53+
# rubocop:enable Metrics/LineLength
2954
end
3055
end
3156
end

rialto-etl.gemspec

+1
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,7 @@ Gem::Specification.new do |spec|
2222
spec.add_dependency 'httpclient'
2323
spec.add_dependency 'rdf'
2424
spec.add_dependency 'traject_plus', '>= 0.0.2'
25+
spec.add_dependency 'uuid'
2526

2627
spec.add_development_dependency 'bundler', '~> 1.11'
2728
spec.add_development_dependency 'rake', '~> 10.0'

spec/writers/ntriples_writer_spec.rb

+17 -2
Original file line number | Diff line number | Diff line change
@@ -24,18 +24,33 @@
2424
'@id' => 'http://id.example.org/1234',
2525
'@type' => 'http://types.example.org/bar',
2626
'http://example.org/baz' => 'quux',
27-
'http://example.net/ns#quuux' => 'quuuux'
27+
'http://example.net/ns#quuux' => 'quuuux',
28+
'@parent' => 'http://id.example.org/5678',
29+
'@webpage' => 'http://sites.example.org/awesome.html'
2830
}
2931
end
32+
# rubocop:disable Metrics/LineLength
3033
let(:output_lines) do
3134
[
3235
'<http://id.example.org/1234> <http://example.net/ns#quuux> "quuuux" .',
3336
'<http://id.example.org/1234> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://types.example.org/bar> .',
34-
'<http://id.example.org/1234> <http://example.org/baz> "quux" .'
37+
'<http://id.example.org/1234> <http://example.org/baz> "quux" .',
38+
'<http://id.example.org/1234> <http://purl.obolibrary.org/obo/BFO_0000050> <http://id.example.org/5678> .',
39+
'<http://id.example.org/1234> <http://purl.obolibrary.org/obo/ARG_2000028> <http://rialto.stanford.edu/cards/5a69f5d0-e201-0135-1f1a-54ee756b784d> .',
40+
'<http://rialto.stanford.edu/cards/5a69f5d0-e201-0135-1f1a-54ee756b784d> <http://www.w3.org/2006/vcard/ns#hasURL> <http://rialto.stanford.edu/cards/f6213850-e201-0135-1f1c-54ee756b784d> .',
41+
'<http://rialto.stanford.edu/cards/f6213850-e201-0135-1f1c-54ee756b784d> <http://www.w3.org/2006/vcard/ns#url> "http://sites.example.org/awesome.html"^^<http://www.w3.org/2001/XMLSchema#anyURI> .'
3542
]
3643
end
44+
# rubocop:enable Metrics/LineLength
3745
let(:serialized) { writer.serialize(context) }
3846

47+
before do
48+
allow(UUID).to receive(:generate).and_return(
49+
'5a69f5d0-e201-0135-1f1a-54ee756b784d',
50+
'f6213850-e201-0135-1f1c-54ee756b784d'
51+
)
52+
end
53+
3954
it 'dumps an ntriples representation of the context' do
4055
output_lines.each do |line|
4156
expect(serialized).to include(line)

0 commit comments

Comments
 (0)