Skip to content

Commit 2433520

Browse files
Add XML format detection for GeoNetwork sync
1 parent 4b43371 commit 2433520

File tree

2 files changed

+260
-1
lines changed

2 files changed

+260
-1
lines changed

Diff for: src/lib/metadata-formats.js

+245
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,245 @@
1+
/**
2+
* GeoNetwork XML Format Detection Utilities
3+
*
4+
* This file contains detection methods for various XML formats that
5+
* GeoNetwork transforms to ISO19139.
6+
*/
7+
8+
import { JSDOM } from 'jsdom';
9+
10+
/**
 * Identify which GeoNetwork transformation applies to an XML document.
 *
 * The XML is parsed with jsdom's DOMParser and then handed to a fixed,
 * ordered list of detectors; the first detector that matches decides the
 * result.
 *
 * @param {string} xmlString - The XML response from GeoNetwork
 * @returns {string|null} The transformation name of the first matching
 *   detector; `null` when only the ISO19139 detector matches; "Invalid XML"
 *   when the parser reports an error; "Unknown XML Format" when no detector
 *   matches.
 */
export function detectFormatWith(xmlString) {
  const dom = new JSDOM();
  const domParser = new dom.window.DOMParser();
  const root = domParser.parseFromString(xmlString, "text/xml").documentElement;

  // A <parsererror> root element is how the parser signals a failed parse.
  if (root.nodeName === "parsererror") {
    console.error("Error parsing XML:", root.textContent);
    return "Invalid XML";
  }

  const declaredNamespaces = getNamespaces(root);

  // Ordered [detector, result] pairs. Order is significant: the specific
  // service detectors run before the generic WxS fallback, and the ISO19139
  // check comes last and maps to `null`.
  const detectors = [
    [isDIF, "DIF-to-ISO19139"],
    [isEsriGeosticker, "EsriGeosticker-to-ISO19139"],
    [isISO19115, "ISO19115-to-ISO19139"],
    [isCSWGetCapabilities, "OGCCSWGetCapabilities-to-ISO19119_ISO19139"],
    [isOGCSLD, "OGCSLD-to-ISO19139"],
    [isSOSGetCapabilities, "OGCSOSGetCapabilities-to-ISO19119_ISO19139"],
    [isWCSGetCapabilities, "OGCWCSGetCapabilities-to-ISO19119_ISO19139"],
    [isWFSDescribeFeatureType, "OGCWFSDescribeFeatureType-to-ISO19110"],
    [isWFSGetCapabilities, "OGCWFSGetCapabilities-to-ISO19119_ISO19139"],
    [isWMCorOWSC, "OGCWMC-OR-OWSC-to-ISO19139"],
    [isWMSGetCapabilities, "OGCWMSGetCapabilities-to-ISO19119_ISO19139"],
    [isWPSGetCapabilities, "OGCWPSGetCapabilities-to-ISO19119_ISO19139"],
    [isGenericWxSGetCapabilities, "OGCWxSGetCapabilities-to-ISO19119_ISO19139"],
    [isThreddsCatalog, "ThreddsCatalog-to-ISO19119_ISO19139"],
    [isISO19139, null],
  ];

  for (const [matches, transformName] of detectors) {
    if (matches(root, declaredNamespaces)) {
      return transformName;
    }
  }

  return "Unknown XML Format";
}
78+
79+
/**
 * Collect the namespace declarations written on a single element.
 *
 * Only the attributes of `element` itself are inspected. A plain `xmlns`
 * attribute is stored under the key `default`; every `xmlns:prefix`
 * attribute is stored under `prefix`.
 *
 * @param {Element} element - The XML element whose attributes are scanned
 * @returns {Object} Map of namespace prefix (or `default`) to namespace URI
 */
function getNamespaces(element) {
  const declared = {};

  for (const { name, value } of element.attributes) {
    if (name === 'xmlns') {
      declared['default'] = value;
    } else if (name.startsWith('xmlns:')) {
      // The prefix is the segment right after the first colon.
      const [, prefix] = name.split(':');
      declared[prefix] = value;
    }
  }

  return declared;
}
99+
100+
/**
 * Detects if the XML is in DIF (Directory Interchange Format) format.
 *
 * Matches when the root element is named `DIF`, with or without a namespace
 * prefix, or when a `dif` namespace is declared and a `DIF` element in that
 * namespace exists below the root.
 *
 * @param {Element} rootElement - Root element of the parsed document
 * @param {Object} namespaces - Prefix-to-URI map of the root's namespace declarations
 * @returns {boolean} `true` when the document looks like DIF
 */
function isDIF(rootElement, namespaces) {
  const tag = rootElement.nodeName;

  // getElementsByTagNameNS only searches descendants, so a prefixed root
  // such as <dif:DIF> has to be recognised by its name here.
  if (tag === 'DIF' || tag.endsWith(':DIF')) {
    return true;
  }

  return Boolean(namespaces.dif) &&
    rootElement.getElementsByTagNameNS(namespaces.dif, 'DIF').length > 0;
}
107+
108+
/**
 * Detects if the XML is in ESRI Geosticker format.
 *
 * The root must be named exactly `metadata`, and either carry
 * `esri_format="geosticker"` or contain at least one `esri` element.
 *
 * @param {Element} rootElement - Root element of the parsed document
 * @param {Object} namespaces - Unused; kept so all detectors share a signature
 * @returns {boolean} `true` when the document looks like ESRI Geosticker metadata
 */
function isEsriGeosticker(rootElement, namespaces) {
  if (rootElement.nodeName !== 'metadata') {
    return false;
  }

  if (rootElement.getAttribute('esri_format') === 'geosticker') {
    return true;
  }

  return rootElement.getElementsByTagName('esri').length > 0;
}
116+
117+
/**
 * Detects if the XML is in ISO 19115 format.
 *
 * Matches an unprefixed `MD_Metadata` root that shows no sign of the
 * ISO 19139 encoding: no `gmd`/`gco` prefix declared and the default
 * namespace is not the gmd namespace.
 *
 * @param {Element} rootElement - Root element of the parsed document
 * @param {Object} namespaces - Prefix-to-URI map of the root's namespace declarations
 * @returns {boolean} `true` when the document looks like plain ISO 19115
 */
function isISO19115(rootElement, namespaces) {
  const GMD_NAMESPACE = 'http://www.isotc211.org/2005/gmd';

  if (rootElement.nodeName !== 'MD_Metadata') {
    return false;
  }

  if (namespaces.gmd || namespaces.gco) {
    return false;
  }

  // <MD_Metadata xmlns="…/gmd"> is already ISO 19139 even though no gmd
  // prefix is declared, so it must not be reported as needing conversion.
  return namespaces.default !== GMD_NAMESPACE;
}
124+
125+
/**
 * Detects if the XML is already in ISO 19139 format.
 *
 * Matches an `MD_Metadata` root (prefixed or not) when a `gmd` or `gco`
 * prefix is declared on it, or when its default namespace is the gmd
 * namespace.
 *
 * @param {Element} rootElement - Root element of the parsed document
 * @param {Object} namespaces - Prefix-to-URI map of the root's namespace declarations
 * @returns {boolean} `true` when the document is already ISO 19139
 */
function isISO19139(rootElement, namespaces) {
  const GMD_NAMESPACE = 'http://www.isotc211.org/2005/gmd';
  const tag = rootElement.nodeName;

  if (tag !== 'MD_Metadata' && !tag.endsWith(':MD_Metadata')) {
    return false;
  }

  // Accept the gmd namespace bound as the default namespace as well as the
  // conventional gmd/gco prefixes.
  return Boolean(namespaces.gmd || namespaces.gco) ||
    namespaces.default === GMD_NAMESPACE;
}
132+
133+
/**
 * Detects if the XML is a CSW GetCapabilities document.
 *
 * The root must be `Capabilities` (prefixed or not). It then matches when a
 * `csw` prefix is declared, or when `service="CSW"` is set on the root and
 * an `OperationsMetadata` element is present.
 *
 * @param {Element} rootElement - Root element of the parsed document
 * @param {Object} namespaces - Prefix-to-URI map of the root's namespace declarations
 * @returns {boolean|string} Truthy on a match (the declared `csw` URI or `true`), otherwise `false`
 */
function isCSWGetCapabilities(rootElement, namespaces) {
  const tag = rootElement.nodeName;

  if (tag !== 'Capabilities' && !tag.endsWith(':Capabilities')) {
    return false;
  }

  return namespaces.csw || (
    rootElement.getAttribute('service') === 'CSW' &&
    rootElement.getElementsByTagName('OperationsMetadata').length > 0
  );
}
142+
143+
/**
 * Detects if the XML is an OGC SLD (Styled Layer Descriptor) document.
 *
 * Only the root element name decides: it must be `StyledLayerDescriptor`,
 * with or without a namespace prefix. A declared `sld` prefix alone is not
 * treated as a match, because other document types can declare that prefix
 * without being SLD documents, and this detector runs before the WMS one.
 *
 * @param {Element} rootElement - Root element of the parsed document
 * @param {Object} namespaces - Unused; kept so all detectors share a signature
 * @returns {boolean} `true` when the root element is a StyledLayerDescriptor
 */
function isOGCSLD(rootElement, namespaces) {
  const tag = rootElement.nodeName;

  return tag === 'StyledLayerDescriptor' ||
    tag.endsWith(':StyledLayerDescriptor');
}
151+
152+
/**
 * Detects if the XML is a SOS GetCapabilities document.
 *
 * The root must be `Capabilities` (prefixed or not). It then matches when a
 * `sos` prefix is declared, or when `service="SOS"` is set on the root and
 * an `OperationsMetadata` element is present.
 *
 * @param {Element} rootElement - Root element of the parsed document
 * @param {Object} namespaces - Prefix-to-URI map of the root's namespace declarations
 * @returns {boolean|string} Truthy on a match (the declared `sos` URI or `true`), otherwise `false`
 */
function isSOSGetCapabilities(rootElement, namespaces) {
  const rootName = rootElement.nodeName;
  const isCapabilitiesRoot =
    rootName === 'Capabilities' || rootName.endsWith(':Capabilities');

  if (!isCapabilitiesRoot) {
    return false;
  }

  return namespaces.sos || (
    rootElement.getAttribute('service') === 'SOS' &&
    rootElement.getElementsByTagName('OperationsMetadata').length > 0
  );
}
161+
162+
/**
 * Detects if the XML is a WCS GetCapabilities document.
 *
 * The root must be `Capabilities` (prefixed or not). It then matches when a
 * `wcs` prefix is declared, or when `service="WCS"` is set on the root and
 * one of `ContentMetadata`, `CoverageOfferingBrief` or `Contents` is present.
 *
 * @param {Element} rootElement - Root element of the parsed document
 * @param {Object} namespaces - Prefix-to-URI map of the root's namespace declarations
 * @returns {boolean|string} Truthy on a match (the declared `wcs` URI or `true`), otherwise `false`
 */
function isWCSGetCapabilities(rootElement, namespaces) {
  const tag = rootElement.nodeName;

  if (tag !== 'Capabilities' && !tag.endsWith(':Capabilities')) {
    return false;
  }

  // Element names checked in the same order as the original condition.
  const contentElementNames = ['ContentMetadata', 'CoverageOfferingBrief', 'Contents'];

  return namespaces.wcs || (
    rootElement.getAttribute('service') === 'WCS' &&
    contentElementNames.some(
      (elementName) => rootElement.getElementsByTagName(elementName).length > 0
    )
  );
}
173+
174+
/**
 * Detects if the XML is a WFS DescribeFeatureType document.
 *
 * Requires all three of: a `schema` root (prefixed or not), a declared
 * `xsd` or `xs` prefix, and a `targetNamespace` attribute containing the
 * substring `wfs`.
 *
 * @param {Element} rootElement - Root element of the parsed document
 * @param {Object} namespaces - Prefix-to-URI map of the root's namespace declarations
 * @returns {*} Truthy on a match; otherwise a falsy value (`false`, `undefined` or `null`)
 */
function isWFSDescribeFeatureType(rootElement, namespaces) {
  const tag = rootElement.nodeName;

  if (tag !== 'schema' && !tag.endsWith(':schema')) {
    return false;
  }

  const schemaNamespace = namespaces.xsd || namespaces.xs;
  const targetNamespace = rootElement.getAttribute('targetNamespace');

  return schemaNamespace && targetNamespace && targetNamespace.includes('wfs');
}
183+
184+
/**
 * Detects if the XML is a WFS GetCapabilities document.
 *
 * Accepts a root named `Capabilities` or `WFS_Capabilities` (the root
 * element name used by WFS capabilities responses), prefixed or not. It
 * then matches when:
 *   - a `wfs` prefix is declared, or
 *   - the default namespace starts with `http://www.opengis.net/wfs`, or
 *   - `service="WFS"` is set on the root and a `FeatureTypeList` or
 *     `FeatureType` element is present.
 *
 * @param {Element} rootElement - Root element of the parsed document
 * @param {Object} namespaces - Prefix-to-URI map of the root's namespace declarations
 * @returns {boolean} `true` when the document looks like WFS capabilities
 */
function isWFSGetCapabilities(rootElement, namespaces) {
  const WFS_NAMESPACE_PREFIX = 'http://www.opengis.net/wfs';
  const tag = rootElement.nodeName;

  const isCapabilitiesRoot = ['Capabilities', 'WFS_Capabilities'].some(
    (rootName) => tag === rootName || tag.endsWith(`:${rootName}`)
  );
  if (!isCapabilitiesRoot) {
    return false;
  }

  if (namespaces.wfs) {
    return true;
  }

  // Covers roots that bind the WFS namespace as the default namespace
  // instead of under a `wfs` prefix (prefix match also covers ".../wfs/2.0").
  const defaultNamespace = namespaces.default;
  if (typeof defaultNamespace === 'string' &&
      defaultNamespace.startsWith(WFS_NAMESPACE_PREFIX)) {
    return true;
  }

  if (rootElement.getAttribute('service') !== 'WFS') {
    return false;
  }

  return rootElement.getElementsByTagName('FeatureTypeList').length > 0 ||
    rootElement.getElementsByTagName('FeatureType').length > 0;
}
194+
195+
/**
 * Detects if the XML is a Web Map Context (WMC) or OWS Context (OWSC) document.
 *
 * The root must be `ViewContext` or `OWSContext` (prefixed or not), and one
 * of the prefixes `wmc`, `owc` or `owsc` must be declared on it.
 *
 * @param {Element} rootElement - Root element of the parsed document
 * @param {Object} namespaces - Prefix-to-URI map of the root's namespace declarations
 * @returns {*} Truthy on a match (the first declared URI among `wmc`, `owc`,
 *   `owsc`); otherwise `false` or `undefined`
 */
function isWMCorOWSC(rootElement, namespaces) {
  const tag = rootElement.nodeName;
  const contextRootNames = ['ViewContext', 'OWSContext'];

  const isContextRoot = contextRootNames.some(
    (rootName) => tag === rootName || tag.endsWith(`:${rootName}`)
  );

  return isContextRoot && (namespaces.wmc || namespaces.owc || namespaces.owsc);
}
203+
204+
/**
 * Detects if the XML is a WMS GetCapabilities document.
 *
 * Matches when:
 *   - the root is `WMT_MS_Capabilities` (the un-namespaced WMS 1.1.1 root), or
 *   - the root is `Capabilities` or `WMS_Capabilities` (prefixed or not) and
 *     either a `wms` prefix is declared, the default namespace is the WMS
 *     namespace, or `service="WMS"` is set on the root and a `Layer` or
 *     `Capability` element is present.
 *
 * @param {Element} rootElement - Root element of the parsed document
 * @param {Object} namespaces - Prefix-to-URI map of the root's namespace declarations
 * @returns {boolean} `true` when the document looks like WMS capabilities
 */
function isWMSGetCapabilities(rootElement, namespaces) {
  const WMS_NAMESPACE = 'http://www.opengis.net/wms';
  const tag = rootElement.nodeName;

  // WMS 1.1.1 documents have no namespace and no `service` attribute on the
  // root; the root element name is the only reliable signal.
  if (tag === 'WMT_MS_Capabilities') {
    return true;
  }

  const isCapabilitiesRoot = ['Capabilities', 'WMS_Capabilities'].some(
    (rootName) => tag === rootName || tag.endsWith(`:${rootName}`)
  );
  if (!isCapabilitiesRoot) {
    return false;
  }

  // WMS 1.3.0 roots commonly bind the WMS namespace as the default
  // namespace rather than under a `wms` prefix.
  if (namespaces.wms || namespaces.default === WMS_NAMESPACE) {
    return true;
  }

  if (rootElement.getAttribute('service') !== 'WMS') {
    return false;
  }

  return rootElement.getElementsByTagName('Layer').length > 0 ||
    rootElement.getElementsByTagName('Capability').length > 0;
}
215+
216+
/**
 * Detects if the XML is a WPS GetCapabilities document.
 *
 * The root must be `Capabilities` (prefixed or not). It then matches when a
 * `wps` prefix is declared, or when `service="WPS"` is set on the root and
 * a `ProcessOfferings` element is present.
 *
 * @param {Element} rootElement - Root element of the parsed document
 * @param {Object} namespaces - Prefix-to-URI map of the root's namespace declarations
 * @returns {boolean|string} Truthy on a match (the declared `wps` URI or `true`), otherwise `false`
 */
function isWPSGetCapabilities(rootElement, namespaces) {
  const rootName = rootElement.nodeName;

  if (rootName !== 'Capabilities' && !rootName.endsWith(':Capabilities')) {
    return false;
  }

  return namespaces.wps || (
    rootElement.getAttribute('service') === 'WPS' &&
    rootElement.getElementsByTagName('ProcessOfferings').length > 0
  );
}
225+
226+
/**
 * Detects if the XML is a generic WxS GetCapabilities document.
 * This is a fallback for other OGC web services not specifically handled.
 *
 * The root must be `Capabilities` (prefixed or not), its `service` attribute
 * must match `W?S` with a single upper-case letter in the middle, and an
 * `OperationsMetadata` element must be present.
 *
 * @param {Element} rootElement - Root element of the parsed document
 * @param {Object} namespaces - Unused; kept so all detectors share a signature
 * @returns {*} `true` on a match; otherwise a falsy value (`false`, `null` or `''`)
 */
function isGenericWxSGetCapabilities(rootElement, namespaces) {
  const tag = rootElement.nodeName;

  if (tag !== 'Capabilities' && !tag.endsWith(':Capabilities')) {
    return false;
  }

  const service = rootElement.getAttribute('service');

  return service &&
    service.match(/^W[A-Z]S$/) &&
    rootElement.getElementsByTagName('OperationsMetadata').length > 0;
}
236+
237+
/**
 * Detects if the XML is a THREDDS catalog.
 *
 * The root must be `catalog` (prefixed or not). It then matches when a
 * `thredds` prefix is declared, when the root's `xmlns` attribute equals the
 * THREDDS InvCatalog v1.0 namespace, or when a `dataset` element is present.
 *
 * @param {Element} rootElement - Root element of the parsed document
 * @param {Object} namespaces - Prefix-to-URI map of the root's namespace declarations
 * @returns {boolean|string} Truthy on a match (the declared `thredds` URI or `true`), otherwise `false`
 */
function isThreddsCatalog(rootElement, namespaces) {
  const tag = rootElement.nodeName;

  if (tag !== 'catalog' && !tag.endsWith(':catalog')) {
    return false;
  }

  const threddsCatalogNamespace =
    'http://www.unidata.ucar.edu/namespaces/thredds/InvCatalog/v1.0';

  return namespaces.thredds ||
    rootElement.getAttribute('xmlns') === threddsCatalogNamespace ||
    rootElement.getElementsByTagName('dataset').length > 0;
}

Diff for: src/scripts/sync-external-metadata.js

+15-1
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import { formatMenusRecursive } from '../lib/format-menu.js'
55
import { datocmsRequest } from '../lib/datocms.js'
66
import { buildMenuTree } from '../lib/build-menu-tree.js'
77
import { Geonetwork } from '../lib/geonetwork.js'
8+
import { detectFormatWith } from '../lib/metadata-formats.js'
89

910
const __filename = fileURLToPath(import.meta.url)
1011
const __dirname = dirname(__filename)
@@ -74,7 +75,6 @@ const findExternalMetadata = (menuTree) => {
7475
return externalMetadatas
7576
}
7677

77-
7878
async function sync() {
7979
try {
8080
const { menus } = await datocmsRequest({
@@ -100,13 +100,20 @@ const syncExternalMetadata = async (externalMetadatas) => {
100100

101101
const transformedSource = transformSourceUrl(sourceUrl)
102102

103+
console.log(transformedSource)
104+
103105
const geonetwork = new Geonetwork(geoNetworkUrl, destination.geonetwork.username, destination.geonetwork.password)
104106

107+
const transformWith = await detectTransform(transformedSource)
108+
109+
console.log(transformWith)
110+
105111
const response = await geonetwork.putRecord({
106112
params: {
107113
metadataType: "METADATA",
108114
uuidProcessing: "OVERWRITE",
109115
url: transformedSource,
116+
// ...(transformWith ? { transformWith } : {})
110117
},
111118
headers: {
112119
'Accept': 'application/json, text/plain, */*',
@@ -118,6 +125,13 @@ const syncExternalMetadata = async (externalMetadatas) => {
118125
}
119126
}
120127

128+
/**
 * Fetch the XML rendering of a source record and detect which transformation
 * it needs.
 *
 * Requests `<transformedSource>/formatters/xml`, reads the body as text and
 * passes it to `detectFormatWith`. The HTTP status is not inspected.
 *
 * @param {string} transformedSource - Base URL of the source record
 * @returns {Promise<string|null>} Whatever `detectFormatWith` returns for the fetched body
 */
const detectTransform = async (transformedSource) => {
  const xmlEndpoint = `${transformedSource}/formatters/xml`
  const response = await fetch(xmlEndpoint)

  return detectFormatWith(await response.text())
}
134+
121135
const transformSourceUrl = (sourceUrl) => {
122136
const url = new URL(sourceUrl)
123137
const baseUrl = url.origin

0 commit comments

Comments
 (0)