Skip to content

Commit f13c47e

Browse files
committed
Update WikiDataEntities.wl
1 parent 7acd442 commit f13c47e

File tree

1 file changed

+129
-30
lines changed

1 file changed

+129
-30
lines changed

WikiDataEntities.wl

+129-30
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,12 @@ BeginPackage["WikiDataEntities`"]
1313

1414
WikiData::usage="A message head for playing with WikiData";
1515
WikiDataDataset::usage="WikiDataDataset[class] returns the dataset for an entity class
16-
WikiDataDataset[class, pred] returns the dataset for an entity class and special predicate
16+
WikiDataDataset[class, pred] returns the dataset for an entity class and predicate
1717
";
1818
WikiDataClasses::usage="WikiDataClasses[pred] returns the classes implementing pred";
1919
WikiDataEntityStore::usage="";
20+
WikiDataPredicates::usage=
21+
"WikiDataPredicates[name] returns the predicates matching name";
2022

2123

2224
(* ::Subsubsection::Closed:: *)
@@ -100,8 +102,9 @@ prepQuery[query_, limit_]:=
100102
(*wikidataQuery*)
101103

102104

103-
wikidataQuery[query_, limit_] :=
104-
executeRequest@
105+
wikidataQuery//Clear
106+
wikidataQuery[query_, limit_, returnTiming_:False] :=
107+
If[returnTiming, AbsoluteTiming, Identity]@executeRequest@
105108
HTTPRequest[
106109
$wikiDataBase,
107110
<|
@@ -122,6 +125,26 @@ wikidataQuery[query_, limit_] :=
122125
]
123126

124127

128+
(* ::Subsubsection::Closed:: *)
129+
(*wikidataQueryVerbose*)
130+
131+
132+
(* wikidataQueryVerbose[query, limit]
   Runs wikidataQuery with its timing flag switched on, prints how long the
   request took, and returns only the query result. *)
wikidataQueryVerbose[query_, limit_]:=
  Module[
    {timedResult},
    (* Passing True as the third argument makes wikidataQuery wrap its
       request in AbsoluteTiming, so the value is {seconds, result} *)
    timedResult=wikidataQuery[query, limit, True];
    Print[
      Internal`LoadingPanel[
        StringForm[
          "Query execution took `` seconds",
          Part[timedResult, 1]
          ]
        ]
      ];
    (* Hand back the payload without the timing *)
    Part[timedResult, 2]
    ];
146+
147+
125148
(* ::Subsubsection::Closed:: *)
126149
(*$entityPrefixHead*)
127150

@@ -186,7 +209,7 @@ $propertyQueryInner="{
186209
hint:Query hint:optimizer 'None' .
187210
{ BIND(?entity AS ?valUrl) .
188211
BIND(\"N/A\" AS ?propUrl ) .
189-
BIND(\"Name\"@`lang` AS ?propLabel ) .
212+
BIND(\"Label\"@`lang` AS ?propLabel ) .
190213
?entity rdfs:label ?val .
191214
192215
FILTER (LANG(?val) = \"`lang`\")
@@ -299,27 +322,45 @@ extractEntityIDs[json_]:=
299322
(*aggregateEntityClassDataset*)
300323

301324

325+
(* URL prefix stripped from entity URLs to obtain the grouping key *)
$entityPrefix="http://www.wikidata.org/entity/";


(* aggregateEntityClassDataset[res]
   res: a list of query responses, each indexable as
        response["results", "bindings"].
   Joins all bindings into a single Dataset, groups the rows by the entity
   URL with $entityPrefix trimmed off, and reduces every group with
   extractPropertyDataset. Returns the grouped result.

   Note: the former debugging assignment to Global`ugh has been removed so
   that calling this function no longer writes into the user's Global context. *)
aggregateEntityClassDataset[res_]:=
  Module[{resDS},
    resDS = Dataset[Join@@res[[All, "results", "bindings"]]];
    GroupBy[
      resDS,
      StringTrim[#["entity", "value"], $entityPrefix]&,
      extractPropertyDataset
      ]
    ]
312339

313340

314341
(* ::Subsubsection::Closed:: *)
315342
(*cleanData*)
316343

317344

345+
(* Common prefix of the XML Schema datatype URIs handled below *)
$xmlDataTypePrefix="http://www.w3.org/2001/XMLSchema#";


(* cleanData[datatype, val]
   Converts the raw string val according to datatype:
     - "dateTime" (full URI or bare suffix) -> DateObject[val]
     - "decimal"  (full URI or bare suffix) -> Internal`StringToDouble[val]
     - anything else                        -> val unchanged
   icleanData holds the actual conversions, keyed on the full URI. *)
cleanData//Clear;

(* Converters keyed on the fully qualified datatype URI *)
icleanData[$xmlDataTypePrefix<>"dateTime", raw_]:=
  DateObject[raw];
icleanData[$xmlDataTypePrefix<>"decimal", raw_]:=
  Internal`StringToDouble[raw];
icleanData[_, raw_]:=
  raw;

(* Full-URI forms of the known datatypes dispatch straight to icleanData *)
Scan[
  Function[
    cleanData[uri:$xmlDataTypePrefix<>#, raw_]:=
      icleanData[uri, raw]
    ],
  {
    "dateTime",
    "decimal"
    }
  ];

(* Any other string is treated as a bare suffix and gets the prefix added *)
cleanData[suffix_String, raw_]:=
  icleanData[$xmlDataTypePrefix<>suffix, raw];

(* Non-string datatypes: pass the value through *)
cleanData[_, raw_]:=
  raw;
325366

@@ -329,37 +370,43 @@ cleanData[_, val_]:=
329370

330371

331372
(* extractPropertyDataset[resDS]
   resDS: the rows for one entity, each carrying "propLabel" and "val" entries.
   Builds label -> cleaned value rules, merges rules that share a label
   (a lone value is unwrapped, several values stay as a list), and sorts the
   keys so that "Label" comes first. *)
extractPropertyDataset[resDS_]:=
  Module[{ labelRules, merged },
    labelRules =
      Map[
        Function[row,
          row["propLabel"]["value"]->
            cleanData[row["val"]["datatype"], row["val"]["value"]]
          ],
        resDS[[All, {"propLabel", "val"}, {"value", "datatype"}]]
        ];
    merged=
      Merge[labelRules,
        (* Unwrap singletons; keep multi-valued properties as lists *)
        If[Length[#]==1, First[#], #]&
        ];
    (* False sorts before True, so the "Label" key is moved to the front *)
    KeySortBy[merged, UnsameQ[#, "Label"]&]
    ]
340383

341384

342385
(* ::Subsubsection::Closed:: *)
343386
(*wikidataEntityClassDataset*)
344387

345388

389+
$wikiDataChunkSize=15;
390+
391+
346392
wikidataEntityClassDataset//Clear
347393
wikidataEntityClassDataset[
348394
ids:{__String},
349395
limit_,
350-
lang_
396+
lang_,
397+
verb_
351398
]:=
352399
Module[
353400
{
354401
data,
355402
chunks,
356-
chunkSize=5,
403+
chunkSize=$wikiDataChunkSize,
357404
chunkData
358405
},
359406
chunks = Partition[ids, UpTo[chunkSize]];
360407
data=
361-
wikidataQuery[
362-
propertyQuery[ids, lang],
408+
If[verb, wikidataQueryVerbose, wikidataQuery][
409+
propertyQuery[#, lang],
363410
limit
364411
]&/@chunks;
365412
If[AllTrue[data, AssociationQ],
@@ -391,10 +438,15 @@ wikidataEntityClassDataset[
391438
]
392439

393440

394-
(* ::Subsubsection:: *)
441+
(* ::Subsubsection::Closed:: *)
395442
(*$wikiDataProperties*)
396443

397444

445+
(* ::Text:: *)
446+
(*Iteratively scraped off of:*)
447+
(* https://www.wikidata.org/wiki/Category:Wikidata:List_of_properties*)
448+
449+
398450
$propsURL=
399451
"https://raw.githubusercontent.com/b3m2a1/mathematica-tools/master/WikiDataProps.wl";
400452

@@ -405,7 +457,11 @@ $wikiDataProperties//Clear
405457
(* Lazy, memoised loader for $wikiDataProperties.
   Installed only when the symbol has no own-value yet. On first access the
   data is imported from $propsURL, each top-level element is converted with
   Association, and the keys are normalised: "Wikidata_property" is trimmed
   from the key, the rest is split on "_" and re-joined by StringRiffle.
   The result then overwrites the delayed definition, so the import runs once.
   NOTE(review): assumes Import[$propsURL] yields a string-keyed collection - confirm. *)
If[Length[OwnValues[$wikiDataProperties]]==0,
  $wikiDataProperties:=
    $wikiDataProperties=
      KeyMap[
        Function[key,
          StringRiffle[
            StringSplit[StringTrim[key, "Wikidata_property"], "_"]
            ]
          ],
        Map[Association, Import[$propsURL]]
        ]
  ];
410466

411467

@@ -419,7 +475,7 @@ wikidataRelatedStuff[baseData_, baseType_, subType_]:=
419475
With[{tag=#},
420476
KeyMap[tag<>":"<>#&, #2]
421477
]&,
422-
KeySelect[
478+
KeySelect[
423479
#,
424480
StringContainsQ[subType]
425481
]&/@KeySelect[baseData, StringContainsQ[baseType]]
@@ -437,7 +493,7 @@ wikidataRelatedStuff[baseData_, query_]:=
437493
]
438494

439495

440-
(* ::Subsubsection:: *)
496+
(* ::Subsubsection::Closed:: *)
441497
(*wikidataRelatedProps*)
442498

443499

@@ -498,22 +554,29 @@ WikiDataDataset//Clear
498554
(* Default options for WikiDataDataset.
     "MaxItems"  - limit handed to the query functions
     "Language"  - Automatic is resolved to $Language by WikiDataDataset
     "Verbose"   - True selects the timing/printing query variant
     "ChunkSize" - value that WikiDataDataset binds to $wikiDataChunkSize
   The "ChunkSize" default is a delayed rule reading $wikiDataChunkSize, so
   the package keeps a single source of truth for that number instead of
   repeating the literal 15 here. *)
Options[WikiDataDataset]=
  {
    "MaxItems"->1000,
    "Language"->Automatic,
    "Verbose"->False,
    "ChunkSize":>$wikiDataChunkSize
    };
503561
WikiDataDataset[class_String, predicate:_String:"P31", ops:OptionsPattern[]]:=
504562
Module[
505563
{
506564
limit=OptionValue["MaxItems"],
507565
lang=OptionValue["Language"],
508566
pred,
509-
cls
567+
cls,
568+
verb=TrueQ@OptionValue["Verbose"],
569+
chunks=OptionValue["ChunkSize"]
510570
},
511571
lang=Replace[lang, Automatic:>$Language];
512572
If[StringLength[lang]>2, lang=LanguageData[lang, "Codes"][[1]]];
513573
pred=getWikiType[predicate, "P", wikidataRelatedProps];
514574
cls=getWikiType[predicate, "Q", wikidataRelatedItems];
515575
If[StringQ[pred]&&StringQ[cls],
516-
wikidataEntityClassDataset[cls, pred, limit, lang],
576+
Block[
577+
{$wikiDataChunkSize=chunks},
578+
wikidataEntityClassDataset[cls, pred, limit, lang, verb]
579+
],
517580
Failure["BadQuery",
518581
<|
519582
"MessageTemplate"->"Can't process query for class `` and predicate ``",
@@ -526,37 +589,48 @@ WikiDataDataset[ids:{__String}, ops:OptionsPattern[]]:=
526589
Module[
527590
{
528591
limit=OptionValue["MaxItems"],
529-
lang=OptionValue["Language"]
592+
lang=OptionValue["Language"],
593+
chunks=OptionValue["ChunkSize"],
594+
verb=TrueQ@OptionValue["Verbose"]
530595
},
531596
lang=Replace[lang, Automatic:>$Language];
532597
If[StringLength[lang]>2, lang=LanguageData[lang, "Codes"][[1]]];
533-
wikidataEntityClassDataset[ids, limit, lang]
598+
Block[
599+
{$wikiDataChunkSize=chunks},
600+
wikidataEntityClassDataset[ids, limit, lang, verb]
601+
]
534602
];
535603

536604

537-
(* ::Subsubsection:: *)
605+
(* ::Subsubsection::Closed:: *)
538606
(*WikiDataClasses*)
539607

540608

541609
(* Default options for WikiDataClasses.
     "MaxItems" - limit handed to the query functions
     "Language" - Automatic is resolved to $Language by WikiDataClasses
     "Verbose"  - True selects the timing/printing query variant *)
Options[WikiDataClasses]=
  {
    "MaxItems"->1000,
    "Language"->Automatic,
    "Verbose"->False
    };
546615
WikiDataClasses[predicate:_String:"P31", ops:OptionsPattern[]]:=
547616
Module[
548617
{
549618
limit=OptionValue["MaxItems"],
550619
lang=OptionValue["Language"],
551620
pred,
552-
cls
621+
cls,
622+
verb=TrueQ@OptionValue["Verbose"],
623+
queryResults
553624
},
554625
lang=Replace[lang, Automatic:>$Language];
555626
If[StringLength[lang]>2, lang=LanguageData[lang, "Codes"][[1]]];
556627
pred=getWikiType[predicate, "P", wikidataRelatedProps];
557628
If[StringQ@pred,
558629
extractEntityIDs@
559-
wikidataQuery[entityClassQuery@pred, limit],
630+
If[verb,
631+
wikidataQueryVerbose,
632+
wikidataQuery
633+
][entityQuery[cls, pred], limit],
560634
Failure["BadQuery",
561635
<|
562636
"MessageTemplate"->"Can't process query for predicate class ``",
@@ -579,7 +653,10 @@ WikiDataClasses[class_String, predicate_String, ops:OptionsPattern[]]:=
579653
cls=getWikiType[predicate, "Q", wikidataRelatedItems];
580654
If[StringQ[pred]&&StringQ[cls],
581655
extractEntityIDs@
582-
wikidataQuery[entityQuery[cls, pred], limit],
656+
If[verb,
657+
wikidataQueryVerbose,
658+
wikidataQuery
659+
][entityQuery[cls, pred], limit],
583660
Failure["BadQuery",
584661
<|
585662
"MessageTemplate"->"Can't process query for class `` and predicate ``",
@@ -590,6 +667,28 @@ WikiDataClasses[class_String, predicate_String, ops:OptionsPattern[]]:=
590667
]
591668

592669

670+
(* ::Subsubsection::Closed:: *)
671+
(*WikiDataPredicates*)
672+
673+
674+
(* groupPreds[res]
   res: key -> value data whose keys look like "group:name".
   Splits every key on ":" and regroups the entries into a nested
   association of the form <|group -> <|name -> value, ...|>, ...|>. *)
groupPreds[res_]:=
  With[
    {
      keyParts=StringSplit[Keys[res], ":"],
      vals=Values[res]
      },
    GroupBy[
      (* Pair each split key with its value: {{group, name}, value} *)
      Thread[{keyParts, vals}],
      Rule[
        Function[entry, entry[[1, 1]]],
        Function[entry, entry[[1, 2]]->entry[[2]]]
        ],
      Association
      ]
    ]
683+
684+
685+
(* WikiDataPredicates[pred]
   Forwards to wikidataRelatedProps[pred]. *)
WikiDataPredicates[pred_String]:=
  wikidataRelatedProps[pred];

(* WikiDataPredicates[baseType, pred]
   Forwards to wikidataRelatedProps[baseType, pred] and regroups the
   result with groupPreds. *)
WikiDataPredicates[baseType_String, pred_String]:=
  groupPreds[
    wikidataRelatedProps[baseType, pred]
    ];
690+
691+
593692
(* ::Subsubsection:: *)
594693
(*End*)
595694

0 commit comments

Comments
 (0)