@@ -13,10 +13,12 @@ BeginPackage["WikiDataEntities`"]
13
13
14
14
(* Usage messages for the exported symbols. Every call form that is defined
   further down in this package gets its own line in the message. *)
WikiData::usage="A message head for playing with WikiData";
WikiDataDataset::usage="WikiDataDataset[class] returns the dataset for an entity class
WikiDataDataset[class, pred] returns the dataset for an entity class and predicate
WikiDataDataset[{id1, id2, ...}] returns the dataset for a list of entity ids
";
WikiDataClasses::usage="WikiDataClasses[pred] returns the classes implementing pred";
WikiDataEntityStore::usage="";
WikiDataPredicates::usage=
  "WikiDataPredicates[name] returns the predicates matching name
WikiDataPredicates[type, name] returns the predicates matching name, grouped by the property types matching type";
20
22
21
23
22
24
(* ::Subsubsection::Closed:: *)
@@ -100,8 +102,9 @@ prepQuery[query_, limit_]:=
100
102
(*wikidataQuery*)
101
103
102
104
103
- wikidataQuery [query_ , limit_ ] :=
104
- executeRequest @
105
+ wikidataQuery // Clear
106
+ wikidataQuery [query_ , limit_ , returnTiming_ :False ] :=
107
+ If [returnTiming , AbsoluteTiming , Identity ]@ executeRequest @
105
108
HTTPRequest [
106
109
$wikiDataBase ,
107
110
< |
@@ -122,6 +125,26 @@ wikidataQuery[query_, limit_] :=
122
125
]
123
126
124
127
128
+ (* ::Subsubsection::Closed:: *)
129
+ (*wikidataQueryVerbose*)
130
+
131
+
132
(* wikidataQueryVerbose[query, limit]
   Runs wikidataQuery with its timing flag set to True, which yields a
   {seconds, result} pair. Prints the elapsed time and returns only the
   result part. *)
wikidataQueryVerbose[query_, limit_]:=
  Module[
    {
      elapsed,
      result
      },
    (* the third argument True asks wikidataQuery for the timed pair *)
    {elapsed, result} = wikidataQuery[query, limit, True];
    Print[
      Internal`LoadingPanel[
        StringForm["Query execution took `` seconds", elapsed]
        ]
      ];
    result
    ];
146
+
147
+
125
148
(* ::Subsubsection::Closed:: *)
126
149
(*$entityPrefixHead*)
127
150
@@ -186,7 +209,7 @@ $propertyQueryInner="{
186
209
hint:Query hint:optimizer 'None' .
187
210
{ BIND(?entity AS ?valUrl) .
188
211
BIND(\" N/A\" AS ?propUrl ) .
189
- BIND(\" Name \" @`lang` AS ?propLabel ) .
212
+ BIND(\" Label \" @`lang` AS ?propLabel ) .
190
213
?entity rdfs:label ?val .
191
214
192
215
FILTER (LANG(?val) = \" `lang`\" )
@@ -299,27 +322,45 @@ extractEntityIDs[json_]:=
299
322
(*aggregateEntityClassDataset*)
300
323
301
324
325
(* Prefix stripped from each binding's entity value to form the group key *)
$entityPrefix = "http://www.wikidata.org/entity/";


(* aggregateEntityClassDataset[res]
   res: a list of query responses; the "results" -> "bindings" lists of all
        responses are joined into a single Dataset.
   Returns the bindings grouped by entity (with $entityPrefix trimmed from
   the entity value); each group is reduced by extractPropertyDataset.

   The former debugging assignment to Global`ugh has been removed so that
   calling this function no longer writes into the user's Global` context. *)
aggregateEntityClassDataset[res_]:=
  Module[{resDS},
    resDS = Dataset[Join @@ res[[All, "results", "bindings"]]];
    GroupBy[
      resDS,
      StringTrim[#["entity", "value"], $entityPrefix]&,
      extractPropertyDataset
      ]
    ]
312
339
313
340
314
341
(* ::Subsubsection::Closed:: *)
315
342
(*cleanData*)
316
343
317
344
345
(* Prefix shared by the XML Schema datatype URIs that get converted below *)
$xmlDataTypePrefix = "http://www.w3.org/2001/XMLSchema#";


(* Clear the public dispatcher AND the internal converter, so re-evaluating
   this section never leaves stale icleanData definitions behind *)
Clear[cleanData, icleanData];

(* icleanData[typeURI, val]
   Converts the string val according to the full datatype URI:
     ...#dateTime -> DateObject
     ...#decimal  -> machine real
   Any other type returns val unchanged. *)
icleanData[$xmlDataTypePrefix<>"dateTime", val_]:=
  DateObject[val];
icleanData[$xmlDataTypePrefix<>"decimal", val_]:=
  Internal`StringToDouble[val];
icleanData[_, val_]:=
  val;

(* cleanData[type, val]
   Public entry point. type may be
     - a full datatype URI for one of the known types (passed straight through),
     - any other string, which is treated as a short type name and prefixed
       with $xmlDataTypePrefix before conversion,
     - anything else (e.g. a Missing datatype), in which case val is returned
       unchanged. *)
Function[
  (* one literal-URI rule per known type; the left-hand side is evaluated
     at definition time, so k matches the joined string *)
  cleanData[k:$xmlDataTypePrefix<>#, val_]:=
    icleanData[k, val]
  ]/@{
    "dateTime",
    "decimal"
    };
cleanData[k_String, val_]:=
  icleanData[$xmlDataTypePrefix<>k, val];
cleanData[_, val_]:=
  val;
325
366
@@ -329,37 +370,43 @@ cleanData[_, val_]:=
329
370
330
371
331
372
(* extractPropertyDataset[resDS]
   resDS: the bindings belonging to one entity. Only the "value" and
          "datatype" fields of "propLabel" and "val" are read.
   Builds propertyLabel -> cleanedValue rules, merges rules that share a
   label (a single value stays bare, several values stay a list) and sorts
   the keys so that "Label" comes first. *)
extractPropertyDataset[resDS_]:=
  Module[{bindings, labelRules, merged},
    bindings = resDS[[All, {"propLabel", "val"}, {"value", "datatype"}]];
    labelRules =
      Map[
        #["propLabel"]["value"] ->
          cleanData[#["val"]["datatype"], #["val"]["value"]]&,
        bindings
        ];
    merged =
      Merge[
        labelRules,
        Function[vals, If[Length[vals] == 1, First[vals], vals]]
        ];
    (* False sorts before True, so the "Label" key is moved to the front *)
    KeySortBy[merged, Function[key, key =!= "Label"]]
    ]
340
383
341
384
342
385
(* ::Subsubsection::Closed:: *)
343
386
(*wikidataEntityClassDataset*)
344
387
345
388
389
+ $wikiDataChunkSize = 15 ;
390
+
391
+
346
392
wikidataEntityClassDataset // Clear
347
393
wikidataEntityClassDataset [
348
394
ids :{__ String },
349
395
limit_ ,
350
- lang_
396
+ lang_ ,
397
+ verb_
351
398
]:=
352
399
Module [
353
400
{
354
401
data ,
355
402
chunks ,
356
- chunkSize = 5 ,
403
+ chunkSize = $wikiDataChunkSize ,
357
404
chunkData
358
405
},
359
406
chunks = Partition [ids , UpTo [chunkSize ]];
360
407
data =
361
- wikidataQuery [
362
- propertyQuery [ids , lang ],
408
+ If [ verb , wikidataQueryVerbose , wikidataQuery ] [
409
+ propertyQuery [# , lang ],
363
410
limit
364
411
]& /@ chunks ;
365
412
If [AllTrue [data , AssociationQ ],
@@ -391,10 +438,15 @@ wikidataEntityClassDataset[
391
438
]
392
439
393
440
394
- (* ::Subsubsection:: *)
441
+ (* ::Subsubsection::Closed:: *)
395
442
(*$wikiDataProperties*)
396
443
397
444
445
+ (* ::Text:: *)
446
+ (*Iteratively scraped off of:*)
447
+ (* https://www.wikidata.org/wiki/Category:Wikidata:List_of_properties*)
448
+
449
+
398
450
$propsURL =
399
451
"https://raw.githubusercontent.com/b3m2a1/mathematica-tools/master/WikiDataProps.wl" ;
400
452
@@ -405,7 +457,11 @@ $wikiDataProperties//Clear
405
457
If [Length @ OwnValues [$wikiDataProperties ]== 0 ,
406
458
$wikiDataProperties :=
407
459
$wikiDataProperties =
408
- Association /@ Import [$propsURL ]
460
+ KeyMap [
461
+ StringRiffle @
462
+ StringSplit [StringTrim [# , "Wikidata_property" ], "_" ]& ,
463
+ Association /@ Import [$propsURL ]
464
+ ]
409
465
];
410
466
411
467
@@ -419,7 +475,7 @@ wikidataRelatedStuff[baseData_, baseType_, subType_]:=
419
475
With [{tag = # },
420
476
KeyMap [tag <> ":" <> # & , #2 ]
421
477
]& ,
422
- KeySelect [
478
+ KeySelect [
423
479
# ,
424
480
StringContainsQ [subType ]
425
481
]& /@ KeySelect [baseData , StringContainsQ [baseType ]]
@@ -437,7 +493,7 @@ wikidataRelatedStuff[baseData_, query_]:=
437
493
]
438
494
439
495
440
- (* ::Subsubsection:: *)
496
+ (* ::Subsubsection::Closed:: *)
441
497
(*wikidataRelatedProps*)
442
498
443
499
@@ -498,22 +554,29 @@ WikiDataDataset//Clear
498
554
(* Options for WikiDataDataset
   "MaxItems"  - limit handed to the underlying query
   "Language"  - language name or code; Automatic resolves to $Language
   "Verbose"   - when True, the timing-printing query function is used
   "ChunkSize" - value WikiDataDataset binds to $wikiDataChunkSize for the call.
                 The default is delayed so it follows the package-level
                 $wikiDataChunkSize instead of duplicating its literal value. *)
Options[WikiDataDataset]=
  {
    "MaxItems"->1000,
    "Language"->Automatic,
    "Verbose"->False,
    "ChunkSize":>$wikiDataChunkSize
    };
503
561
WikiDataDataset [class_ String , predicate :_ String :"P31" , ops :OptionsPattern []]:=
504
562
Module [
505
563
{
506
564
limit = OptionValue ["MaxItems" ],
507
565
lang = OptionValue ["Language" ],
508
566
pred ,
509
- cls
567
+ cls ,
568
+ verb = TrueQ @ OptionValue ["Verbose" ],
569
+ chunks = OptionValue ["ChunkSize" ]
510
570
},
511
571
lang = Replace [lang , Automatic :> $Language ];
512
572
If [StringLength [lang ]> 2 , lang = LanguageData [lang , "Codes" ][[1 ]]];
513
573
pred = getWikiType [predicate , "P" , wikidataRelatedProps ];
514
574
cls = getWikiType [predicate , "Q" , wikidataRelatedItems ];
515
575
If [StringQ [pred ]&& StringQ [cls ],
516
- wikidataEntityClassDataset [cls , pred , limit , lang ],
576
+ Block [
577
+ {$wikiDataChunkSize = chunks },
578
+ wikidataEntityClassDataset [cls , pred , limit , lang , verb ]
579
+ ],
517
580
Failure ["BadQuery" ,
518
581
< |
519
582
"MessageTemplate" -> "Can't process query for class `` and predicate ``" ,
@@ -526,37 +589,48 @@ WikiDataDataset[ids:{__String}, ops:OptionsPattern[]]:=
526
589
Module [
527
590
{
528
591
limit = OptionValue ["MaxItems" ],
529
- lang = OptionValue ["Language" ]
592
+ lang = OptionValue ["Language" ],
593
+ chunks = OptionValue ["ChunkSize" ],
594
+ verb = TrueQ @ OptionValue ["Verbose" ]
530
595
},
531
596
lang = Replace [lang , Automatic :> $Language ];
532
597
If [StringLength [lang ]> 2 , lang = LanguageData [lang , "Codes" ][[1 ]]];
533
- wikidataEntityClassDataset [ids , limit , lang ]
598
+ Block [
599
+ {$wikiDataChunkSize = chunks },
600
+ wikidataEntityClassDataset [ids , limit , lang , verb ]
601
+ ]
534
602
];
535
603
536
604
537
- (* ::Subsubsection:: *)
605
+ (* ::Subsubsection::Closed:: *)
538
606
(*WikiDataClasses*)
539
607
540
608
541
609
(* Options for WikiDataClasses
   "MaxItems" - limit handed to the underlying query
   "Language" - language name or code; Automatic resolves to $Language
   "Verbose"  - when True, the timing-printing query function is used *)
Options[WikiDataClasses]=
  {"MaxItems"->1000, "Language"->Automatic, "Verbose"->False};
546
615
WikiDataClasses [predicate :_ String :"P31" , ops :OptionsPattern []]:=
547
616
Module [
548
617
{
549
618
limit = OptionValue ["MaxItems" ],
550
619
lang = OptionValue ["Language" ],
551
620
pred ,
552
- cls
621
+ cls ,
622
+ verb = TrueQ @ OptionValue ["Verbose" ],
623
+ queryResults
553
624
},
554
625
lang = Replace [lang , Automatic :> $Language ];
555
626
If [StringLength [lang ]> 2 , lang = LanguageData [lang , "Codes" ][[1 ]]];
556
627
pred = getWikiType [predicate , "P" , wikidataRelatedProps ];
557
628
If [StringQ @ pred ,
558
629
extractEntityIDs @
559
- wikidataQuery [entityClassQuery @ pred , limit ],
630
+ If [verb ,
631
+ wikidataQueryVerbose ,
632
+ wikidataQuery
633
+ ][entityQuery [cls , pred ], limit ],
560
634
Failure ["BadQuery" ,
561
635
< |
562
636
"MessageTemplate" -> "Can't process query for predicate class ``" ,
@@ -579,7 +653,10 @@ WikiDataClasses[class_String, predicate_String, ops:OptionsPattern[]]:=
579
653
cls = getWikiType [predicate , "Q" , wikidataRelatedItems ];
580
654
If [StringQ [pred ]&& StringQ [cls ],
581
655
extractEntityIDs @
582
- wikidataQuery [entityQuery [cls , pred ], limit ],
656
+ If [verb ,
657
+ wikidataQueryVerbose ,
658
+ wikidataQuery
659
+ ][entityQuery [cls , pred ], limit ],
583
660
Failure ["BadQuery" ,
584
661
< |
585
662
"MessageTemplate" -> "Can't process query for class `` and predicate ``" ,
@@ -590,6 +667,28 @@ WikiDataClasses[class_String, predicate_String, ops:OptionsPattern[]]:=
590
667
]
591
668
592
669
670
+ (* ::Subsubsection::Closed:: *)
671
+ (*WikiDataPredicates*)
672
+
673
+
674
(* groupPreds[res]
   res: an association whose keys have the form "group:name".
   Returns an association group -> <|name -> value, ...|>.

   Keys are split at the FIRST ":" only, so a name that itself contains a
   colon is kept whole instead of being cut off at its first segment. *)
groupPreds[res_]:=
  GroupBy[
    Thread[{
      StringSplit[Keys[res], ":", 2],
      Values[res]
      }],
    (* each element is {{group, name}, value} *)
    #[[1, 1]]& -> (#[[1, 2]] -> #[[2]]&),
    Association
    ]
683
+
684
+
685
(* WikiDataPredicates[pred]
   Forwards to wikidataRelatedProps[pred] and returns its result as is. *)
WikiDataPredicates[pred_String]:=
  wikidataRelatedProps[pred]

(* WikiDataPredicates[baseType, pred]
   Forwards to wikidataRelatedProps[baseType, pred] and regroups the
   result with groupPreds. *)
WikiDataPredicates[baseType_String, pred_String]:=
  groupPreds[
    wikidataRelatedProps[baseType, pred]
    ]
690
+
691
+
593
692
(* ::Subsubsection:: *)
594
693
(*End*)
595
694
0 commit comments