1
+ {
2
+ "nbformat" : 4 ,
3
+ "nbformat_minor" : 0 ,
4
+ "metadata" : {
5
+ "colab" : {
6
+ "name" : " speculative-datasets-famous-trees.ipynb" ,
7
+ "provenance" : [],
8
+ "collapsed_sections" : [],
9
+ "authorship_tag" : " ABX9TyPiyVv1urQHvTyoWfIHPaHP" ,
10
+ "include_colab_link" : true
11
+ },
12
+ "kernelspec" : {
13
+ "name" : " python3" ,
14
+ "display_name" : " Python 3"
15
+ },
16
+ "language_info" : {
17
+ "name" : " python"
18
+ }
19
+ },
20
+ "cells" : [
21
+ {
22
+ "cell_type" : " markdown" ,
23
+ "metadata" : {
24
+ "id" : " view-in-github" ,
25
+ "colab_type" : " text"
26
+ },
27
+ "source" : [
28
+ " <a href=\" https://colab.research.google.com/github/alexabruck/speculative-datasets/blob/master/Famous_trees.ipynb\" target=\" _parent\" ><img src=\" https://colab.research.google.com/assets/colab-badge.svg\" alt=\" Open In Colab\" /></a>"
29
+ ]
30
+ },
31
+ {
32
+ "cell_type" : " markdown" ,
33
+ "metadata" : {
34
+ "id" : " 3QB2TSe_RObI"
35
+ },
36
+ "source" : [
37
+ " # Famous trees"
38
+ ]
39
+ },
40
+ {
41
+ "cell_type" : " code" ,
42
+ "metadata" : {
43
+ "id" : " PTn9oj6jMmiW"
44
+ },
45
+ "source" : [
46
+ " %pip install SPARQLWrapper"
47
+ ],
48
+ "execution_count" : null ,
49
+ "outputs" : []
50
+ },
51
+ {
52
+ "cell_type" : " code" ,
53
+ "metadata" : {
54
+ "colab" : {
55
+ "base_uri" : " https://localhost:8080/"
56
+ },
57
+ "id" : " 5DFBHRhTKrOe" ,
58
+ "outputId" : " 7611b096-1fe5-4d71-836c-7f0d67a364bd"
59
+ },
60
+ "source" : [
61
+ " from SPARQLWrapper import SPARQLWrapper, JSON\n " ,
62
+ " \n " ,
63
+ " ENDPOINT_URL = \" https://query.wikidata.org/sparql\"\n " ,
64
+ " \n " ,
65
+ " def get(query):\n " ,
66
+ "     \"\"\" Send `query` to ENDPOINT_URL and return the converted JSON result.\"\"\"\n " ,
67
+ "     # TODO adjust user agent; see https://w.wiki/CX6\n " ,
68
+ "     client = SPARQLWrapper(ENDPOINT_URL, agent=\" Speculative Datasets (https://github.com/alexabruck/speculative-datasets)\" )\n " ,
69
+ "     client.setReturnFormat(JSON)\n " ,
70
+ "     client.setQuery(query)\n " ,
71
+ "     return client.query().convert()\n " ,
72
+ " \n " ,
73
+ " query = \"\"\" SELECT ?item ?itemLabel ?pic\n " ,
74
+ " WHERE \n " ,
75
+ " {\n " ,
76
+ " ?item wdt:P31 wd:Q10884. # Must be a tree\n " ,
77
+ " ?item wdt:P18 ?image_. #has pic\n " ,
78
+ " BIND(REPLACE(wikibase:decodeUri(STR(?image_)), \" http://commons.wikimedia.org/wiki/Special:FilePath/\" , \"\" ) AS ?imageFileName_)\n " ,
79
+ " BIND(REPLACE(?imageFileName_, \" \" , \" _\" ) AS ?imageFileNameSafe_)\n " ,
80
+ " BIND(MD5(?imageFileNameSafe_) AS ?imageFileNameHash_)\n " ,
81
+ " BIND(CONCAT(\" https://upload.wikimedia.org/wikipedia/commons/thumb/\" , SUBSTR(?imageFileNameHash_, 1 , 1 ), \" /\" , SUBSTR(?imageFileNameHash_, 1 , 2 ), \" /\" , ?imageFileNameSafe_, \" /600px-\" , ?imageFileNameSafe_) AS ?pic)\n " ,
82
+ " SERVICE wikibase:label { bd:serviceParam wikibase:language \" [AUTO_LANGUAGE],en\" . } # Helps get the label in your language, if not, then en language\n " ,
83
+ " }\"\"\"\n " ,
84
+ " \n " ,
85
+ " response = get(query)\n " ,
86
+ " items = response[\" results\" ][\" bindings\" ]\n " ,
87
+ " pics = [item['pic']['value'] for item in items]\n " ,
88
+ " print(len(pics))"
89
+ ],
90
+ "execution_count" : 2 ,
91
+ "outputs" : [
92
+ {
93
+ "output_type" : " stream" ,
94
+ "name" : " stdout" ,
95
+ "text" : [
96
+ " 553\n "
97
+ ]
98
+ }
99
+ ]
100
+ },
101
+ {
102
+ "cell_type" : " markdown" ,
103
+ "metadata" : {
104
+ "id" : " PAxXeie4PFOI"
105
+ },
106
+ "source" : [
107
+ " ## Download the pics"
108
+ ]
109
+ },
110
+ {
111
+ "cell_type" : " code" ,
112
+ "metadata" : {
113
+ "id" : " -MvFOLDxAqx0"
114
+ },
115
+ "source" : [
116
+ " !rm -rf pics"
117
+ ],
118
+ "execution_count" : null ,
119
+ "outputs" : []
120
+ },
121
+ {
122
+ "cell_type" : " code" ,
123
+ "metadata" : {
124
+ "id" : " dXOAb4tEM3R1"
125
+ },
126
+ "source" : [
127
+ " import shlex  # shell-quote each URL: decoded file names may contain (, ), & or '\n " ,
128
+ " !mkdir -p pics\n " ,
129
+ " for pic in pics:  # thumbnail URLs assembled by the SPARQL query\n " ,
130
+ "     !wget {shlex.quote(pic)} -P pics"
131
+ ],
132
+ "execution_count" : null ,
133
+ "outputs" : []
134
+ },
135
+ {
136
+ "cell_type" : " code" ,
137
+ "metadata" : {
138
+ "id" : " EHyUVfcKqU1v"
139
+ },
140
+ "source" : [
141
+ " from google.colab import files  # Colab helper that triggers a browser download\n " ,
142
+ " !rm -f results.zip && zip -r results.zip pics\n " ,
143
+ " files.download(\" results.zip\" )"
144
+ ],
145
+ "execution_count" : null ,
146
+ "outputs" : []
147
+ }
148
+ ]
149
+ }
0 commit comments