Skip to content

Commit 7cf69d6

Browse files
committed
Created using Colaboratory
1 parent 225525c commit 7cf69d6

File tree

1 file changed

+149
-0
lines changed

1 file changed

+149
-0
lines changed

Famous_trees.ipynb

+149
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,149 @@
1+
{
2+
"nbformat": 4,
3+
"nbformat_minor": 0,
4+
"metadata": {
5+
"colab": {
6+
"name": "speculative-datasets-famous-trees.ipynb",
7+
"provenance": [],
8+
"collapsed_sections": [],
9+
"authorship_tag": "ABX9TyPiyVv1urQHvTyoWfIHPaHP",
10+
"include_colab_link": true
11+
},
12+
"kernelspec": {
13+
"name": "python3",
14+
"display_name": "Python 3"
15+
},
16+
"language_info": {
17+
"name": "python"
18+
}
19+
},
20+
"cells": [
21+
{
22+
"cell_type": "markdown",
23+
"metadata": {
24+
"id": "view-in-github",
25+
"colab_type": "text"
26+
},
27+
"source": [
28+
"<a href=\"https://colab.research.google.com/github/alexabruck/speculative-datasets/blob/master/Famous_trees.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
29+
]
30+
},
31+
{
32+
"cell_type": "markdown",
33+
"metadata": {
34+
"id": "3QB2TSe_RObI"
35+
},
36+
"source": [
37+
"# Famous trees"
38+
]
39+
},
40+
{
41+
"cell_type": "code",
42+
"metadata": {
43+
"id": "PTn9oj6jMmiW"
44+
},
45+
"source": [
46+
"!pip install SPARQLWrapper"
47+
],
48+
"execution_count": null,
49+
"outputs": []
50+
},
51+
{
52+
"cell_type": "code",
53+
"metadata": {
54+
"colab": {
55+
"base_uri": "https://localhost:8080/"
56+
},
57+
"id": "5DFBHRhTKrOe",
58+
"outputId": "7611b096-1fe5-4d71-836c-7f0d67a364bd"
59+
},
60+
"source": [
61+
"from SPARQLWrapper import SPARQLWrapper, JSON\n",
62+
"\n",
63+
"ENDPOINT_URL = \"https://query.wikidata.org/sparql\"\n",
64+
"\n",
65+
"def get(query):\n",
66+
" user_agent = \"Speculative Datasets (https://github.com/alexabruck/speculative-datasets)\"\n",
67+
" # TODO adjust user agent; see https://w.wiki/CX6\n",
68+
" sparql = SPARQLWrapper(ENDPOINT_URL, agent=user_agent)\n",
69+
" sparql.setQuery(query)\n",
70+
" sparql.setReturnFormat(JSON)\n",
71+
" return sparql.query().convert()\n",
72+
"\n",
73+
"query = \"\"\"SELECT ?item ?itemLabel ?pic\n",
74+
"WHERE \n",
75+
"{\n",
76+
" ?item wdt:P31 wd:Q10884. # Must be a tree\n",
77+
" ?item wdt:P18 ?image_. #has pic\n",
78+
" BIND(REPLACE(wikibase:decodeUri(STR(?image_)), \"http://commons.wikimedia.org/wiki/Special:FilePath/\", \"\") AS ?imageFileName_)\n",
79+
" BIND(REPLACE(?imageFileName_, \" \", \"_\") AS ?imageFileNameSafe_)\n",
80+
" BIND(MD5(?imageFileNameSafe_) AS ?imageFileNameHash_)\n",
81+
" BIND(CONCAT(\"https://upload.wikimedia.org/wikipedia/commons/thumb/\", SUBSTR(?imageFileNameHash_, 1 , 1 ), \"/\", SUBSTR(?imageFileNameHash_, 1 , 2 ), \"/\", ?imageFileNameSafe_, \"/600px-\", ?imageFileNameSafe_) AS ?pic)\n",
82+
" SERVICE wikibase:label { bd:serviceParam wikibase:language \"[AUTO_LANGUAGE],en\". } # Helps get the label in your language, if not, then en language\n",
83+
"}\"\"\"\n",
84+
"\n",
85+
"response = get(query)\n",
86+
"items = response[\"results\"][\"bindings\"]\n",
87+
"pics = [item['pic']['value'] for item in items]\n",
88+
"print(len(pics))"
89+
],
90+
"execution_count": 2,
91+
"outputs": [
92+
{
93+
"output_type": "stream",
94+
"name": "stdout",
95+
"text": [
96+
"553\n"
97+
]
98+
}
99+
]
100+
},
101+
{
102+
"cell_type": "markdown",
103+
"metadata": {
104+
"id": "PAxXeie4PFOI"
105+
},
106+
"source": [
107+
"## Download the pics"
108+
]
109+
},
110+
{
111+
"cell_type": "code",
112+
"metadata": {
113+
"id": "-MvFOLDxAqx0"
114+
},
115+
"source": [
116+
"!rm -rd pics"
117+
],
118+
"execution_count": null,
119+
"outputs": []
120+
},
121+
{
122+
"cell_type": "code",
123+
"metadata": {
124+
"id": "dXOAb4tEM3R1"
125+
},
126+
"source": [
127+
"!mkdir pics\n",
128+
"\n",
129+
"for pic in pics:\n",
130+
" !wget $pic -P pics"
131+
],
132+
"execution_count": null,
133+
"outputs": []
134+
},
135+
{
136+
"cell_type": "code",
137+
"metadata": {
138+
"id": "EHyUVfcKqU1v"
139+
},
140+
"source": [
141+
"!zip -r results.zip pics\n",
142+
"from google.colab import files\n",
143+
"files.download(\"results.zip\")"
144+
],
145+
"execution_count": null,
146+
"outputs": []
147+
}
148+
]
149+
}

0 commit comments

Comments
 (0)