Created using Colaboratory

alexabruck · alexabruck · commit 7cf69d69154d · 2021-10-10T18:52:11.000+02:00
diff --git a/Famous_trees.ipynb b/Famous_trees.ipynb
@@ -0,0 +1,149 @@
+{
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "colab": {
+      "name": "speculative-datasets-famous-trees.ipynb",
+      "provenance": [],
+      "collapsed_sections": [],
+      "authorship_tag": "ABX9TyPiyVv1urQHvTyoWfIHPaHP",
+      "include_colab_link": true
+    },
+    "kernelspec": {
+      "name": "python3",
+      "display_name": "Python 3"
+    },
+    "language_info": {
+      "name": "python"
+    }
+  },
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "view-in-github",
+        "colab_type": "text"
+      },
+      "source": [
+        "<a href=\"https://colab.research.google.com/github/alexabruck/speculative-datasets/blob/master/Famous_trees.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "3QB2TSe_RObI"
+      },
+      "source": [
+        "# Famous trees"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "PTn9oj6jMmiW"
+      },
+      "source": [
+        "!pip install SPARQLWrapper"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "5DFBHRhTKrOe",
+        "outputId": "7611b096-1fe5-4d71-836c-7f0d67a364bd"
+      },
+      "source": [
+        "from SPARQLWrapper import SPARQLWrapper, JSON\n",
+        "\n",
+        "ENDPOINT_URL = \"https://query.wikidata.org/sparql\"\n",
+        "\n",
+        "def get(query):\n",
+        "    user_agent = \"Speculative Datasets (https://github.com/alexabruck/speculative-datasets)\"\n",
+        "    # TODO adjust user agent; see https://w.wiki/CX6\n",
+        "    sparql = SPARQLWrapper(ENDPOINT_URL, agent=user_agent)\n",
+        "    sparql.setQuery(query)\n",
+        "    sparql.setReturnFormat(JSON)\n",
+        "    return sparql.query().convert()\n",
+        "\n",
+        "query = \"\"\"SELECT ?item ?itemLabel ?pic\n",
+        "WHERE \n",
+        "{\n",
+        "  ?item wdt:P31 wd:Q10884. # Must be a tree\n",
+        "  ?item wdt:P18 ?image_. #has pic\n",
+        "  BIND(REPLACE(wikibase:decodeUri(STR(?image_)), \"http://commons.wikimedia.org/wiki/Special:FilePath/\", \"\") AS ?imageFileName_)\n",
+        "  BIND(REPLACE(?imageFileName_, \" \", \"_\") AS ?imageFileNameSafe_)\n",
+        "  BIND(MD5(?imageFileNameSafe_) AS ?imageFileNameHash_)\n",
+        "  BIND(CONCAT(\"https://upload.wikimedia.org/wikipedia/commons/thumb/\", SUBSTR(?imageFileNameHash_, 1 , 1 ), \"/\", SUBSTR(?imageFileNameHash_, 1 , 2 ), \"/\", ?imageFileNameSafe_, \"/600px-\", ?imageFileNameSafe_) AS ?pic)\n",
+        "  SERVICE wikibase:label { bd:serviceParam wikibase:language \"[AUTO_LANGUAGE],en\". } # Helps get the label in your language, if not, then en language\n",
+        "}\"\"\"\n",
+        "\n",
+        "response = get(query)\n",
+        "items = response[\"results\"][\"bindings\"]\n",
+        "pics = [item['pic']['value'] for item in items]\n",
+        "print(len(pics))"
+      ],
+      "execution_count": 2,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "553\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "PAxXeie4PFOI"
+      },
+      "source": [
+        "## Download the pics"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "-MvFOLDxAqx0"
+      },
+      "source": [
+        "!rm -rd pics"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "dXOAb4tEM3R1"
+      },
+      "source": [
+        "!mkdir pics\n",
+        "\n",
+        "for pic in pics:\n",
+        "  !wget $pic -P pics"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "EHyUVfcKqU1v"
+      },
+      "source": [
+        "!zip -r results.zip pics\n",
+        "from google.colab import files\n",
+        "files.download(\"results.zip\")"
+      ],
+      "execution_count": null,
+      "outputs": []
+    }
+  ]
+}