From ae33e9d977e6bfc90a56329d49e973429b4728ad Mon Sep 17 00:00:00 2001
From: Phillip Cloud <417981+cpcloud@users.noreply.github.com>
Date: Mon, 3 Feb 2025 06:59:38 -0500
Subject: [PATCH] docs(campaign-finance-blog-post): use nullif

---
 .../campaign-finance/index/execute-results/html.json     | 9 ++++++---
 docs/posts/campaign-finance/index.qmd                    | 2 +-
 2 files changed, 7 insertions(+), 4 deletions(-)
diff --git a/docs/_freeze/posts/campaign-finance/index/execute-results/html.json b/docs/_freeze/posts/campaign-finance/index/execute-results/html.json
index 4f436cb38c6f..fc7975a0c67d 100644
--- a/docs/_freeze/posts/campaign-finance/index/execute-results/html.json
+++ b/docs/_freeze/posts/campaign-finance/index/execute-results/html.json
@@ -1,15 +1,18 @@
 {
-  "hash": "989ed0f2ebddb8e202db6a33bc1bf790",
+  "hash": "1d202727fff4e489f89e9f3f7806520e",
   "result": {
     "engine": "jupyter",
-    "markdown": "---\ntitle: \"Exploring campaign finance data\"\nauthor: \"Nick Crews\"\ndate: \"2023-03-24\"\ncategories:\n    - blog\n    - data engineering\n    - case study\n    - duckdb\n    - performance\n---\n\nHi! My name is [Nick Crews](https://www.linkedin.com/in/nicholas-b-crews/),\nand I'm a data engineer that looks at public campaign finance data.\n\nIn this post, I'll walk through how I use Ibis to explore public campaign contribution\ndata from the Federal Election Commission (FEC). We'll do some loading,\ncleaning, featurizing, and visualization. There will be filtering, sorting, grouping,\nand aggregation.\n\n## Downloading The Data\n\n::: {#e29f35c8 .cell execution_count=2}\n``` {.python .cell-code}\nfrom pathlib import Path\nfrom zipfile import ZipFile\nfrom urllib.request import urlretrieve\n\n# Download and unzip the 2018 individual contributions data\nurl = \"https://cg-519a459a-0ea3-42c2-b7bc-fa1143481f74.s3-us-gov-west-1.amazonaws.com/bulk-downloads/2018/indiv18.zip\"\nzip_path = Path(\"indiv18.zip\")\ncsv_path = Path(\"indiv18.csv\")\n\nif not zip_path.exists():\n    urlretrieve(url, zip_path)\n\nif not csv_path.exists():\n    with ZipFile(zip_path) as zip_file, csv_path.open(\"w\") as csv_file:\n        for line in zip_file.open(\"itcont.txt\"):\n            csv_file.write(line.decode())\n```\n:::\n\n\n## Loading the data\n\nNow that we have our raw data in a .csv format, let's load it into Ibis,\nusing the duckdb backend.\n\nNote that a 4.3 GB .csv would be near the limit of what pandas could\nhandle on my laptop with 16GB of RAM. In pandas, typically every time\nyou perform a transformation on the data, a copy of the data is made.\nI could only do a few transformations before I ran out of memory.\n\nWith Ibis, this problem is solved in two different ways.\n\nFirst, because they are designed to work with very large datasets,\nmany (all?) 
SQL backends support out of core operations.\nThe data lives on disk, and are only loaded in a streaming fashion\nwhen needed, and then written back to disk as the operation is performed.\n\nSecond, unless you explicitly ask for it, Ibis makes use of lazy\nevaluation. This means that when you ask for a result, the\nresult is not persisted in memory. Only the original source\ndata is persisted. Everything else is derived from this on the fly.\n\n::: {#0a6991f4 .cell execution_count=3}\n``` {.python .cell-code}\nimport ibis\nfrom ibis import _\n\nibis.options.interactive = True\n\n# The raw .csv file doesn't have column names, so we will add them in the next step.\nraw = ibis.read_csv(csv_path)\nraw\n```\n\n::: {.cell-output .cell-output-display execution_count=16}\n```{=html}\n<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">┏━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┓\n┃<span style=\"font-weight: bold\"> C00401224 </span>┃<span style=\"font-weight: bold\"> A      </span>┃<span style=\"font-weight: bold\"> M6     </span>┃<span style=\"font-weight: bold\"> P      </span>┃<span style=\"font-weight: bold\"> 201804059101866001 </span>┃<span style=\"font-weight: bold\"> 24T    </span>┃<span style=\"font-weight: bold\"> IND    </span>┃<span style=\"font-weight: bold\"> STOUFFER, LEIGH   </span>┃<span style=\"font-weight: bold\"> AMSTELVEEN   </span>┃<span style=\"font-weight: bold\"> ZZ     </span>┃<span style=\"font-weight: bold\"> 1187RC    </span>┃<span style=\"font-weight: bold\"> MYSELF            </span>┃<span style=\"font-weight: bold\"> SELF EMPLOYED           </span>┃<span 
style=\"font-weight: bold\"> 05172017 </span>┃<span style=\"font-weight: bold\"> 10    </span>┃<span style=\"font-weight: bold\"> C00458000 </span>┃<span style=\"font-weight: bold\"> SA11AI_81445687 </span>┃<span style=\"font-weight: bold\"> 1217152 </span>┃<span style=\"font-weight: bold\"> column18 </span>┃<span style=\"font-weight: bold\"> EARMARKED FOR PROGRESSIVE CHANGE CAMPAIGN COMMITTEE (C00458000) </span>┃<span style=\"font-weight: bold\"> 4050820181544765358 </span>┃\n┡━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━━╇━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━┩\n│ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span>    │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span> │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span> │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span> │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">int64</span>              │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span> │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span> │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span>            │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span>       │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span> │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span>    │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span>            │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span>                  │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span>   │ 
<span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">int64</span> │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span>    │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span>          │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">int64</span>   │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span>   │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span>                                                          │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">int64</span>               │\n├───────────┼────────┼────────┼────────┼────────────────────┼────────┼────────┼───────────────────┼──────────────┼────────┼───────────┼───────────────────┼─────────────────────────┼──────────┼───────┼───────────┼─────────────────┼─────────┼──────────┼─────────────────────────────────────────────────────────────────┼─────────────────────┤\n│ <span style=\"color: #008000; text-decoration-color: #008000\">C00401224</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">A     </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">M6    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">P     </span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">201804059101867748</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">24T   </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">IND   </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">STRAWS, JOYCE    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">OCOEE       </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">FL    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">34761    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">SILVERSEA CRUISES</span> │ <span 
style=\"color: #008000; text-decoration-color: #008000\">RESERVATIONS SUPERVISOR</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">05182017</span> │    <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">10</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">C00000935</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">SA11AI_81592336</span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1217152</span> │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">NULL</span>     │ <span style=\"color: #008000; text-decoration-color: #008000\">EARMARKED FOR DCCC (C00000935)                                 </span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">4050820181544770597</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">C00401224</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">A     </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">M6    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">P     </span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">201804059101867748</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">24T   </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">IND   </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">STRAWS, JOYCE    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">OCOEE       </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">FL    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">34761    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">SILVERSEA CRUISES</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">RESERVATIONS SUPERVISOR</span> │ <span style=\"color: 
#008000; text-decoration-color: #008000\">05192017</span> │    <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">15</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">C00000935</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">SA11AI_81627562</span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1217152</span> │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">NULL</span>     │ <span style=\"color: #008000; text-decoration-color: #008000\">EARMARKED FOR DCCC (C00000935)                                 </span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">4050820181544770598</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">C00401224</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">A     </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">M6    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">P     </span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">201804059101865942</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">24T   </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">IND   </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">STOTT, JIM       </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">CAPE NEDDICK</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">ME    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">039020760</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">NONE             </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">NONE                   </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">05132017</span> │    <span style=\"color: #008080; 
text-decoration-color: #008080; font-weight: bold\">35</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">C00000935</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">SA11AI_81047921</span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1217152</span> │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">NULL</span>     │ <span style=\"color: #008000; text-decoration-color: #008000\">EARMARKED FOR DCCC (C00000935)                                 </span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">4050820181544765179</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">C00401224</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">A     </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">M6    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">P     </span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">201804059101865942</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">24T   </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">IND   </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">STOTT, JIM       </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">CAPE NEDDICK</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">ME    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">039020760</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">NONE             </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">NONE                   </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">05152017</span> │    <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">35</span> │ <span style=\"color: #008000; 
text-decoration-color: #008000\">C00000935</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">SA11AI_81209209</span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1217152</span> │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">NULL</span>     │ <span style=\"color: #008000; text-decoration-color: #008000\">EARMARKED FOR DCCC (C00000935)                                 </span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">4050820181544765180</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">C00401224</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">A     </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">M6    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">P     </span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">201804059101865942</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">24T   </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">IND   </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">STOTT, JIM       </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">CAPE NEDDICK</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">ME    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">039020760</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">NONE             </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">NONE                   </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">05192017</span> │     <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">5</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">C00000935</span> │ <span style=\"color: #008000; text-decoration-color: 
#008000\">SA11AI_81605223</span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1217152</span> │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">NULL</span>     │ <span style=\"color: #008000; text-decoration-color: #008000\">EARMARKED FOR DCCC (C00000935)                                 </span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">4050820181544765181</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">C00401224</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">A     </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">M6    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">P     </span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">201804059101865943</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">24T   </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">IND   </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">STOTT, JIM       </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">CAPE NEDDICK</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">ME    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">039020760</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">NONE             </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">NONE                   </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">05242017</span> │    <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">15</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">C00000935</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">SA11AI_82200022</span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: 
bold\">1217152</span> │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">NULL</span>     │ <span style=\"color: #008000; text-decoration-color: #008000\">EARMARKED FOR DCCC (C00000935)                                 </span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">4050820181544765182</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">C00401224</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">A     </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">M6    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">P     </span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">201804059101865943</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">24T   </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">IND   </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">STOTT, JIM       </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">CAPE NEDDICK</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">ME    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">03902    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">NOT EMPLOYED     </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">NOT EMPLOYED           </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">05292017</span> │   <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">100</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">C00213512</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">SA11AI_82589834</span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1217152</span> │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">NULL</span>     │ <span 
style=\"color: #008000; text-decoration-color: #008000\">EARMARKED FOR NANCY PELOSI FOR CONGRESS (C00213512)            </span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">4050820181544765184</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">C00401224</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">A     </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">M6    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">P     </span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">201804059101865944</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">24T   </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">IND   </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">STOTT, JIM       </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">CAPE NEDDICK</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">ME    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">039020760</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">NONE             </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">NONE                   </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">05302017</span> │    <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">35</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">C00000935</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">SA11AI_82643727</span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1217152</span> │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">NULL</span>     │ <span style=\"color: #008000; text-decoration-color: #008000\">EARMARKED FOR DCCC (C00000935)                          
       </span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">4050820181544765185</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">C00401224</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">A     </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">M6    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">P     </span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">201804059101867050</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">24T   </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">IND   </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">STRANGE, WINIFRED</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">ANNA MSRIA  </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">FL    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">34216    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">NOT EMPLOYED     </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">NOT EMPLOYED           </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">05162017</span> │    <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">25</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">C00000935</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">SA11AI_81325918</span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1217152</span> │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">NULL</span>     │ <span style=\"color: #008000; text-decoration-color: #008000\">EARMARKED FOR DCCC (C00000935)                                 </span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: 
bold\">4050820181544768505</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">C00401224</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">A     </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">M6    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">P     </span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">201804059101867051</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">24T   </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">IND   </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">STRANGE, WINIFRED</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">ANNA MSRIA  </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">FL    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">34216    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">NOT EMPLOYED     </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">NOT EMPLOYED           </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">05232017</span> │    <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">25</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">C00000935</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">SA11AI_81991189</span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1217152</span> │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">NULL</span>     │ <span style=\"color: #008000; text-decoration-color: #008000\">EARMARKED FOR DCCC (C00000935)                                 </span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">4050820181544768506</span> │\n│ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>   
      │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>      │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>      │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>      │                  <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span> │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>      │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>      │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>                 │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>            │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>      │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>         │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>                 │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>                       │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>        │     <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span> │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>         │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>               │       <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span> │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>        │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>                                                               │                   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span> 
│\n└───────────┴────────┴────────┴────────┴────────────────────┴────────┴────────┴───────────────────┴──────────────┴────────┴───────────┴───────────────────┴─────────────────────────┴──────────┴───────┴───────────┴─────────────────┴─────────┴──────────┴─────────────────────────────────────────────────────────────────┴─────────────────────┘\n</pre>\n```\n:::\n:::\n\n\n::: {#ebb6e702 .cell execution_count=4}\n``` {.python .cell-code}\n# For a more comprehesive description of the columns and their meaning, see\n# https://www.fec.gov/campaign-finance-data/contributions-individuals-file-description/\ncolumns = {\n    \"CMTE_ID\": \"keep\",  # Committee ID\n    \"AMNDT_IND\": \"drop\",  # Amendment indicator. A = amendment, N = new, T = termination\n    \"RPT_TP\": \"drop\",  # Report type (monthly, quarterly, etc)\n    \"TRANSACTION_PGI\": \"keep\",  # Primary/general indicator\n    \"IMAGE_NUM\": \"drop\",  # Image number\n    \"TRANSACTION_TP\": \"drop\",  # Transaction type\n    \"ENTITY_TP\": \"keep\",  # Entity type\n    \"NAME\": \"drop\",  # Contributor name\n    \"CITY\": \"keep\",  # Contributor city\n    \"STATE\": \"keep\",  # Contributor state\n    \"ZIP_CODE\": \"drop\",  # Contributor zip code\n    \"EMPLOYER\": \"drop\",  # Contributor employer\n    \"OCCUPATION\": \"drop\",  # Contributor occupation\n    \"TRANSACTION_DT\": \"keep\",  # Transaction date\n    \"TRANSACTION_AMT\": \"keep\",  # Transaction amount\n    # Other ID. For individual contributions will be null. For contributions from\n    # other FEC committees, will be the committee ID of the other committee.\n    \"OTHER_ID\": \"drop\",\n    \"TRAN_ID\": \"drop\",  # Transaction ID\n    \"FILE_NUM\": \"drop\",  # File number, unique number assigned to each report filed with the FEC\n    \"MEMO_CD\": \"drop\",  # Memo code\n    \"MEMO_TEXT\": \"drop\",  # Memo text\n    \"SUB_ID\": \"drop\",  # Submission ID. 
Unique number assigned to each transaction.\n}\n\nrenaming = {old: new for old, new in zip(raw.columns, columns.keys())}\nto_keep = [k for k, v in columns.items() if v == \"keep\"]\nkept = raw.relabel(renaming)[to_keep]\nkept\n```\n\n::: {.cell-output .cell-output-display execution_count=17}\n```{=html}\n<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">┏━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓\n┃<span style=\"font-weight: bold\"> CMTE_ID   </span>┃<span style=\"font-weight: bold\"> TRANSACTION_PGI </span>┃<span style=\"font-weight: bold\"> ENTITY_TP </span>┃<span style=\"font-weight: bold\"> CITY         </span>┃<span style=\"font-weight: bold\"> STATE  </span>┃<span style=\"font-weight: bold\"> TRANSACTION_DT </span>┃<span style=\"font-weight: bold\"> TRANSACTION_AMT </span>┃\n┡━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩\n│ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span>    │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span>          │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span>    │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span>       │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span> │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span>         │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">int64</span>           │\n├───────────┼─────────────────┼───────────┼──────────────┼────────┼────────────────┼─────────────────┤\n│ <span style=\"color: #008000; text-decoration-color: #008000\">C00401224</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">P              </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">IND      </span> │ <span 
style=\"color: #008000; text-decoration-color: #008000\">OCOEE       </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">FL    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">05182017      </span> │              <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">10</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">C00401224</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">P              </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">IND      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">OCOEE       </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">FL    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">05192017      </span> │              <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">15</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">C00401224</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">P              </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">IND      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">CAPE NEDDICK</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">ME    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">05132017      </span> │              <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">35</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">C00401224</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">P              </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">IND      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">CAPE NEDDICK</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">ME    </span> │ 
<span style=\"color: #008000; text-decoration-color: #008000\">05152017      </span> │              <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">35</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">C00401224</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">P              </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">IND      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">CAPE NEDDICK</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">ME    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">05192017      </span> │               <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">5</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">C00401224</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">P              </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">IND      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">CAPE NEDDICK</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">ME    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">05242017      </span> │              <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">15</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">C00401224</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">P              </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">IND      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">CAPE NEDDICK</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">ME    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">05292017      </span> │             <span style=\"color: #008080; text-decoration-color: #008080; 
font-weight: bold\">100</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">C00401224</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">P              </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">IND      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">CAPE NEDDICK</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">ME    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">05302017      </span> │              <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">35</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">C00401224</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">P              </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">IND      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">ANNA MSRIA  </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">FL    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">05162017      </span> │              <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">25</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">C00401224</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">P              </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">IND      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">ANNA MSRIA  </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">FL    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">05232017      </span> │              <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">25</span> │\n│ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>         │ <span style=\"color: #7f7f7f; 
text-decoration-color: #7f7f7f\">…</span>               │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>         │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>            │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>      │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>              │               <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span> │\n└───────────┴─────────────────┴───────────┴──────────────┴────────┴────────────────┴─────────────────┘\n</pre>\n```\n:::\n:::\n\n\n::: {#3f4ad522 .cell execution_count=5}\n``` {.python .cell-code}\n# 21 million rows\nkept.count()\n```\n\n::: {.cell-output .cell-output-display}\n```{=html}\n<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"></pre>\n```\n:::\n\n::: {.cell-output .cell-output-display execution_count=18}\n\n::: {.ansi-escaped-output}\n```{=html}\n<pre>┌──────────┐\n│ <span class=\"ansi-cyan-fg ansi-bold\">21730730</span> │\n└──────────┘</pre>\n```\n:::\n\n:::\n:::\n\n\nHuh, what's up with those timings? Previewing the head only took a fraction of a second,\nbut finding the number of rows took 10 seconds.\n\nThat's because duckdb is scanning the .csv file on the fly every time we access it.\nSo we only have to read the first few lines to get that preview,\nbut we have to read the whole file to get the number of rows.\n\nNote that this isn't a feature of Ibis, but a feature of Duckdb. This is what I think is\none of the strengths of Ibis: Ibis itself doesn't have to implement any of the\noptimizations or features of the backends. Those backends can focus on what they do\nbest, and Ibis can get those things for free.\n\nSo, let's tell duckdb to actually read in the file to its native format so later accesses\nwill be faster. 
This will take ~20 seconds, but we'll only have to pay that cost once.\n\n::: {#c45e7319 .cell execution_count=6}\n``` {.python .cell-code}\nkept = kept.cache()\nkept\n```\n\n::: {.cell-output .cell-output-display execution_count=19}\n```{=html}\n<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">┏━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓\n┃<span style=\"font-weight: bold\"> CMTE_ID   </span>┃<span style=\"font-weight: bold\"> TRANSACTION_PGI </span>┃<span style=\"font-weight: bold\"> ENTITY_TP </span>┃<span style=\"font-weight: bold\"> CITY         </span>┃<span style=\"font-weight: bold\"> STATE  </span>┃<span style=\"font-weight: bold\"> TRANSACTION_DT </span>┃<span style=\"font-weight: bold\"> TRANSACTION_AMT </span>┃\n┡━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩\n│ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span>    │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span>          │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span>    │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span>       │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span> │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span>         │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">int64</span>           │\n├───────────┼─────────────────┼───────────┼──────────────┼────────┼────────────────┼─────────────────┤\n│ <span style=\"color: #008000; text-decoration-color: #008000\">C00401224</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">P              </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">IND      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">OCOEE       
</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">FL    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">05182017      </span> │              <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">10</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">C00401224</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">P              </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">IND      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">OCOEE       </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">FL    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">05192017      </span> │              <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">15</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">C00401224</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">P              </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">IND      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">CAPE NEDDICK</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">ME    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">05132017      </span> │              <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">35</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">C00401224</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">P              </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">IND      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">CAPE NEDDICK</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">ME    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">05152017 
     </span> │              <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">35</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">C00401224</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">P              </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">IND      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">CAPE NEDDICK</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">ME    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">05192017      </span> │               <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">5</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">C00401224</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">P              </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">IND      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">CAPE NEDDICK</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">ME    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">05242017      </span> │              <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">15</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">C00401224</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">P              </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">IND      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">CAPE NEDDICK</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">ME    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">05292017      </span> │             <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">100</span> │\n│ <span style=\"color: #008000; 
text-decoration-color: #008000\">C00401224</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">P              </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">IND      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">CAPE NEDDICK</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">ME    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">05302017      </span> │              <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">35</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">C00401224</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">P              </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">IND      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">ANNA MSRIA  </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">FL    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">05162017      </span> │              <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">25</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">C00401224</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">P              </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">IND      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">ANNA MSRIA  </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">FL    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">05232017      </span> │              <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">25</span> │\n│ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>         │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>               │ <span style=\"color: 
#7f7f7f; text-decoration-color: #7f7f7f\">…</span>         │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>            │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>      │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>              │               <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span> │\n└───────────┴─────────────────┴───────────┴──────────────┴────────┴────────────────┴─────────────────┘\n</pre>\n```\n:::\n:::\n\n\nLook, now accessing it only takes a fraction of a second!\n\n::: {#881326dd .cell execution_count=7}\n``` {.python .cell-code}\nkept.count()\n```\n\n::: {.cell-output .cell-output-display}\n```{=html}\n<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"></pre>\n```\n:::\n\n::: {.cell-output .cell-output-display execution_count=20}\n\n::: {.ansi-escaped-output}\n```{=html}\n<pre>┌──────────┐\n│ <span class=\"ansi-cyan-fg ansi-bold\">21730730</span> │\n└──────────┘</pre>\n```\n:::\n\n:::\n:::\n\n\n### Committees Data\n\nThe contributions only list an opaque `CMTE_ID` column. We want to know which actual\ncommittee this is. 
Let's load the committees table so we can lookup from\ncommittee ID to committee name.\n\n::: {#ae8760f6 .cell execution_count=8}\n``` {.python .cell-code}\ndef read_committees():\n    committees_url = \"https://cg-519a459a-0ea3-42c2-b7bc-fa1143481f74.s3-us-gov-west-1.amazonaws.com/bulk-downloads/2018/committee_summary_2018.csv\"\n    # This just creates a view, it doesn't actually fetch the data yet\n    tmp = ibis.read_csv(committees_url)\n    tmp = tmp[\"CMTE_ID\", \"CMTE_NM\"]\n    # The raw table contains multiple rows for each committee id, so lets pick\n    # an arbitrary row for each committee id as the representative name.\n    deduped = tmp.group_by(\"CMTE_ID\").agg(CMTE_NM=_.CMTE_NM.arbitrary())\n    return deduped\n\n\ncomms = read_committees().cache()\ncomms\n```\n\n::: {.cell-output .cell-output-display execution_count=21}\n```{=html}\n<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">┏━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n┃<span style=\"font-weight: bold\"> CMTE_ID   </span>┃<span style=\"font-weight: bold\"> CMTE_NM                                                        </span>┃\n┡━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n│ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span>    │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span>                                                         │\n├───────────┼────────────────────────────────────────────────────────────────┤\n│ <span style=\"color: #008000; text-decoration-color: #008000\">C00659441</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">JASON ORTITAY FOR CONGRESS                                    </span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">C00297911</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">TEXAS 
FORESTRY ASSOCIATION FORESTRY POLITICAL ACTION COMMITTEE</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">C00340745</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">WADDELL &amp; REED FINANCIAL, INC. POLITICAL ACTION COMMITTEE     </span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">C00679217</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">CANTWELL-WARREN VICTORY FUND                                  </span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">C00101204</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">NATIONAL FISHERIES INSTITUTE (FISHPAC)                        </span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">C00010520</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">MEREDITH CORPORATION EMPLOYEES FUND FOR BETTER GOVERNMENT     </span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">C00532788</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">LAFAYETTE COUNTY DEMOCRATIC PARTY                             </span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">C00128561</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">TOLL BROS. INC. 
PAC                                           </span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">C00510958</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">WENDYROGERS.ORG                                               </span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">C00665604</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">COMMITTEE TO ELECT BILL EBBEN                                 </span> │\n│ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>         │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>                                                              │\n└───────────┴────────────────────────────────────────────────────────────────┘\n</pre>\n```\n:::\n:::\n\n\nNow add the committee name to the contributions table:\n\n::: {#8fe204d4 .cell execution_count=9}\n``` {.python .cell-code}\ntogether = kept.left_join(comms, \"CMTE_ID\").drop(\"CMTE_ID\", \"CMTE_ID_right\")\ntogether\n```\n\n::: {.cell-output .cell-output-display execution_count=22}\n```{=html}\n<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">┏━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n┃<span style=\"font-weight: bold\"> TRANSACTION_PGI </span>┃<span style=\"font-weight: bold\"> ENTITY_TP </span>┃<span style=\"font-weight: bold\"> CITY             </span>┃<span style=\"font-weight: bold\"> STATE  </span>┃<span style=\"font-weight: bold\"> TRANSACTION_DT </span>┃<span style=\"font-weight: bold\"> TRANSACTION_AMT </span>┃<span style=\"font-weight: bold\"> CMTE_NM                                         </span>┃\n┡━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n│ <span 
style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span>          │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span>    │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span>           │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span> │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span>         │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">int64</span>           │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span>                                          │\n├─────────────────┼───────────┼──────────────────┼────────┼────────────────┼─────────────────┼─────────────────────────────────────────────────┤\n│ <span style=\"color: #008000; text-decoration-color: #008000\">P              </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">IND      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">COHASSET        </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">MA    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">01312017      </span> │             <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">230</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">UNUM GROUP POLITICAL ACTION COMMITTEE (UNUMPAC)</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">P              </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">IND      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">KEY LARGO       </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">FL    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">01042017      </span> │            <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">5000</span> │ <span style=\"color: #008000; 
text-decoration-color: #008000\">UNUM GROUP POLITICAL ACTION COMMITTEE (UNUMPAC)</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">P              </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">IND      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">LOOKOUT MOUNTAIN</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">GA    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">01312017      </span> │             <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">230</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">UNUM GROUP POLITICAL ACTION COMMITTEE (UNUMPAC)</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">P              </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">IND      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">NORTH YARMOUTH  </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">ME    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">01312017      </span> │             <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">384</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">UNUM GROUP POLITICAL ACTION COMMITTEE (UNUMPAC)</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">P              </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">IND      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">ALPHARETTA      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">GA    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">01312017      </span> │             <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">384</span> │ <span style=\"color: #008000; text-decoration-color: 
#008000\">UNUM GROUP POLITICAL ACTION COMMITTEE (UNUMPAC)</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">P              </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">IND      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">FALMOUTH        </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">ME    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">01312017      </span> │             <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">384</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">UNUM GROUP POLITICAL ACTION COMMITTEE (UNUMPAC)</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">P              </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">IND      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">FALMOUTH        </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">ME    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">01312017      </span> │             <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">384</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">UNUM GROUP POLITICAL ACTION COMMITTEE (UNUMPAC)</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">P              </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">IND      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">HOLLIS CENTER   </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">ME    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">01312017      </span> │             <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">384</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">UNUM GROUP POLITICAL 
ACTION COMMITTEE (UNUMPAC)</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">P              </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">IND      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">FALMOUTH        </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">ME    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">01312017      </span> │             <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">384</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">UNUM GROUP POLITICAL ACTION COMMITTEE (UNUMPAC)</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">P              </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">IND      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">ALEXANDRIA      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">VA    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">01312017      </span> │             <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">384</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">UNUM GROUP POLITICAL ACTION COMMITTEE (UNUMPAC)</span> │\n│ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>               │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>         │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>                │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>      │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>              │               <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span> │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>                                               
│\n└─────────────────┴───────────┴──────────────────┴────────┴────────────────┴─────────────────┴─────────────────────────────────────────────────┘\n</pre>\n```\n:::\n:::\n\n\n## Cleaning\n\nFirst, let's drop any contributions that don't have a committee name. There are only 6 of them.\n\n::: {#215670b2 .cell execution_count=10}\n``` {.python .cell-code}\n# We can do this fearlessly, no .copy() needed, because\n# everything in Ibis is immutable. If we did this in pandas,\n# we might start modifying the original DataFrame accidentally!\ncleaned = together\n\nhas_name = cleaned.CMTE_NM.notnull()\ncleaned = cleaned[has_name]\nhas_name.value_counts()\n```\n\n::: {.cell-output .cell-output-display execution_count=23}\n```{=html}\n<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">┏━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┓\n┃<span style=\"font-weight: bold\"> NotNull(CMTE_NM) </span>┃<span style=\"font-weight: bold\"> NotNull(CMTE_NM)_count </span>┃\n┡━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━┩\n│ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">boolean</span>          │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">int64</span>                  │\n├──────────────────┼────────────────────────┤\n│ True             │               <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">21730724</span> │\n│ False            │                      <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">6</span> │\n└──────────────────┴────────────────────────┘\n</pre>\n```\n:::\n:::\n\n\nLet's look at the `ENTITY_TP` column. 
This represents the type of entity that\nmade the contribution:\n\n::: {#8e39507b .cell execution_count=11}\n``` {.python .cell-code}\ntogether.ENTITY_TP.value_counts()\n```\n\n::: {.cell-output .cell-output-display execution_count=24}\n```{=html}\n<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">┏━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓\n┃<span style=\"font-weight: bold\"> ENTITY_TP </span>┃<span style=\"font-weight: bold\"> ENTITY_TP_count </span>┃\n┡━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩\n│ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span>    │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">int64</span>           │\n├───────────┼─────────────────┤\n│ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">NULL</span>      │            <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">5289</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">CAN      </span> │           <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">13659</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">COM      </span> │             <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">867</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">IND      </span> │        <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">21687992</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">ORG      </span> │           <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">18555</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">PAC      </span> │            <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3621</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">PTY      
</span> │              <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">49</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">CCM      </span> │             <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">698</span> │\n└───────────┴─────────────────┘\n</pre>\n```\n:::\n:::\n\n\nWe only care about contributions from individuals.\n\nOnce we filter on this column, the contents of it are irrelevant, so let's drop it.\n\n::: {#e1453e27 .cell execution_count=12}\n``` {.python .cell-code}\ncleaned = together[_.ENTITY_TP == \"IND\"].drop(\"ENTITY_TP\")\n```\n:::\n\n\nIt looks like the `TRANSACTION_DT` column was a raw string like \"MMDDYYYY\",\nso let's convert that to a proper date type.\n\n::: {#bf3dadc7 .cell execution_count=13}\n``` {.python .cell-code}\nfrom ibis.expr.types import StringValue, DateValue\n\n\ndef mmddyyyy_to_date(val: StringValue) -> DateValue:\n    return val.cast(str).lpad(8, \"0\").to_timestamp(\"%m%d%Y\").date()\n\n\ncleaned = cleaned.mutate(date=mmddyyyy_to_date(_.TRANSACTION_DT)).drop(\"TRANSACTION_DT\")\ncleaned\n```\n\n::: {.cell-output .cell-output-display execution_count=26}\n```{=html}\n<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">┏━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┓\n┃<span style=\"font-weight: bold\"> TRANSACTION_PGI </span>┃<span style=\"font-weight: bold\"> CITY             </span>┃<span style=\"font-weight: bold\"> STATE  </span>┃<span style=\"font-weight: bold\"> TRANSACTION_AMT </span>┃<span style=\"font-weight: bold\"> CMTE_NM                                         </span>┃<span style=\"font-weight: bold\"> date       </span>┃\n┡━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━┩\n│ <span 
style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span>          │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span>           │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span> │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">int64</span>           │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span>                                          │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">date</span>       │\n├─────────────────┼──────────────────┼────────┼─────────────────┼─────────────────────────────────────────────────┼────────────┤\n│ <span style=\"color: #008000; text-decoration-color: #008000\">P              </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">COHASSET        </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">MA    </span> │             <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">230</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">UNUM GROUP POLITICAL ACTION COMMITTEE (UNUMPAC)</span> │ <span style=\"color: #800080; text-decoration-color: #800080\">2017-01-31</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">P              </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">KEY LARGO       </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">FL    </span> │            <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">5000</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">UNUM GROUP POLITICAL ACTION COMMITTEE (UNUMPAC)</span> │ <span style=\"color: #800080; text-decoration-color: #800080\">2017-01-04</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">P              </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">LOOKOUT MOUNTAIN</span> │ <span 
style=\"color: #008000; text-decoration-color: #008000\">GA    </span> │             <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">230</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">UNUM GROUP POLITICAL ACTION COMMITTEE (UNUMPAC)</span> │ <span style=\"color: #800080; text-decoration-color: #800080\">2017-01-31</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">P              </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">NORTH YARMOUTH  </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">ME    </span> │             <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">384</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">UNUM GROUP POLITICAL ACTION COMMITTEE (UNUMPAC)</span> │ <span style=\"color: #800080; text-decoration-color: #800080\">2017-01-31</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">P              </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">ALPHARETTA      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">GA    </span> │             <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">384</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">UNUM GROUP POLITICAL ACTION COMMITTEE (UNUMPAC)</span> │ <span style=\"color: #800080; text-decoration-color: #800080\">2017-01-31</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">P              </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">FALMOUTH        </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">ME    </span> │             <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">384</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">UNUM GROUP POLITICAL ACTION COMMITTEE 
(UNUMPAC)</span> │ <span style=\"color: #800080; text-decoration-color: #800080\">2017-01-31</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">P              </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">FALMOUTH        </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">ME    </span> │             <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">384</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">UNUM GROUP POLITICAL ACTION COMMITTEE (UNUMPAC)</span> │ <span style=\"color: #800080; text-decoration-color: #800080\">2017-01-31</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">P              </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">HOLLIS CENTER   </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">ME    </span> │             <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">384</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">UNUM GROUP POLITICAL ACTION COMMITTEE (UNUMPAC)</span> │ <span style=\"color: #800080; text-decoration-color: #800080\">2017-01-31</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">P              </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">FALMOUTH        </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">ME    </span> │             <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">384</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">UNUM GROUP POLITICAL ACTION COMMITTEE (UNUMPAC)</span> │ <span style=\"color: #800080; text-decoration-color: #800080\">2017-01-31</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">P              </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">ALEXANDRIA      </span> │ <span 
style=\"color: #008000; text-decoration-color: #008000\">VA    </span> │             <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">384</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">UNUM GROUP POLITICAL ACTION COMMITTEE (UNUMPAC)</span> │ <span style=\"color: #800080; text-decoration-color: #800080\">2017-01-31</span> │\n│ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>               │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>                │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>      │               <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span> │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>                                               │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>          │\n└─────────────────┴──────────────────┴────────┴─────────────────┴─────────────────────────────────────────────────┴────────────┘\n</pre>\n```\n:::\n:::\n\n\nThe `TRANSACTION_PGI` column represents the type (primary, general, etc) of election,\nand the year. 
But it seems to be not very consistent:\n\n::: {#6cb98e2b .cell execution_count=14}\n``` {.python .cell-code}\ncleaned.TRANSACTION_PGI.topk(10)\n```\n\n::: {.cell-output .cell-output-display execution_count=27}\n```{=html}\n<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">┏━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓\n┃<span style=\"font-weight: bold\"> TRANSACTION_PGI </span>┃<span style=\"font-weight: bold\"> CountStar() </span>┃\n┡━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩\n│ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span>          │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">int64</span>       │\n├─────────────────┼─────────────┤\n│ <span style=\"color: #008000; text-decoration-color: #008000\">P              </span> │    <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">17013596</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">G2018          </span> │     <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">2095123</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">P2018          </span> │     <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1677183</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">P2020          </span> │      <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">208501</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">O2018          </span> │      <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">161874</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">S2017          </span> │      <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">124336</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">G2017         
 </span> │       <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">98401</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">P2022          </span> │       <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">91136</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">P2017          </span> │       <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">61153</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">R2017          </span> │       <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">54281</span> │\n└─────────────────┴─────────────┘\n</pre>\n```\n:::\n:::\n\n\n::: {#463caa6b .cell execution_count=15}\n``` {.python .cell-code}\ndef get_election_type(pgi: StringValue) -> StringValue:\n    \"\"\"Use the first letter of the TRANSACTION_PGI column to determine the election type\n\n    If the first letter is not one of the known election stage, then return null.\n    \"\"\"\n    election_types = {\n        \"P\": \"primary\",\n        \"G\": \"general\",\n        \"O\": \"other\",\n        \"C\": \"convention\",\n        \"R\": \"runoff\",\n        \"S\": \"special\",\n        \"E\": \"recount\",\n    }\n    first_letter = pgi[0]\n    return first_letter.substitute(election_types, else_=ibis.null())\n\n\ncleaned = cleaned.mutate(election_type=get_election_type(_.TRANSACTION_PGI)).drop(\n    \"TRANSACTION_PGI\"\n)\ncleaned\n```\n\n::: {.cell-output .cell-output-display execution_count=28}\n```{=html}\n<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">┏━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓\n┃<span style=\"font-weight: bold\"> CITY       </span>┃<span style=\"font-weight: bold\"> STATE  </span>┃<span style=\"font-weight: bold\"> 
TRANSACTION_AMT </span>┃<span style=\"font-weight: bold\"> CMTE_NM                   </span>┃<span style=\"font-weight: bold\"> date       </span>┃<span style=\"font-weight: bold\"> election_type </span>┃\n┡━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩\n│ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span>     │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span> │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">int64</span>           │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span>                    │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">date</span>       │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span>        │\n├────────────┼────────┼─────────────────┼───────────────────────────┼────────────┼───────────────┤\n│ <span style=\"color: #008000; text-decoration-color: #008000\">ATLANTA   </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">GA    </span> │              <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">15</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">NANCY PELOSI FOR CONGRESS</span> │ <span style=\"color: #800080; text-decoration-color: #800080\">2017-06-20</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">primary      </span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">AUSTIN    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">TX    </span> │              <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">15</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">NANCY PELOSI FOR CONGRESS</span> │ <span style=\"color: #800080; text-decoration-color: #800080\">2017-06-04</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">primary      
</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">WASHINGTON</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">DC    </span> │              <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">25</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">NANCY PELOSI FOR CONGRESS</span> │ <span style=\"color: #800080; text-decoration-color: #800080\">2017-06-23</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">primary      </span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">HONOLULU  </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">HI    </span> │              <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">10</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">NANCY PELOSI FOR CONGRESS</span> │ <span style=\"color: #800080; text-decoration-color: #800080\">2017-04-20</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">primary      </span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">MAMARONECK</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">NY    </span> │             <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">110</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">NANCY PELOSI FOR CONGRESS</span> │ <span style=\"color: #800080; text-decoration-color: #800080\">2017-06-02</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">primary      </span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">REHOBOTH  </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">MA    </span> │              <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">10</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">NANCY PELOSI FOR CONGRESS</span> │ 
<span style=\"color: #800080; text-decoration-color: #800080\">2017-06-01</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">primary      </span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">BERKELEY  </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">CA    </span> │              <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">25</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">NANCY PELOSI FOR CONGRESS</span> │ <span style=\"color: #800080; text-decoration-color: #800080\">2017-06-05</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">primary      </span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">BEAUMONT  </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">TX    </span> │              <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">25</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">NANCY PELOSI FOR CONGRESS</span> │ <span style=\"color: #800080; text-decoration-color: #800080\">2017-04-12</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">primary      </span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">CONCORD   </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">MA    </span> │             <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">200</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">NANCY PELOSI FOR CONGRESS</span> │ <span style=\"color: #800080; text-decoration-color: #800080\">2017-05-05</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">primary      </span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">OXNARD    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">CA    </span> │              <span style=\"color: #008080; 
text-decoration-color: #008080; font-weight: bold\">15</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">NANCY PELOSI FOR CONGRESS</span> │ <span style=\"color: #800080; text-decoration-color: #800080\">2017-03-31</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">primary      </span> │\n│ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>          │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>      │               <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span> │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>                         │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>          │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>             │\n└────────────┴────────┴─────────────────┴───────────────────────────┴────────────┴───────────────┘\n</pre>\n```\n:::\n:::\n\n\nThat worked well! There are 0 nulls in the resulting column, so we always were\nable to determine the election type.\n\n::: {#ead49c9e .cell execution_count=16}\n``` {.python .cell-code}\ncleaned.election_type.topk(10)\n```\n\n::: {.cell-output .cell-output-display execution_count=29}\n```{=html}\n<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">┏━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓\n┃<span style=\"font-weight: bold\"> election_type </span>┃<span style=\"font-weight: bold\"> CountStar() </span>┃\n┡━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩\n│ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span>        │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">int64</span>       │\n├───────────────┼─────────────┤\n│ <span style=\"color: #008000; text-decoration-color: #008000\">primary      </span> │    <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">19061953</span> │\n│ 
<span style=\"color: #008000; text-decoration-color: #008000\">general      </span> │     <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">2216685</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">other        </span> │      <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">161965</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">special      </span> │      <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">149572</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">runoff       </span> │       <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">69637</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">convention   </span> │       <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">22453</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">recount      </span> │        <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">5063</span> │\n│ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">NULL</span>          │         <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">664</span> │\n└───────────────┴─────────────┘\n</pre>\n```\n:::\n:::\n\n\nAbout 1/20 of transactions are negative. These could represent refunds, or they\ncould be data entry errors. 
Let's drop them to keep it simple.\n\n::: {#ee56a3f3 .cell execution_count=17}\n``` {.python .cell-code}\nabove_zero = cleaned.TRANSACTION_AMT > 0\ncleaned = cleaned[above_zero]\nabove_zero.value_counts()\n```\n\n::: {.cell-output .cell-output-display execution_count=30}\n```{=html}\n<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n┃<span style=\"font-weight: bold\"> Greater(TRANSACTION_AMT, 0) </span>┃<span style=\"font-weight: bold\"> Greater(TRANSACTION_AMT, 0)_count </span>┃\n┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n│ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">boolean</span>                     │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">int64</span>                             │\n├─────────────────────────────┼───────────────────────────────────┤\n│ True                        │                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">20669809</span> │\n│ False                       │                           <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1018183</span> │\n└─────────────────────────────┴───────────────────────────────────┘\n</pre>\n```\n:::\n:::\n\n\n## Adding Features\n\nNow that the data is cleaned up to a usable format, let's add some features.\n\nFirst, it's useful to categorize donations by size, placing them into buckets\nof small, medium, large, etc.\n\n::: {#0ccc57df .cell execution_count=18}\n``` {.python .cell-code}\nedges = [\n    10,\n    50,\n    100,\n    500,\n    1000,\n    5000,\n]\nlabels = [\n    \"<10\",\n    \"10-50\",\n    \"50-100\",\n    \"100-500\",\n    \"500-1000\",\n    \"1000-5000\",\n    \"5000+\",\n]\n\n\ndef bucketize(vals, edges, str_labels):\n    # Uses Ibis's .bucket() method to create a categorical 
column\n    int_labels = vals.bucket(edges, include_under=True, include_over=True)\n    # Map the integer labels to the string labels\n    int_to_str = {str(i): s for i, s in enumerate(str_labels)}\n    return int_labels.cast(str).substitute(int_to_str)\n\n\nfeatured = cleaned.mutate(amount_bucket=bucketize(_.TRANSACTION_AMT, edges, labels))\nfeatured\n```\n\n::: {.cell-output .cell-output-display execution_count=31}\n```{=html}\n<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">┏━━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓\n┃<span style=\"font-weight: bold\"> CITY         </span>┃<span style=\"font-weight: bold\"> STATE  </span>┃<span style=\"font-weight: bold\"> TRANSACTION_AMT </span>┃<span style=\"font-weight: bold\"> CMTE_NM               </span>┃<span style=\"font-weight: bold\"> date       </span>┃<span style=\"font-weight: bold\"> election_type </span>┃<span style=\"font-weight: bold\"> amount_bucket </span>┃\n┡━━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩\n│ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span>       │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span> │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">int64</span>           │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span>                │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">date</span>       │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span>        │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span>        │\n├──────────────┼────────┼─────────────────┼───────────────────────┼────────────┼───────────────┼───────────────┤\n│ <span style=\"color: #008000; text-decoration-color: #008000\">REMINGTON   
</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">IN    </span> │              <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">50</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">AMERICA'S LIBERTY PAC</span> │ <span style=\"color: #800080; text-decoration-color: #800080\">2017-05-30</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">primary      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">50-100       </span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">REMINGTON   </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">IN    </span> │              <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">50</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">AMERICA'S LIBERTY PAC</span> │ <span style=\"color: #800080; text-decoration-color: #800080\">2017-06-05</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">primary      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">50-100       </span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">VANCOUVER   </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">WA    </span> │             <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">100</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">AMERICA'S LIBERTY PAC</span> │ <span style=\"color: #800080; text-decoration-color: #800080\">2017-06-07</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">primary      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">100-500      </span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">SOLANA BEACH</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">CA    </span> │             <span style=\"color: #008080; 
text-decoration-color: #008080; font-weight: bold\">500</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">AMERICA'S LIBERTY PAC</span> │ <span style=\"color: #800080; text-decoration-color: #800080\">2017-06-26</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">primary      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">500-1000     </span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">HILLSDALE   </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">MI    </span> │             <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">250</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">AMERICA'S LIBERTY PAC</span> │ <span style=\"color: #800080; text-decoration-color: #800080\">2017-05-15</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">primary      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">100-500      </span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">MIDDLEBURY  </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">VT    </span> │             <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">500</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">NBT PAC FEDERAL FUND </span> │ <span style=\"color: #800080; text-decoration-color: #800080\">2017-06-05</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">primary      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">500-1000     </span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">WILLISTON   </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">VT    </span> │             <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">500</span> │ <span style=\"color: #008000; text-decoration-color: 
#008000\">NBT PAC FEDERAL FUND </span> │ <span style=\"color: #800080; text-decoration-color: #800080\">2017-05-30</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">primary      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">500-1000     </span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">GLENMONT    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">NY    </span> │             <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">350</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">NBT PAC FEDERAL FUND </span> │ <span style=\"color: #800080; text-decoration-color: #800080\">2017-06-01</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">primary      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">100-500      </span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">NORWICH     </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">NY    </span> │             <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">250</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">NBT PAC FEDERAL FUND </span> │ <span style=\"color: #800080; text-decoration-color: #800080\">2017-05-31</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">primary      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">100-500      </span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">CLIFTON PARK</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">NY    </span> │             <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">250</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">NBT PAC FEDERAL FUND </span> │ <span style=\"color: #800080; text-decoration-color: #800080\">2017-06-26</span> │ <span 
style=\"color: #008000; text-decoration-color: #008000\">primary      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">100-500      </span> │\n│ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>            │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>      │               <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span> │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>                     │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>          │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>             │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>             │\n└──────────────┴────────┴─────────────────┴───────────────────────┴────────────┴───────────────┴───────────────┘\n</pre>\n```\n:::\n:::\n\n\n## Analysis\n\n### By donation size\n\nOne thing we can look at is the donation breakdown by size:\n- Are most donations small or large?\n- Where do politicians/committees get most of their money from? 
Large or small donations?\n\nWe also will compare performance of Ibis vs pandas during this groupby.\n\n::: {#6c9dae32 .cell execution_count=19}\n``` {.python .cell-code}\ndef summary_by(table, by):\n    return table.group_by(by).agg(\n        n_donations=_.count(),\n        total_amount=_.TRANSACTION_AMT.sum(),\n        mean_amount=_.TRANSACTION_AMT.mean(),\n        median_amount=_.TRANSACTION_AMT.approx_median(),\n    )\n\n\ndef summary_by_pandas(df, by):\n    return df.groupby(by, as_index=False).agg(\n        n_donations=(\"election_type\", \"count\"),\n        total_amount=(\"TRANSACTION_AMT\", \"sum\"),\n        mean_amount=(\"TRANSACTION_AMT\", \"mean\"),\n        median_amount=(\"TRANSACTION_AMT\", \"median\"),\n    )\n\n\n# persist the input data so the following timings of the group_by are accurate.\nsubset = featured[\"election_type\", \"amount_bucket\", \"TRANSACTION_AMT\"]\nsubset = subset.cache()\npandas_subset = subset.execute()\n```\n:::\n\n\nLet's take a look at what we are actually computing:\n\n::: {#1b310e3e .cell execution_count=20}\n``` {.python .cell-code}\nby_type_and_bucket = summary_by(subset, [\"election_type\", \"amount_bucket\"])\nby_type_and_bucket\n```\n\n::: {.cell-output .cell-output-display execution_count=33}\n```{=html}\n<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">┏━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓\n┃<span style=\"font-weight: bold\"> election_type </span>┃<span style=\"font-weight: bold\"> amount_bucket </span>┃<span style=\"font-weight: bold\"> n_donations </span>┃<span style=\"font-weight: bold\"> total_amount </span>┃<span style=\"font-weight: bold\"> mean_amount  </span>┃<span style=\"font-weight: bold\"> median_amount </span>┃\n┡━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩\n│ <span style=\"color: #7f7f7f; text-decoration-color: 
#7f7f7f\">string</span>        │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span>        │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">int64</span>       │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">int64</span>        │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">float64</span>      │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">int64</span>         │\n├───────────────┼───────────────┼─────────────┼──────────────┼──────────────┼───────────────┤\n│ <span style=\"color: #008000; text-decoration-color: #008000\">primary      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">500-1000     </span> │      <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">634677</span> │    <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">334630687</span> │   <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">527.245649</span> │           <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">500</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">general      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">5000+        </span> │        <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3125</span> │     <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">44496373</span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">14238.839360</span> │          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">7537</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">special      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">500-1000     </span> │        <span style=\"color: #008080; text-decoration-color: #008080; font-weight: 
bold\">7811</span> │      <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">4003293</span> │   <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">512.519908</span> │           <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">500</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">runoff       </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">100-500      </span> │       <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">18193</span> │      <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3088289</span> │   <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">169.751498</span> │           <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">100</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">convention   </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">500-1000     </span> │        <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1824</span> │       <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">945321</span> │   <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">518.268092</span> │           <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">500</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">general      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">&lt;10          </span> │      <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">115873</span> │       <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">536742</span> │     <span style=\"color: #008080; text-decoration-color: #008080; font-weight: 
bold\">4.632158</span> │             <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">5</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">general      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">50-100       </span> │      <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">304363</span> │     <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">16184312</span> │    <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">53.174374</span> │            <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">50</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">general      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">1000-5000    </span> │      <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">246101</span> │    <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">460025242</span> │  <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1869.253851</span> │          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1978</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">general      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">10-50        </span> │      <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">660787</span> │     <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">14411588</span> │    <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">21.809733</span> │            <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">25</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">other        </span> │ <span 
style=\"color: #008000; text-decoration-color: #008000\">500-1000     </span> │         <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">119</span> │        <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">62535</span> │   <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">525.504202</span> │           <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">500</span> │\n│ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>             │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>             │           <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span> │            <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span> │            <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span> │             <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span> │\n└───────────────┴───────────────┴─────────────┴──────────────┴──────────────┴───────────────┘\n</pre>\n```\n:::\n:::\n\n\nOK, now let's do our timings.\n\nOne interesting thing to pay attention to here is the execution time for the following\ngroupby. Before, we could get away with lazy execution: because we only wanted to preview\nthe first few rows, we only had to compute the first few rows, so all our previews were\nvery fast.\n\nBut now, as soon as we do a groupby, we have to actually go through the whole dataset\nin order to compute the aggregate per group. So this is going to be slower. BUT,\nduckdb is still quite fast. It only takes milliseconds to groupby-agg all 20 million rows!\n\n::: {#32424707 .cell execution_count=21}\n``` {.python .cell-code}\n%timeit summary_by(subset, [\"election_type\", \"amount_bucket\"]).execute()  # .execute() so we actually fetch the data\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n161 ms ± 4.75 ms per loop (mean ± std. dev. 
of 7 runs, 10 loops each)\n```\n:::\n:::\n\n\nNow let's try the same thing in pandas:\n\n::: {#cc653b7f .cell execution_count=22}\n``` {.python .cell-code}\n%timeit summary_by_pandas(pandas_subset, [\"election_type\", \"amount_bucket\"])\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n2.19 s ± 6.54 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n```\n:::\n:::\n\n\nIt takes about 2 seconds, which is more than 10 times slower than duckdb.\nAt this scale, it again doesn't matter,\nbut you could imagine that with a dataset much larger than this, it would matter.\n\nLet's also think about memory usage:\n\n::: {#c967896c .cell execution_count=23}\n``` {.python .cell-code}\npandas_subset.memory_usage(deep=True).sum() / 1e9  # GB\n```\n\n::: {.cell-output .cell-output-display execution_count=36}\n```\n2.782586667\n```\n:::\n:::\n\n\nThe source dataframe is a couple of gigabytes, so probably during the groupby,\nthe peak memory usage is going to be a bit higher than this. You could use a profiler\nsuch as [FIL](https://github.com/pythonspeed/filprofiler) if you wanted an exact number,\nbut I was too lazy to use that here.\n\nAgain, this works on my laptop at this dataset size, but much larger than this and I'd\nstart having problems. Duckdb, on the other hand, is designed around working out of core,\nso it should scale to datasets into the hundreds of gigabytes, much larger than your\ncomputer's RAM.\n\n### Back to analysis\n\nOK, let's plot the result of that groupby.\n\nSurprise! (Or maybe not...) Most donations are small. But most of the money comes\nfrom donations larger than $1000.\n\nWell, if that's the case, why do politicians spend so much time soliciting small\ndonations? 
One explanation is that they can use the number of donations\nas a marketing pitch, to show how popular they are, and thus how viable of a\ncandidate they are.\n\nThis also might explain whose interests are being served by our politicians.\n\n::: {#6808107a .cell execution_count=24}\n``` {.python .cell-code}\nimport altair as alt\n\n# Do some bookkeeping so the buckets are displayed smallest to largest on the charts\nbucket_col = alt.Column(\"amount_bucket:N\", sort=labels)\n\nn_by_bucket = (\n    alt.Chart(by_type_and_bucket.execute())\n    .mark_bar()\n    .encode(\n        x=bucket_col,\n        y=\"n_donations:Q\",\n        color=\"election_type:N\",\n    )\n)\ntotal_by_bucket = (\n    alt.Chart(by_type_and_bucket.execute())\n    .mark_bar()\n    .encode(\n        x=bucket_col,\n        y=\"total_amount:Q\",\n        color=\"election_type:N\",\n    )\n)\nn_by_bucket | total_by_bucket\n```\n\n::: {.cell-output .cell-output-display execution_count=37}\n```{=html}\n\n<style>\n  #altair-viz-6fbf17b0e95f4f8c9babe5bb35792a50.vega-embed {\n    width: 100%;\n    display: flex;\n  }\n\n  #altair-viz-6fbf17b0e95f4f8c9babe5bb35792a50.vega-embed details,\n  #altair-viz-6fbf17b0e95f4f8c9babe5bb35792a50.vega-embed details summary {\n    position: relative;\n  }\n</style>\n<div id=\"altair-viz-6fbf17b0e95f4f8c9babe5bb35792a50\"></div>\n<script type=\"text/javascript\">\n  var VEGA_DEBUG = (typeof VEGA_DEBUG == \"undefined\") ? 
{} : VEGA_DEBUG;\n  (function(spec, embedOpt){\n    let outputDiv = document.currentScript.previousElementSibling;\n    if (outputDiv.id !== \"altair-viz-6fbf17b0e95f4f8c9babe5bb35792a50\") {\n      outputDiv = document.getElementById(\"altair-viz-6fbf17b0e95f4f8c9babe5bb35792a50\");\n    }\n    const paths = {\n      \"vega\": \"https://cdn.jsdelivr.net/npm/vega@5?noext\",\n      \"vega-lib\": \"https://cdn.jsdelivr.net/npm/vega-lib?noext\",\n      \"vega-lite\": \"https://cdn.jsdelivr.net/npm/vega-lite@5.16.3?noext\",\n      \"vega-embed\": \"https://cdn.jsdelivr.net/npm/vega-embed@6?noext\",\n    };\n\n    function maybeLoadScript(lib, version) {\n      var key = `${lib.replace(\"-\", \"\")}_version`;\n      return (VEGA_DEBUG[key] == version) ?\n        Promise.resolve(paths[lib]) :\n        new Promise(function(resolve, reject) {\n          var s = document.createElement('script');\n          document.getElementsByTagName(\"head\")[0].appendChild(s);\n          s.async = true;\n          s.onload = () => {\n            VEGA_DEBUG[key] = version;\n            return resolve(paths[lib]);\n          };\n          s.onerror = () => reject(`Error loading script: ${paths[lib]}`);\n          s.src = paths[lib];\n        });\n    }\n\n    function showError(err) {\n      outputDiv.innerHTML = `<div class=\"error\" style=\"color:red;\">${err}</div>`;\n      throw err;\n    }\n\n    function displayChart(vegaEmbed) {\n      vegaEmbed(outputDiv, spec, embedOpt)\n        .catch(err => showError(`Javascript Error: ${err.message}<br>This usually means there's a typo in your chart specification. 
See the javascript console for the full traceback.`));\n    }\n\n    if(typeof define === \"function\" && define.amd) {\n      requirejs.config({paths});\n      require([\"vega-embed\"], displayChart, err => showError(`Error loading script: ${err.message}`));\n    } else {\n      maybeLoadScript(\"vega\", \"5\")\n        .then(() => maybeLoadScript(\"vega-lite\", \"5.16.3\"))\n        .then(() => maybeLoadScript(\"vega-embed\", \"6\"))\n        .catch(showError)\n        .then(() => displayChart(vegaEmbed));\n    }\n  })({\"config\": {\"view\": {\"continuousWidth\": 300, \"continuousHeight\": 300}}, \"hconcat\": [{\"data\": {\"name\": \"data-ec402682d040f07539df5cc760e76274\"}, \"mark\": {\"type\": \"bar\"}, \"encoding\": {\"color\": {\"field\": \"election_type\", \"type\": \"nominal\"}, \"x\": {\"field\": \"amount_bucket\", \"sort\": [\"<10\", \"10-50\", \"50-100\", \"100-500\", \"500-1000\", \"1000-5000\", \"5000+\"], \"type\": \"nominal\"}, \"y\": {\"field\": \"n_donations\", \"type\": \"quantitative\"}}}, {\"data\": {\"name\": \"data-c12eefc8ce67300e6225801f6dacde98\"}, \"mark\": {\"type\": \"bar\"}, \"encoding\": {\"color\": {\"field\": \"election_type\", \"type\": \"nominal\"}, \"x\": {\"field\": \"amount_bucket\", \"sort\": [\"<10\", \"10-50\", \"50-100\", \"100-500\", \"500-1000\", \"1000-5000\", \"5000+\"], \"type\": \"nominal\"}, \"y\": {\"field\": \"total_amount\", \"type\": \"quantitative\"}}}], \"$schema\": \"https://vega.github.io/schema/vega-lite/v5.16.3.json\", \"datasets\": {\"data-ec402682d040f07539df5cc760e76274\": [{\"election_type\": \"general\", \"amount_bucket\": \"1000-5000\", \"n_donations\": 246101, \"total_amount\": 460025242, \"mean_amount\": 1869.2538510611496, \"median_amount\": 1965}, {\"election_type\": \"general\", \"amount_bucket\": \"10-50\", \"n_donations\": 660787, \"total_amount\": 14411588, \"mean_amount\": 21.809732939661345, \"median_amount\": 25}, {\"election_type\": \"general\", \"amount_bucket\": \"50-100\", 
\"n_donations\": 304363, \"total_amount\": 16184312, \"mean_amount\": 53.174374020495264, \"median_amount\": 50}, {\"election_type\": \"general\", \"amount_bucket\": \"<10\", \"n_donations\": 115873, \"total_amount\": 536742, \"mean_amount\": 4.632157620843509, \"median_amount\": 5}, {\"election_type\": \"runoff\", \"amount_bucket\": \"5000+\", \"n_donations\": 37, \"total_amount\": 211400, \"mean_amount\": 5713.513513513513, \"median_amount\": 5400}, {\"election_type\": \"other\", \"amount_bucket\": \"500-1000\", \"n_donations\": 119, \"total_amount\": 62535, \"mean_amount\": 525.5042016806723, \"median_amount\": 500}, {\"election_type\": \"special\", \"amount_bucket\": \"500-1000\", \"n_donations\": 7811, \"total_amount\": 4003293, \"mean_amount\": 512.5199078223019, \"median_amount\": 500}, {\"election_type\": \"convention\", \"amount_bucket\": \"500-1000\", \"n_donations\": 1824, \"total_amount\": 945321, \"mean_amount\": 518.2680921052631, \"median_amount\": 500}, {\"election_type\": \"runoff\", \"amount_bucket\": \"100-500\", \"n_donations\": 18193, \"total_amount\": 3088289, \"mean_amount\": 169.75149782883526, \"median_amount\": 101}, {\"election_type\": \"general\", \"amount_bucket\": \"100-500\", \"n_donations\": 700821, \"total_amount\": 123174568, \"mean_amount\": 175.75753009684357, \"median_amount\": 149}, {\"election_type\": null, \"amount_bucket\": \"500-1000\", \"n_donations\": 89, \"total_amount\": 48290, \"mean_amount\": 542.5842696629213, \"median_amount\": 500}, {\"election_type\": \"general\", \"amount_bucket\": \"500-1000\", \"n_donations\": 174182, \"total_amount\": 91015697, \"mean_amount\": 522.5321617618354, \"median_amount\": 500}, {\"election_type\": \"primary\", \"amount_bucket\": \"5000+\", \"n_donations\": 44085, \"total_amount\": 1558371116, \"mean_amount\": 35349.237064761255, \"median_amount\": 10000}, {\"election_type\": \"recount\", \"amount_bucket\": \"5000+\", \"n_donations\": 26, \"total_amount\": 1888024, \"mean_amount\": 
72616.30769230769, \"median_amount\": 101450}, {\"election_type\": null, \"amount_bucket\": \"100-500\", \"n_donations\": 195, \"total_amount\": 46746, \"mean_amount\": 239.72307692307692, \"median_amount\": 250}, {\"election_type\": \"other\", \"amount_bucket\": \"1000-5000\", \"n_donations\": 235, \"total_amount\": 548212, \"mean_amount\": 2332.817021276596, \"median_amount\": 2633}, {\"election_type\": \"other\", \"amount_bucket\": \"50-100\", \"n_donations\": 451, \"total_amount\": 27149, \"mean_amount\": 60.19733924611973, \"median_amount\": 50}, {\"election_type\": \"other\", \"amount_bucket\": \"10-50\", \"n_donations\": 2644, \"total_amount\": 64297, \"mean_amount\": 24.318078668683814, \"median_amount\": 23}, {\"election_type\": \"other\", \"amount_bucket\": \"<10\", \"n_donations\": 10993, \"total_amount\": 25816, \"mean_amount\": 2.3484035295187846, \"median_amount\": 1}, {\"election_type\": \"special\", \"amount_bucket\": \"5000+\", \"n_donations\": 129, \"total_amount\": 788712, \"mean_amount\": 6114.046511627907, \"median_amount\": 5400}, {\"election_type\": null, \"amount_bucket\": \"1000-5000\", \"n_donations\": 116, \"total_amount\": 228657, \"mean_amount\": 1971.1810344827586, \"median_amount\": 1300}, {\"election_type\": \"convention\", \"amount_bucket\": \"5000+\", \"n_donations\": 219, \"total_amount\": 1590300, \"mean_amount\": 7261.643835616438, \"median_amount\": 8100}, {\"election_type\": \"other\", \"amount_bucket\": \"100-500\", \"n_donations\": 630, \"total_amount\": 117988, \"mean_amount\": 187.2825396825397, \"median_amount\": 192}, {\"election_type\": null, \"amount_bucket\": \"<10\", \"n_donations\": 24, \"total_amount\": 108, \"mean_amount\": 4.5, \"median_amount\": 5}, {\"election_type\": null, \"amount_bucket\": \"10-50\", \"n_donations\": 151, \"total_amount\": 3167, \"mean_amount\": 20.973509933774835, \"median_amount\": 25}, {\"election_type\": null, \"amount_bucket\": \"50-100\", \"n_donations\": 36, \"total_amount\": 1880, 
\"mean_amount\": 52.22222222222222, \"median_amount\": 50}, {\"election_type\": \"primary\", \"amount_bucket\": \"500-1000\", \"n_donations\": 634677, \"total_amount\": 334630687, \"mean_amount\": 527.2456493618014, \"median_amount\": 500}, {\"election_type\": \"runoff\", \"amount_bucket\": \"1000-5000\", \"n_donations\": 5196, \"total_amount\": 9601993, \"mean_amount\": 1847.958622016936, \"median_amount\": 1913}, {\"election_type\": \"runoff\", \"amount_bucket\": \"10-50\", \"n_donations\": 20166, \"total_amount\": 461107, \"mean_amount\": 22.865565803828225, \"median_amount\": 25}, {\"election_type\": \"general\", \"amount_bucket\": \"5000+\", \"n_donations\": 3125, \"total_amount\": 44496373, \"mean_amount\": 14238.83936, \"median_amount\": 7534}, {\"election_type\": \"runoff\", \"amount_bucket\": \"<10\", \"n_donations\": 10191, \"total_amount\": 49621, \"mean_amount\": 4.869100186439015, \"median_amount\": 5}, {\"election_type\": \"runoff\", \"amount_bucket\": \"50-100\", \"n_donations\": 11578, \"total_amount\": 585827, \"mean_amount\": 50.59828986007946, \"median_amount\": 50}, {\"election_type\": \"recount\", \"amount_bucket\": \"500-1000\", \"n_donations\": 494, \"total_amount\": 250960, \"mean_amount\": 508.0161943319838, \"median_amount\": 500}, {\"election_type\": \"primary\", \"amount_bucket\": \"100-500\", \"n_donations\": 3636287, \"total_amount\": 637353634, \"mean_amount\": 175.27594329050484, \"median_amount\": 149}, {\"election_type\": \"convention\", \"amount_bucket\": \"1000-5000\", \"n_donations\": 2822, \"total_amount\": 4977314, \"mean_amount\": 1763.7540751240256, \"median_amount\": 1440}, {\"election_type\": \"special\", \"amount_bucket\": \"1000-5000\", \"n_donations\": 7935, \"total_amount\": 13493154, \"mean_amount\": 1700.4604914933836, \"median_amount\": 1000}, {\"election_type\": \"convention\", \"amount_bucket\": \"50-100\", \"n_donations\": 2966, \"total_amount\": 153281, \"mean_amount\": 51.67936614969656, \"median_amount\": 50}, 
{\"election_type\": \"special\", \"amount_bucket\": \"10-50\", \"n_donations\": 51066, \"total_amount\": 1134616, \"mean_amount\": 22.21861904202405, \"median_amount\": 25}, {\"election_type\": \"special\", \"amount_bucket\": \"<10\", \"n_donations\": 25115, \"total_amount\": 122898, \"mean_amount\": 4.893410312562214, \"median_amount\": 5}, {\"election_type\": \"special\", \"amount_bucket\": \"50-100\", \"n_donations\": 22859, \"total_amount\": 1177660, \"mean_amount\": 51.518439126820944, \"median_amount\": 50}, {\"election_type\": null, \"amount_bucket\": \"5000+\", \"n_donations\": 48, \"total_amount\": 1622455, \"mean_amount\": 33801.145833333336, \"median_amount\": 21731}, {\"election_type\": \"convention\", \"amount_bucket\": \"10-50\", \"n_donations\": 6848, \"total_amount\": 141604, \"mean_amount\": 20.678154205607477, \"median_amount\": 25}, {\"election_type\": \"convention\", \"amount_bucket\": \"<10\", \"n_donations\": 945, \"total_amount\": 4660, \"mean_amount\": 4.931216931216931, \"median_amount\": 5}, {\"election_type\": \"recount\", \"amount_bucket\": \"100-500\", \"n_donations\": 2232, \"total_amount\": 413753, \"mean_amount\": 185.37320788530465, \"median_amount\": 200}, {\"election_type\": \"primary\", \"amount_bucket\": \"<10\", \"n_donations\": 2423728, \"total_amount\": 10080721, \"mean_amount\": 4.159179990493983, \"median_amount\": 5}, {\"election_type\": \"primary\", \"amount_bucket\": \"10-50\", \"n_donations\": 8115403, \"total_amount\": 187666251, \"mean_amount\": 23.12469892129818, \"median_amount\": 25}, {\"election_type\": \"primary\", \"amount_bucket\": \"50-100\", \"n_donations\": 2663933, \"total_amount\": 155426540, \"mean_amount\": 58.34476317535013, \"median_amount\": 50}, {\"election_type\": \"primary\", \"amount_bucket\": \"1000-5000\", \"n_donations\": 684755, \"total_amount\": 1231394874, \"mean_amount\": 1798.2999379340056, \"median_amount\": 1004}, {\"election_type\": \"special\", \"amount_bucket\": \"100-500\", 
\"n_donations\": 34497, \"total_amount\": 5943498, \"mean_amount\": 172.29028611183583, \"median_amount\": 118}, {\"election_type\": \"convention\", \"amount_bucket\": \"100-500\", \"n_donations\": 6350, \"total_amount\": 1097843, \"mean_amount\": 172.88866141732282, \"median_amount\": 138}, {\"election_type\": \"runoff\", \"amount_bucket\": \"500-1000\", \"n_donations\": 4117, \"total_amount\": 2110393, \"mean_amount\": 512.6045664318679, \"median_amount\": 500}, {\"election_type\": \"recount\", \"amount_bucket\": \"1000-5000\", \"n_donations\": 589, \"total_amount\": 1113150, \"mean_amount\": 1889.8981324278438, \"median_amount\": 1965}, {\"election_type\": \"recount\", \"amount_bucket\": \"10-50\", \"n_donations\": 883, \"total_amount\": 20860, \"mean_amount\": 23.62400906002265, \"median_amount\": 25}, {\"election_type\": \"recount\", \"amount_bucket\": \"50-100\", \"n_donations\": 712, \"total_amount\": 38450, \"mean_amount\": 54.002808988764045, \"median_amount\": 50}, {\"election_type\": \"recount\", \"amount_bucket\": \"<10\", \"n_donations\": 110, \"total_amount\": 569, \"mean_amount\": 5.172727272727273, \"median_amount\": 5}, {\"election_type\": \"other\", \"amount_bucket\": \"5000+\", \"n_donations\": 48, \"total_amount\": 1901300, \"mean_amount\": 39610.416666666664, \"median_amount\": 16950}], \"data-c12eefc8ce67300e6225801f6dacde98\": [{\"election_type\": \"primary\", \"amount_bucket\": \"500-1000\", \"n_donations\": 634677, \"total_amount\": 334630687, \"mean_amount\": 527.2456493618014, \"median_amount\": 500}, {\"election_type\": \"general\", \"amount_bucket\": \"5000+\", \"n_donations\": 3125, \"total_amount\": 44496373, \"mean_amount\": 14238.83936, \"median_amount\": 7527}, {\"election_type\": \"runoff\", \"amount_bucket\": \"10-50\", \"n_donations\": 20166, \"total_amount\": 461107, \"mean_amount\": 22.865565803828225, \"median_amount\": 25}, {\"election_type\": \"runoff\", \"amount_bucket\": \"50-100\", \"n_donations\": 11578, 
\"total_amount\": 585827, \"mean_amount\": 50.59828986007946, \"median_amount\": 50}, {\"election_type\": \"runoff\", \"amount_bucket\": \"1000-5000\", \"n_donations\": 5196, \"total_amount\": 9601993, \"mean_amount\": 1847.958622016936, \"median_amount\": 1897}, {\"election_type\": \"recount\", \"amount_bucket\": \"500-1000\", \"n_donations\": 494, \"total_amount\": 250960, \"mean_amount\": 508.0161943319838, \"median_amount\": 500}, {\"election_type\": \"runoff\", \"amount_bucket\": \"<10\", \"n_donations\": 10191, \"total_amount\": 49621, \"mean_amount\": 4.869100186439015, \"median_amount\": 5}, {\"election_type\": \"special\", \"amount_bucket\": \"500-1000\", \"n_donations\": 7811, \"total_amount\": 4003293, \"mean_amount\": 512.5199078223019, \"median_amount\": 500}, {\"election_type\": \"runoff\", \"amount_bucket\": \"100-500\", \"n_donations\": 18193, \"total_amount\": 3088289, \"mean_amount\": 169.75149782883526, \"median_amount\": 101}, {\"election_type\": \"convention\", \"amount_bucket\": \"500-1000\", \"n_donations\": 1824, \"total_amount\": 945321, \"mean_amount\": 518.2680921052631, \"median_amount\": 500}, {\"election_type\": \"primary\", \"amount_bucket\": \"100-500\", \"n_donations\": 3636287, \"total_amount\": 637353634, \"mean_amount\": 175.27594329050484, \"median_amount\": 149}, {\"election_type\": \"special\", \"amount_bucket\": \"1000-5000\", \"n_donations\": 7935, \"total_amount\": 13493154, \"mean_amount\": 1700.4604914933836, \"median_amount\": 1000}, {\"election_type\": \"special\", \"amount_bucket\": \"<10\", \"n_donations\": 25115, \"total_amount\": 122898, \"mean_amount\": 4.893410312562214, \"median_amount\": 5}, {\"election_type\": \"special\", \"amount_bucket\": \"10-50\", \"n_donations\": 51066, \"total_amount\": 1134616, \"mean_amount\": 22.21861904202405, \"median_amount\": 25}, {\"election_type\": \"special\", \"amount_bucket\": \"50-100\", \"n_donations\": 22859, \"total_amount\": 1177660, \"mean_amount\": 51.518439126820944, 
\"median_amount\": 50}, {\"election_type\": \"convention\", \"amount_bucket\": \"1000-5000\", \"n_donations\": 2822, \"total_amount\": 4977314, \"mean_amount\": 1763.7540751240256, \"median_amount\": 1441}, {\"election_type\": null, \"amount_bucket\": \"5000+\", \"n_donations\": 48, \"total_amount\": 1622455, \"mean_amount\": 33801.145833333336, \"median_amount\": 21731}, {\"election_type\": \"recount\", \"amount_bucket\": \"100-500\", \"n_donations\": 2232, \"total_amount\": 413753, \"mean_amount\": 185.37320788530465, \"median_amount\": 200}, {\"election_type\": \"convention\", \"amount_bucket\": \"10-50\", \"n_donations\": 6848, \"total_amount\": 141604, \"mean_amount\": 20.678154205607477, \"median_amount\": 25}, {\"election_type\": \"convention\", \"amount_bucket\": \"50-100\", \"n_donations\": 2966, \"total_amount\": 153281, \"mean_amount\": 51.67936614969656, \"median_amount\": 50}, {\"election_type\": \"convention\", \"amount_bucket\": \"<10\", \"n_donations\": 945, \"total_amount\": 4660, \"mean_amount\": 4.931216931216931, \"median_amount\": 5}, {\"election_type\": \"general\", \"amount_bucket\": \"<10\", \"n_donations\": 115873, \"total_amount\": 536742, \"mean_amount\": 4.632157620843509, \"median_amount\": 5}, {\"election_type\": \"general\", \"amount_bucket\": \"10-50\", \"n_donations\": 660787, \"total_amount\": 14411588, \"mean_amount\": 21.809732939661345, \"median_amount\": 25}, {\"election_type\": \"general\", \"amount_bucket\": \"50-100\", \"n_donations\": 304363, \"total_amount\": 16184312, \"mean_amount\": 53.174374020495264, \"median_amount\": 50}, {\"election_type\": \"general\", \"amount_bucket\": \"1000-5000\", \"n_donations\": 246101, \"total_amount\": 460025242, \"mean_amount\": 1869.2538510611496, \"median_amount\": 1970}, {\"election_type\": \"runoff\", \"amount_bucket\": \"5000+\", \"n_donations\": 37, \"total_amount\": 211400, \"mean_amount\": 5713.513513513513, \"median_amount\": 5400}, {\"election_type\": \"other\", 
\"amount_bucket\": \"500-1000\", \"n_donations\": 119, \"total_amount\": 62535, \"mean_amount\": 525.5042016806723, \"median_amount\": 500}, {\"election_type\": \"general\", \"amount_bucket\": \"100-500\", \"n_donations\": 700821, \"total_amount\": 123174568, \"mean_amount\": 175.75753009684357, \"median_amount\": 151}, {\"election_type\": null, \"amount_bucket\": \"500-1000\", \"n_donations\": 89, \"total_amount\": 48290, \"mean_amount\": 542.5842696629213, \"median_amount\": 500}, {\"election_type\": \"special\", \"amount_bucket\": \"5000+\", \"n_donations\": 129, \"total_amount\": 788712, \"mean_amount\": 6114.046511627907, \"median_amount\": 5400}, {\"election_type\": \"other\", \"amount_bucket\": \"100-500\", \"n_donations\": 630, \"total_amount\": 117988, \"mean_amount\": 187.2825396825397, \"median_amount\": 192}, {\"election_type\": null, \"amount_bucket\": \"1000-5000\", \"n_donations\": 116, \"total_amount\": 228657, \"mean_amount\": 1971.1810344827586, \"median_amount\": 1300}, {\"election_type\": \"convention\", \"amount_bucket\": \"5000+\", \"n_donations\": 219, \"total_amount\": 1590300, \"mean_amount\": 7261.643835616438, \"median_amount\": 8100}, {\"election_type\": null, \"amount_bucket\": \"10-50\", \"n_donations\": 151, \"total_amount\": 3167, \"mean_amount\": 20.973509933774835, \"median_amount\": 25}, {\"election_type\": null, \"amount_bucket\": \"<10\", \"n_donations\": 24, \"total_amount\": 108, \"mean_amount\": 4.5, \"median_amount\": 5}, {\"election_type\": null, \"amount_bucket\": \"50-100\", \"n_donations\": 36, \"total_amount\": 1880, \"mean_amount\": 52.22222222222222, \"median_amount\": 50}, {\"election_type\": \"primary\", \"amount_bucket\": \"10-50\", \"n_donations\": 8115403, \"total_amount\": 187666251, \"mean_amount\": 23.12469892129818, \"median_amount\": 25}, {\"election_type\": \"primary\", \"amount_bucket\": \"50-100\", \"n_donations\": 2663933, \"total_amount\": 155426540, \"mean_amount\": 58.34476317535013, 
\"median_amount\": 50}, {\"election_type\": \"primary\", \"amount_bucket\": \"1000-5000\", \"n_donations\": 684755, \"total_amount\": 1231394874, \"mean_amount\": 1798.2999379340056, \"median_amount\": 1002}, {\"election_type\": \"primary\", \"amount_bucket\": \"<10\", \"n_donations\": 2423728, \"total_amount\": 10080721, \"mean_amount\": 4.159179990493983, \"median_amount\": 5}, {\"election_type\": \"special\", \"amount_bucket\": \"100-500\", \"n_donations\": 34497, \"total_amount\": 5943498, \"mean_amount\": 172.29028611183583, \"median_amount\": 119}, {\"election_type\": \"runoff\", \"amount_bucket\": \"500-1000\", \"n_donations\": 4117, \"total_amount\": 2110393, \"mean_amount\": 512.6045664318679, \"median_amount\": 500}, {\"election_type\": \"convention\", \"amount_bucket\": \"100-500\", \"n_donations\": 6350, \"total_amount\": 1097843, \"mean_amount\": 172.88866141732282, \"median_amount\": 137}, {\"election_type\": \"recount\", \"amount_bucket\": \"1000-5000\", \"n_donations\": 589, \"total_amount\": 1113150, \"mean_amount\": 1889.8981324278438, \"median_amount\": 1965}, {\"election_type\": \"recount\", \"amount_bucket\": \"10-50\", \"n_donations\": 883, \"total_amount\": 20860, \"mean_amount\": 23.62400906002265, \"median_amount\": 25}, {\"election_type\": \"recount\", \"amount_bucket\": \"50-100\", \"n_donations\": 712, \"total_amount\": 38450, \"mean_amount\": 54.002808988764045, \"median_amount\": 50}, {\"election_type\": \"other\", \"amount_bucket\": \"5000+\", \"n_donations\": 48, \"total_amount\": 1901300, \"mean_amount\": 39610.416666666664, \"median_amount\": 16950}, {\"election_type\": \"recount\", \"amount_bucket\": \"<10\", \"n_donations\": 110, \"total_amount\": 569, \"mean_amount\": 5.172727272727273, \"median_amount\": 5}, {\"election_type\": \"general\", \"amount_bucket\": \"500-1000\", \"n_donations\": 174182, \"total_amount\": 91015697, \"mean_amount\": 522.5321617618354, \"median_amount\": 500}, {\"election_type\": \"primary\", 
\"amount_bucket\": \"5000+\", \"n_donations\": 44085, \"total_amount\": 1558371116, \"mean_amount\": 35349.237064761255, \"median_amount\": 10000}, {\"election_type\": \"recount\", \"amount_bucket\": \"5000+\", \"n_donations\": 26, \"total_amount\": 1888024, \"mean_amount\": 72616.30769230769, \"median_amount\": 101450}, {\"election_type\": \"other\", \"amount_bucket\": \"10-50\", \"n_donations\": 2644, \"total_amount\": 64297, \"mean_amount\": 24.318078668683814, \"median_amount\": 23}, {\"election_type\": \"other\", \"amount_bucket\": \"<10\", \"n_donations\": 10993, \"total_amount\": 25816, \"mean_amount\": 2.3484035295187846, \"median_amount\": 1}, {\"election_type\": \"other\", \"amount_bucket\": \"50-100\", \"n_donations\": 451, \"total_amount\": 27149, \"mean_amount\": 60.19733924611973, \"median_amount\": 50}, {\"election_type\": null, \"amount_bucket\": \"100-500\", \"n_donations\": 195, \"total_amount\": 46746, \"mean_amount\": 239.72307692307692, \"median_amount\": 250}, {\"election_type\": \"other\", \"amount_bucket\": \"1000-5000\", \"n_donations\": 235, \"total_amount\": 548212, \"mean_amount\": 2332.817021276596, \"median_amount\": 2633}]}}, {\"mode\": \"vega-lite\"});\n</script>\n```\n:::\n:::\n\n\n### By election stage\n\nLet's look at how donations break down by election stage. 
Do people donate\ndifferently for primary elections vs general elections?\n\nLet's ignore everything but primary and general elections, since they are the\nmost common, and arguably the most important.\n\n::: {#8a758b63 .cell execution_count=25}\n``` {.python .cell-code}\ngb2 = by_type_and_bucket[_.election_type.isin((\"primary\", \"general\"))]\nn_donations_per_election_type = _.n_donations.sum().over(group_by=\"election_type\")\nfrac = _.n_donations / n_donations_per_election_type\ngb2 = gb2.mutate(frac_n_donations_per_election_type=frac)\ngb2\n```\n\n::: {.cell-output .cell-output-display execution_count=38}\n```{=html}\n<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">┏━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n┃<span style=\"font-weight: bold\"> election_type </span>┃<span style=\"font-weight: bold\"> amount_bucket </span>┃<span style=\"font-weight: bold\"> n_donations </span>┃<span style=\"font-weight: bold\"> total_amount </span>┃<span style=\"font-weight: bold\"> mean_amount  </span>┃<span style=\"font-weight: bold\"> median_amount </span>┃<span style=\"font-weight: bold\"> frac_n_donations_per_election_type </span>┃\n┡━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n│ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span>        │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span>        │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">int64</span>       │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">int64</span>        │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">float64</span>      │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">int64</span>         │ <span style=\"color: #7f7f7f; 
text-decoration-color: #7f7f7f\">float64</span>                            │\n├───────────────┼───────────────┼─────────────┼──────────────┼──────────────┼───────────────┼────────────────────────────────────┤\n│ <span style=\"color: #008000; text-decoration-color: #008000\">general      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">&lt;10          </span> │      <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">115873</span> │       <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">536742</span> │     <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">4.632158</span> │             <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">5</span> │                           <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">0.052544</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">general      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">50-100       </span> │      <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">304363</span> │     <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">16184312</span> │    <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">53.174374</span> │            <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">50</span> │                           <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">0.138017</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">general      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">1000-5000    </span> │      <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">246101</span> │    <span style=\"color: #008080; text-decoration-color: #008080; 
font-weight: bold\">460025242</span> │  <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1869.253851</span> │          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1961</span> │                           <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">0.111598</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">general      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">10-50        </span> │      <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">660787</span> │     <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">14411588</span> │    <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">21.809733</span> │            <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">25</span> │                           <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">0.299642</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">general      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">100-500      </span> │      <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">700821</span> │    <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">123174568</span> │   <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">175.757530</span> │           <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">150</span> │                           <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">0.317796</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">general      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">500-1000     </span> │      <span 
style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">174182</span> │     <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">91015697</span> │   <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">522.532162</span> │           <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">500</span> │                           <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">0.078985</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">general      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">5000+        </span> │        <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3125</span> │     <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">44496373</span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">14238.839360</span> │          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">7601</span> │                           <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">0.001417</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">primary      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">5000+        </span> │       <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">44085</span> │   <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1558371116</span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">35349.237065</span> │         <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">10000</span> │                           <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">0.002422</span> │\n│ <span style=\"color: #008000; 
text-decoration-color: #008000\">primary      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">100-500      </span> │     <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3636287</span> │    <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">637353634</span> │   <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">175.275943</span> │           <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">150</span> │                           <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">0.199765</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">primary      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">500-1000     </span> │      <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">634677</span> │    <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">334630687</span> │   <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">527.245649</span> │           <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">500</span> │                           <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">0.034867</span> │\n│ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>             │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>             │           <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span> │            <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span> │            <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span> │             <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span> │                                  <span style=\"color: #7f7f7f; 
text-decoration-color: #7f7f7f\">…</span> │\n└───────────────┴───────────────┴─────────────┴──────────────┴──────────────┴───────────────┴────────────────────────────────────┘\n</pre>\n```\n:::\n:::\n\n\nIt looks like primary elections get a larger proportion of small donations.\n\n::: {#30710ce2 .cell execution_count=26}\n``` {.python .cell-code}\nalt.Chart(gb2.execute()).mark_bar().encode(\n    x=\"election_type:O\",\n    y=\"frac_n_donations_per_election_type:Q\",\n    color=bucket_col,\n)\n```\n\n::: {.cell-output .cell-output-display execution_count=39}\n```{=html}\n\n<style>\n  #altair-viz-c7d7cb33ea8c45b6bbd9679963751c34.vega-embed {\n    width: 100%;\n    display: flex;\n  }\n\n  #altair-viz-c7d7cb33ea8c45b6bbd9679963751c34.vega-embed details,\n  #altair-viz-c7d7cb33ea8c45b6bbd9679963751c34.vega-embed details summary {\n    position: relative;\n  }\n</style>\n<div id=\"altair-viz-c7d7cb33ea8c45b6bbd9679963751c34\"></div>\n<script type=\"text/javascript\">\n  var VEGA_DEBUG = (typeof VEGA_DEBUG == \"undefined\") ? 
{} : VEGA_DEBUG;\n  (function(spec, embedOpt){\n    let outputDiv = document.currentScript.previousElementSibling;\n    if (outputDiv.id !== \"altair-viz-c7d7cb33ea8c45b6bbd9679963751c34\") {\n      outputDiv = document.getElementById(\"altair-viz-c7d7cb33ea8c45b6bbd9679963751c34\");\n    }\n    const paths = {\n      \"vega\": \"https://cdn.jsdelivr.net/npm/vega@5?noext\",\n      \"vega-lib\": \"https://cdn.jsdelivr.net/npm/vega-lib?noext\",\n      \"vega-lite\": \"https://cdn.jsdelivr.net/npm/vega-lite@5.16.3?noext\",\n      \"vega-embed\": \"https://cdn.jsdelivr.net/npm/vega-embed@6?noext\",\n    };\n\n    function maybeLoadScript(lib, version) {\n      var key = `${lib.replace(\"-\", \"\")}_version`;\n      return (VEGA_DEBUG[key] == version) ?\n        Promise.resolve(paths[lib]) :\n        new Promise(function(resolve, reject) {\n          var s = document.createElement('script');\n          document.getElementsByTagName(\"head\")[0].appendChild(s);\n          s.async = true;\n          s.onload = () => {\n            VEGA_DEBUG[key] = version;\n            return resolve(paths[lib]);\n          };\n          s.onerror = () => reject(`Error loading script: ${paths[lib]}`);\n          s.src = paths[lib];\n        });\n    }\n\n    function showError(err) {\n      outputDiv.innerHTML = `<div class=\"error\" style=\"color:red;\">${err}</div>`;\n      throw err;\n    }\n\n    function displayChart(vegaEmbed) {\n      vegaEmbed(outputDiv, spec, embedOpt)\n        .catch(err => showError(`Javascript Error: ${err.message}<br>This usually means there's a typo in your chart specification. 
See the javascript console for the full traceback.`));\n    }\n\n    if(typeof define === \"function\" && define.amd) {\n      requirejs.config({paths});\n      require([\"vega-embed\"], displayChart, err => showError(`Error loading script: ${err.message}`));\n    } else {\n      maybeLoadScript(\"vega\", \"5\")\n        .then(() => maybeLoadScript(\"vega-lite\", \"5.16.3\"))\n        .then(() => maybeLoadScript(\"vega-embed\", \"6\"))\n        .catch(showError)\n        .then(() => displayChart(vegaEmbed));\n    }\n  })({\"config\": {\"view\": {\"continuousWidth\": 300, \"continuousHeight\": 300}}, \"data\": {\"name\": \"data-ebbd3804e28f31fe1e149c3016fe9de2\"}, \"mark\": {\"type\": \"bar\"}, \"encoding\": {\"color\": {\"field\": \"amount_bucket\", \"sort\": [\"<10\", \"10-50\", \"50-100\", \"100-500\", \"500-1000\", \"1000-5000\", \"5000+\"], \"type\": \"nominal\"}, \"x\": {\"field\": \"election_type\", \"type\": \"ordinal\"}, \"y\": {\"field\": \"frac_n_donations_per_election_type\", \"type\": \"quantitative\"}}, \"$schema\": \"https://vega.github.io/schema/vega-lite/v5.16.3.json\", \"datasets\": {\"data-ebbd3804e28f31fe1e149c3016fe9de2\": [{\"election_type\": \"general\", \"amount_bucket\": \"500-1000\", \"n_donations\": 174182, \"total_amount\": 91015697, \"mean_amount\": 522.5321617618354, \"median_amount\": 500, \"frac_n_donations_per_election_type\": 0.0789850774423966}, {\"election_type\": \"general\", \"amount_bucket\": \"1000-5000\", \"n_donations\": 246101, \"total_amount\": 460025242, \"mean_amount\": 1869.2538510611496, \"median_amount\": 1962, \"frac_n_donations_per_election_type\": 0.1115976768187944}, {\"election_type\": \"general\", \"amount_bucket\": \"50-100\", \"n_donations\": 304363, \"total_amount\": 16184312, \"mean_amount\": 53.174374020495264, \"median_amount\": 50, \"frac_n_donations_per_election_type\": 0.13801733316645898}, {\"election_type\": \"general\", \"amount_bucket\": \"<10\", \"n_donations\": 115873, \"total_amount\": 536742, 
\"mean_amount\": 4.632157620843509, \"median_amount\": 5, \"frac_n_donations_per_election_type\": 0.052544108337731925}, {\"election_type\": \"general\", \"amount_bucket\": \"10-50\", \"n_donations\": 660787, \"total_amount\": 14411588, \"mean_amount\": 21.809732939661345, \"median_amount\": 25, \"frac_n_donations_per_election_type\": 0.2996423991453131}, {\"election_type\": \"general\", \"amount_bucket\": \"5000+\", \"n_donations\": 3125, \"total_amount\": 44496373, \"mean_amount\": 14238.83936, \"median_amount\": 7534, \"frac_n_donations_per_election_type\": 0.0014170716090496688}, {\"election_type\": \"general\", \"amount_bucket\": \"100-500\", \"n_donations\": 700821, \"total_amount\": 123174568, \"mean_amount\": 175.75753009684357, \"median_amount\": 150, \"frac_n_donations_per_election_type\": 0.3177963334802553}, {\"election_type\": \"primary\", \"amount_bucket\": \"1000-5000\", \"n_donations\": 684755, \"total_amount\": 1231394874, \"mean_amount\": 1798.2999379340056, \"median_amount\": 1005, \"frac_n_donations_per_election_type\": 0.037617973167744775}, {\"election_type\": \"primary\", \"amount_bucket\": \"50-100\", \"n_donations\": 2663933, \"total_amount\": 155426540, \"mean_amount\": 58.34476317535013, \"median_amount\": 50, \"frac_n_donations_per_election_type\": 0.14634688335925966}, {\"election_type\": \"primary\", \"amount_bucket\": \"<10\", \"n_donations\": 2423728, \"total_amount\": 10080721, \"mean_amount\": 4.159179990493983, \"median_amount\": 5, \"frac_n_donations_per_election_type\": 0.13315088589336582}, {\"election_type\": \"primary\", \"amount_bucket\": \"10-50\", \"n_donations\": 8115403, \"total_amount\": 187666251, \"mean_amount\": 23.12469892129818, \"median_amount\": 25, \"frac_n_donations_per_election_type\": 0.44583100860809405}, {\"election_type\": \"primary\", \"amount_bucket\": \"5000+\", \"n_donations\": 44085, \"total_amount\": 1558371116, \"mean_amount\": 35349.237064761255, \"median_amount\": 10000, 
\"frac_n_donations_per_election_type\": 0.0024218711029492714}, {\"election_type\": \"primary\", \"amount_bucket\": \"500-1000\", \"n_donations\": 634677, \"total_amount\": 334630687, \"mean_amount\": 527.2456493618014, \"median_amount\": 500, \"frac_n_donations_per_election_type\": 0.03486686823197312}, {\"election_type\": \"primary\", \"amount_bucket\": \"100-500\", \"n_donations\": 3636287, \"total_amount\": 637353634, \"mean_amount\": 175.27594329050484, \"median_amount\": 150, \"frac_n_donations_per_election_type\": 0.1997645096366133}]}}, {\"mode\": \"vega-lite\"});\n</script>\n```\n:::\n:::\n\n\n### By recipient\n\nLet's look at the top players. Who gets the most donations?\n\nFar and away it is ActBlue, which acts as a conduit for donations to Democratic\ninterests.\n\nBeto O'Rourke is the top individual politician, hats off to him!\n\n::: {#97c0a2c8 .cell execution_count=27}\n``` {.python .cell-code}\nby_recip = summary_by(featured, \"CMTE_NM\")\nby_recip\n```\n\n::: {.cell-output .cell-output-display execution_count=40}\n```{=html}\n<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓\n┃<span style=\"font-weight: bold\"> CMTE_NM                                                          </span>┃<span style=\"font-weight: bold\"> n_donations </span>┃<span style=\"font-weight: bold\"> total_amount </span>┃<span style=\"font-weight: bold\"> mean_amount </span>┃<span style=\"font-weight: bold\"> median_amount </span>┃\n┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩\n│ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span>                                                           │ <span style=\"color: #7f7f7f; text-decoration-color: 
#7f7f7f\">int64</span>       │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">int64</span>        │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">float64</span>     │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">int64</span>         │\n├──────────────────────────────────────────────────────────────────┼─────────────┼──────────────┼─────────────┼───────────────┤\n│ <span style=\"color: #008000; text-decoration-color: #008000\">INDIANA DENTAL PAC                                              </span> │         <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">111</span> │        <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">62236</span> │  <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">560.684685</span> │           <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">410</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">BEAM SUNTORY INC POLITICAL ACTION COMMITTEE                     </span> │         <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">407</span> │        <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">64806</span> │  <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">159.228501</span> │            <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">65</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">AMEDISYS, INC. 
POLITICAL ACTION COMMITTEE                       </span> │         <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">132</span> │        <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">25000</span> │  <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">189.393939</span> │            <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">75</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">PIEDMONT TRIAD ANESTHESIA P A FEDERAL PAC                       </span> │         <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">132</span> │        <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">90375</span> │  <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">684.659091</span> │           <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">600</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">AHOLD DELHAIZE USA, INC POLITICAL ACTION COMMITTEE              </span> │         <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">369</span> │        <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">48062</span> │  <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">130.249322</span> │           <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">100</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">DIMITRI FOR CONGRESS                                            </span> │          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">87</span> │        <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">34719</span> │  <span style=\"color: #008080; text-decoration-color: #008080; 
font-weight: bold\">399.068966</span> │           <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">250</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">RELX INC. POLITICAL ACTION COMMITTEE                            </span> │        <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">5491</span> │       <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">306908</span> │   <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">55.892916</span> │            <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">34</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">MAKING INVESTMENTS MAJORITY INSURED PAC                         </span> │          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">14</span> │        <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">30600</span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">2185.714286</span> │          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1000</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">AMERICAN ACADEMY OF OTOLARYNGOLOGY-HEAD AND NECK SURGERY ENT PAC</span> │         <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">765</span> │       <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">285756</span> │  <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">373.537255</span> │           <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">365</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">MIMI WALTERS VICTORY FUND                                       </span> │         <span style=\"color: #008080; 
text-decoration-color: #008080; font-weight: bold\">840</span> │      <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">2514824</span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">2993.838095</span> │          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">2506</span> │\n│ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>                                                                │           <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span> │            <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span> │           <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span> │             <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span> │\n└──────────────────────────────────────────────────────────────────┴─────────────┴──────────────┴─────────────┴───────────────┘\n</pre>\n```\n:::\n:::\n\n\n::: {#56418e6e .cell execution_count=28}\n``` {.python .cell-code}\ntop_recip = by_recip.order_by(ibis.desc(\"n_donations\")).head(10)\nalt.Chart(top_recip.execute()).mark_bar().encode(\n    x=alt.X(\"CMTE_NM:O\", sort=\"-y\"),\n    y=\"n_donations:Q\",\n)\n```\n\n::: {.cell-output .cell-output-display execution_count=41}\n```{=html}\n\n<style>\n  #altair-viz-22b463e147124dfdb214c1ccc86159eb.vega-embed {\n    width: 100%;\n    display: flex;\n  }\n\n  #altair-viz-22b463e147124dfdb214c1ccc86159eb.vega-embed details,\n  #altair-viz-22b463e147124dfdb214c1ccc86159eb.vega-embed details summary {\n    position: relative;\n  }\n</style>\n<div id=\"altair-viz-22b463e147124dfdb214c1ccc86159eb\"></div>\n<script type=\"text/javascript\">\n  var VEGA_DEBUG = (typeof VEGA_DEBUG == \"undefined\") ? 
{} : VEGA_DEBUG;\n  (function(spec, embedOpt){\n    let outputDiv = document.currentScript.previousElementSibling;\n    if (outputDiv.id !== \"altair-viz-22b463e147124dfdb214c1ccc86159eb\") {\n      outputDiv = document.getElementById(\"altair-viz-22b463e147124dfdb214c1ccc86159eb\");\n    }\n    const paths = {\n      \"vega\": \"https://cdn.jsdelivr.net/npm/vega@5?noext\",\n      \"vega-lib\": \"https://cdn.jsdelivr.net/npm/vega-lib?noext\",\n      \"vega-lite\": \"https://cdn.jsdelivr.net/npm/vega-lite@5.16.3?noext\",\n      \"vega-embed\": \"https://cdn.jsdelivr.net/npm/vega-embed@6?noext\",\n    };\n\n    function maybeLoadScript(lib, version) {\n      var key = `${lib.replace(\"-\", \"\")}_version`;\n      return (VEGA_DEBUG[key] == version) ?\n        Promise.resolve(paths[lib]) :\n        new Promise(function(resolve, reject) {\n          var s = document.createElement('script');\n          document.getElementsByTagName(\"head\")[0].appendChild(s);\n          s.async = true;\n          s.onload = () => {\n            VEGA_DEBUG[key] = version;\n            return resolve(paths[lib]);\n          };\n          s.onerror = () => reject(`Error loading script: ${paths[lib]}`);\n          s.src = paths[lib];\n        });\n    }\n\n    function showError(err) {\n      outputDiv.innerHTML = `<div class=\"error\" style=\"color:red;\">${err}</div>`;\n      throw err;\n    }\n\n    function displayChart(vegaEmbed) {\n      vegaEmbed(outputDiv, spec, embedOpt)\n        .catch(err => showError(`Javascript Error: ${err.message}<br>This usually means there's a typo in your chart specification. 
See the javascript console for the full traceback.`));\n    }\n\n    if(typeof define === \"function\" && define.amd) {\n      requirejs.config({paths});\n      require([\"vega-embed\"], displayChart, err => showError(`Error loading script: ${err.message}`));\n    } else {\n      maybeLoadScript(\"vega\", \"5\")\n        .then(() => maybeLoadScript(\"vega-lite\", \"5.16.3\"))\n        .then(() => maybeLoadScript(\"vega-embed\", \"6\"))\n        .catch(showError)\n        .then(() => displayChart(vegaEmbed));\n    }\n  })({\"config\": {\"view\": {\"continuousWidth\": 300, \"continuousHeight\": 300}}, \"data\": {\"name\": \"data-ce10b3f5b7c7e35451245a008d469163\"}, \"mark\": {\"type\": \"bar\"}, \"encoding\": {\"x\": {\"field\": \"CMTE_NM\", \"sort\": \"-y\", \"type\": \"ordinal\"}, \"y\": {\"field\": \"n_donations\", \"type\": \"quantitative\"}}, \"$schema\": \"https://vega.github.io/schema/vega-lite/v5.16.3.json\", \"datasets\": {\"data-ce10b3f5b7c7e35451245a008d469163\": [{\"CMTE_NM\": \"ACTBLUE\", \"n_donations\": 5820888, \"total_amount\": 693057213, \"mean_amount\": 119.06382892094814, \"median_amount\": 25}, {\"CMTE_NM\": \"DCCC\", \"n_donations\": 1315476, \"total_amount\": 124802082, \"mean_amount\": 94.87218466927561, \"median_amount\": 25}, {\"CMTE_NM\": \"REPUBLICAN NATIONAL COMMITTEE\", \"n_donations\": 570561, \"total_amount\": 131525422, \"mean_amount\": 230.5194746924518, \"median_amount\": 50}, {\"CMTE_NM\": \"END CITIZENS UNITED\", \"n_donations\": 489710, \"total_amount\": 13654987, \"mean_amount\": 27.8838230789651, \"median_amount\": 15}, {\"CMTE_NM\": \"DSCC\", \"n_donations\": 347493, \"total_amount\": 67844824, \"mean_amount\": 195.2408365060591, \"median_amount\": 35}, {\"CMTE_NM\": \"PROGRESSIVE TURNOUT PROJECT\", \"n_donations\": 313433, \"total_amount\": 9251647, \"mean_amount\": 29.517144014829327, \"median_amount\": 15}, {\"CMTE_NM\": \"DNC SERVICES CORP./DEM. 
NAT'L COMMITTEE\", \"n_donations\": 280264, \"total_amount\": 70156788, \"mean_amount\": 250.32393743042275, \"median_amount\": 50}, {\"CMTE_NM\": \"BETO FOR TEXAS\", \"n_donations\": 280027, \"total_amount\": 44914966, \"mean_amount\": 160.39512618426082, \"median_amount\": 50}, {\"CMTE_NM\": \"NRSC\", \"n_donations\": 203124, \"total_amount\": 55384644, \"mean_amount\": 272.66420511608675, \"median_amount\": 50}, {\"CMTE_NM\": \"NRCC\", \"n_donations\": 178176, \"total_amount\": 38646560, \"mean_amount\": 216.90104166666666, \"median_amount\": 50}]}}, {\"mode\": \"vega-lite\"});\n</script>\n```\n:::\n:::\n\n\n### By Location\n\nWhere are the largest donations coming from?\n\n::: {#55b19fc3 .cell execution_count=29}\n``` {.python .cell-code}\nf2 = featured.mutate(loc=_.CITY + \", \" + _.STATE).drop(\"CITY\", \"STATE\")\nby_loc = summary_by(f2, \"loc\")\n# Drop the places with a small number of donations so we're\n# resistant to outliers for the mean\nby_loc = by_loc[_.n_donations > 1000]\nby_loc\n```\n\n::: {.cell-output .cell-output-display execution_count=42}\n```{=html}\n<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">┏━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓\n┃<span style=\"font-weight: bold\"> loc             </span>┃<span style=\"font-weight: bold\"> n_donations </span>┃<span style=\"font-weight: bold\"> total_amount </span>┃<span style=\"font-weight: bold\"> mean_amount </span>┃<span style=\"font-weight: bold\"> median_amount </span>┃\n┡━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩\n│ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span>          │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">int64</span>       │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">int64</span>        │ <span style=\"color: #7f7f7f; text-decoration-color: 
#7f7f7f\">float64</span>     │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">int64</span>         │\n├─────────────────┼─────────────┼──────────────┼─────────────┼───────────────┤\n│ <span style=\"color: #008000; text-decoration-color: #008000\">NAZARETH, PA   </span> │        <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1460</span> │       <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">138710</span> │   <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">95.006849</span> │            <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">38</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">FULSHEAR, TX   </span> │        <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1504</span> │       <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">346778</span> │  <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">230.570479</span> │            <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">50</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">GLOUCESTER, MA </span> │        <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">4956</span> │       <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">563331</span> │  <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">113.666465</span> │            <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">25</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">NORMAN, OK     </span> │        <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">6195</span> │       <span style=\"color: #008080; text-decoration-color: #008080; font-weight: 
bold\">945333</span> │  <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">152.596126</span> │            <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">35</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">OAK PARK, IL   </span> │       <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">12017</span> │      <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3413138</span> │  <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">284.025797</span> │            <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">39</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">AUSTIN, TX     </span> │      <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">189865</span> │     <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">33315922</span> │  <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">175.471635</span> │            <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">38</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">MIAMI BEACH, FL</span> │       <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">12825</span> │     <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">10598453</span> │  <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">826.390097</span> │           <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">100</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">SAN ANTONIO, TX</span> │      <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">140529</span> │     <span style=\"color: #008080; text-decoration-color: 
#008080; font-weight: bold\">18925978</span> │  <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">134.676672</span> │            <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">35</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">HAMBURG, NY    </span> │        <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">2322</span> │       <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">170254</span> │   <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">73.322136</span> │             <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">8</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">PITTSBURGH, PA </span> │       <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">74208</span> │     <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">14358578</span> │  <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">193.490971</span> │            <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">42</span> │\n│ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>               │           <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span> │            <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span> │           <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span> │             <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span> │\n└─────────────────┴─────────────┴──────────────┴─────────────┴───────────────┘\n</pre>\n```\n:::\n:::\n\n\n::: {#cc1697c5 .cell execution_count=30}\n``` {.python .cell-code}\ndef top_by(col):\n    top = by_loc.order_by(ibis.desc(col)).head(10)\n    return (\n        alt.Chart(top.execute())\n        
.mark_bar()\n        .encode(\n            x=alt.X('loc:O', sort=\"-y\"),\n            y=col,\n        )\n    )\n\n\ntop_by(\"n_donations\") | top_by(\"total_amount\") | top_by(\"mean_amount\") | top_by(\n    \"median_amount\"\n)\n```\n\n::: {.cell-output .cell-output-display execution_count=43}\n```{=html}\n\n<style>\n  #altair-viz-8a6f1f32899f46cc9b2d62e6535d25ef.vega-embed {\n    width: 100%;\n    display: flex;\n  }\n\n  #altair-viz-8a6f1f32899f46cc9b2d62e6535d25ef.vega-embed details,\n  #altair-viz-8a6f1f32899f46cc9b2d62e6535d25ef.vega-embed details summary {\n    position: relative;\n  }\n</style>\n<div id=\"altair-viz-8a6f1f32899f46cc9b2d62e6535d25ef\"></div>\n<script type=\"text/javascript\">\n  var VEGA_DEBUG = (typeof VEGA_DEBUG == \"undefined\") ? {} : VEGA_DEBUG;\n  (function(spec, embedOpt){\n    let outputDiv = document.currentScript.previousElementSibling;\n    if (outputDiv.id !== \"altair-viz-8a6f1f32899f46cc9b2d62e6535d25ef\") {\n      outputDiv = document.getElementById(\"altair-viz-8a6f1f32899f46cc9b2d62e6535d25ef\");\n    }\n    const paths = {\n      \"vega\": \"https://cdn.jsdelivr.net/npm/vega@5?noext\",\n      \"vega-lib\": \"https://cdn.jsdelivr.net/npm/vega-lib?noext\",\n      \"vega-lite\": \"https://cdn.jsdelivr.net/npm/vega-lite@5.16.3?noext\",\n      \"vega-embed\": \"https://cdn.jsdelivr.net/npm/vega-embed@6?noext\",\n    };\n\n    function maybeLoadScript(lib, version) {\n      var key = `${lib.replace(\"-\", \"\")}_version`;\n      return (VEGA_DEBUG[key] == version) ?\n        Promise.resolve(paths[lib]) :\n        new Promise(function(resolve, reject) {\n          var s = document.createElement('script');\n          document.getElementsByTagName(\"head\")[0].appendChild(s);\n          s.async = true;\n          s.onload = () => {\n            VEGA_DEBUG[key] = version;\n            return resolve(paths[lib]);\n          };\n          s.onerror = () => reject(`Error loading script: ${paths[lib]}`);\n          s.src = paths[lib];\n 
       });\n    }\n\n    function showError(err) {\n      outputDiv.innerHTML = `<div class=\"error\" style=\"color:red;\">${err}</div>`;\n      throw err;\n    }\n\n    function displayChart(vegaEmbed) {\n      vegaEmbed(outputDiv, spec, embedOpt)\n        .catch(err => showError(`Javascript Error: ${err.message}<br>This usually means there's a typo in your chart specification. See the javascript console for the full traceback.`));\n    }\n\n    if(typeof define === \"function\" && define.amd) {\n      requirejs.config({paths});\n      require([\"vega-embed\"], displayChart, err => showError(`Error loading script: ${err.message}`));\n    } else {\n      maybeLoadScript(\"vega\", \"5\")\n        .then(() => maybeLoadScript(\"vega-lite\", \"5.16.3\"))\n        .then(() => maybeLoadScript(\"vega-embed\", \"6\"))\n        .catch(showError)\n        .then(() => displayChart(vegaEmbed));\n    }\n  })({\"config\": {\"view\": {\"continuousWidth\": 300, \"continuousHeight\": 300}}, \"hconcat\": [{\"data\": {\"name\": \"data-88a0fd8958c48a49df7689732aa79f72\"}, \"mark\": {\"type\": \"bar\"}, \"encoding\": {\"x\": {\"field\": \"loc\", \"sort\": \"-y\", \"type\": \"ordinal\"}, \"y\": {\"field\": \"n_donations\", \"type\": \"quantitative\"}}}, {\"data\": {\"name\": \"data-63253180effd0e4c1cba64f44dc0588d\"}, \"mark\": {\"type\": \"bar\"}, \"encoding\": {\"x\": {\"field\": \"loc\", \"sort\": \"-y\", \"type\": \"ordinal\"}, \"y\": {\"field\": \"total_amount\", \"type\": \"quantitative\"}}}, {\"data\": {\"name\": \"data-0053131ee6866911d91bd779adba39b7\"}, \"mark\": {\"type\": \"bar\"}, \"encoding\": {\"x\": {\"field\": \"loc\", \"sort\": \"-y\", \"type\": \"ordinal\"}, \"y\": {\"field\": \"mean_amount\", \"type\": \"quantitative\"}}}, {\"data\": {\"name\": \"data-c6410b8d1e323aef90b691e51bbd6e4d\"}, \"mark\": {\"type\": \"bar\"}, \"encoding\": {\"x\": {\"field\": \"loc\", \"sort\": \"-y\", \"type\": \"ordinal\"}, \"y\": {\"field\": \"median_amount\", \"type\": 
\"quantitative\"}}}], \"$schema\": \"https://vega.github.io/schema/vega-lite/v5.16.3.json\", \"datasets\": {\"data-88a0fd8958c48a49df7689732aa79f72\": [{\"loc\": \"NEW YORK, NY\", \"n_donations\": 695091, \"total_amount\": 444600108, \"mean_amount\": 639.6286356750411, \"median_amount\": 50}, {\"loc\": \"WASHINGTON, DC\", \"n_donations\": 401498, \"total_amount\": 124456508, \"mean_amount\": 309.9803934266173, \"median_amount\": 57}, {\"loc\": \"HOUSTON, TX\", \"n_donations\": 251960, \"total_amount\": 83026989, \"mean_amount\": 329.5244840450865, \"median_amount\": 50}, {\"loc\": \"LOS ANGELES, CA\", \"n_donations\": 245263, \"total_amount\": 89881980, \"mean_amount\": 366.4718282007478, \"median_amount\": 50}, {\"loc\": \"SAN FRANCISCO, CA\", \"n_donations\": 238117, \"total_amount\": 189799961, \"mean_amount\": 797.086982449804, \"median_amount\": 50}, {\"loc\": \"PHILADELPHIA, PA\", \"n_donations\": 222938, \"total_amount\": 36054977, \"mean_amount\": 161.72647552234253, \"median_amount\": 62}, {\"loc\": \"CHICAGO, IL\", \"n_donations\": 212527, \"total_amount\": 108119674, \"mean_amount\": 508.7338267608351, \"median_amount\": 40}, {\"loc\": \"SEATTLE, WA\", \"n_donations\": 197671, \"total_amount\": 52867387, \"mean_amount\": 267.4514066302088, \"median_amount\": 36}, {\"loc\": \"AUSTIN, TX\", \"n_donations\": 189865, \"total_amount\": 33315922, \"mean_amount\": 175.4716351091565, \"median_amount\": 38}, {\"loc\": \"ARLINGTON, VA\", \"n_donations\": 163168, \"total_amount\": 23382868, \"mean_amount\": 143.30547656403218, \"median_amount\": 50}], \"data-63253180effd0e4c1cba64f44dc0588d\": [{\"loc\": \"NEW YORK, NY\", \"n_donations\": 695091, \"total_amount\": 444600108, \"mean_amount\": 639.6286356750411, \"median_amount\": 50}, {\"loc\": \"SAN FRANCISCO, CA\", \"n_donations\": 238117, \"total_amount\": 189799961, \"mean_amount\": 797.086982449804, \"median_amount\": 50}, {\"loc\": \"LAS VEGAS, NV\", \"n_donations\": 65940, \"total_amount\": 153467387, 
\"mean_amount\": 2327.37923870185, \"median_amount\": 46}, {\"loc\": \"WASHINGTON, DC\", \"n_donations\": 401498, \"total_amount\": 124456508, \"mean_amount\": 309.9803934266173, \"median_amount\": 57}, {\"loc\": \"CHICAGO, IL\", \"n_donations\": 212527, \"total_amount\": 108119674, \"mean_amount\": 508.7338267608351, \"median_amount\": 40}, {\"loc\": \"LOS ANGELES, CA\", \"n_donations\": 245263, \"total_amount\": 89881980, \"mean_amount\": 366.4718282007478, \"median_amount\": 50}, {\"loc\": \"HOUSTON, TX\", \"n_donations\": 251960, \"total_amount\": 83026989, \"mean_amount\": 329.5244840450865, \"median_amount\": 50}, {\"loc\": \"DALLAS, TX\", \"n_donations\": 154038, \"total_amount\": 66558403, \"mean_amount\": 432.09080226956985, \"median_amount\": 57}, {\"loc\": \"SEATTLE, WA\", \"n_donations\": 197671, \"total_amount\": 52867387, \"mean_amount\": 267.4514066302088, \"median_amount\": 36}, {\"loc\": \"BOSTON, MA\", \"n_donations\": 82925, \"total_amount\": 47592049, \"mean_amount\": 573.9167802230932, \"median_amount\": 58}], \"data-0053131ee6866911d91bd779adba39b7\": [{\"loc\": \"LAKE FOREST, IL\", \"n_donations\": 5636, \"total_amount\": 37486362, \"mean_amount\": 6651.235273243435, \"median_amount\": 100}, {\"loc\": \"MOUNT VERNON, OH\", \"n_donations\": 1431, \"total_amount\": 5605857, \"mean_amount\": 3917.4402515723273, \"median_amount\": 46}, {\"loc\": \"LOS ALTOS HILLS, CA\", \"n_donations\": 4098, \"total_amount\": 10367629, \"mean_amount\": 2529.92410932162, \"median_amount\": 326}, {\"loc\": \"PALM BEACH, FL\", \"n_donations\": 7140, \"total_amount\": 17212425, \"mean_amount\": 2410.703781512605, \"median_amount\": 255}, {\"loc\": \"LAS VEGAS, NV\", \"n_donations\": 65940, \"total_amount\": 153467387, \"mean_amount\": 2327.37923870185, \"median_amount\": 45}, {\"loc\": \"RHINEBECK, NY\", \"n_donations\": 3014, \"total_amount\": 5942571, \"mean_amount\": 1971.6559389515594, \"median_amount\": 46}, {\"loc\": \"JOPLIN, MO\", \"n_donations\": 1839, 
\"total_amount\": 3617186, \"mean_amount\": 1966.9309407286569, \"median_amount\": 50}, {\"loc\": \"BALA CYNWYD, PA\", \"n_donations\": 3668, \"total_amount\": 6949933, \"mean_amount\": 1894.7472737186479, \"median_amount\": 100}, {\"loc\": \"CARMEL, IN\", \"n_donations\": 10932, \"total_amount\": 20383688, \"mean_amount\": 1864.5890962312478, \"median_amount\": 53}, {\"loc\": \"WAYLAND, MA\", \"n_donations\": 5283, \"total_amount\": 9704279, \"mean_amount\": 1836.8879424569373, \"median_amount\": 50}], \"data-c6410b8d1e323aef90b691e51bbd6e4d\": [{\"loc\": \"GLADWYNE, PA\", \"n_donations\": 1727, \"total_amount\": 1333243, \"mean_amount\": 771.9994209612044, \"median_amount\": 337}, {\"loc\": \"LOS ALTOS HILLS, CA\", \"n_donations\": 4098, \"total_amount\": 10367629, \"mean_amount\": 2529.92410932162, \"median_amount\": 313}, {\"loc\": \"MC LEAN, VA\", \"n_donations\": 4692, \"total_amount\": 3656109, \"mean_amount\": 779.2218670076726, \"median_amount\": 307}, {\"loc\": \"PALM BEACH, FL\", \"n_donations\": 7140, \"total_amount\": 17212425, \"mean_amount\": 2410.703781512605, \"median_amount\": 255}, {\"loc\": \"MISSION HILLS, KS\", \"n_donations\": 2258, \"total_amount\": 1642339, \"mean_amount\": 727.3423383525244, \"median_amount\": 250}, {\"loc\": \"DOVER, MA\", \"n_donations\": 1040, \"total_amount\": 976757, \"mean_amount\": 939.189423076923, \"median_amount\": 250}, {\"loc\": \"SHORT HILLS, NJ\", \"n_donations\": 3555, \"total_amount\": 3396742, \"mean_amount\": 955.4829817158931, \"median_amount\": 250}, {\"loc\": \"PARADISE VALLEY, AZ\", \"n_donations\": 8197, \"total_amount\": 7035291, \"mean_amount\": 858.2763206050994, \"median_amount\": 250}, {\"loc\": \"ATHERTON, CA\", \"n_donations\": 8780, \"total_amount\": 11595391, \"mean_amount\": 1320.6595671981777, \"median_amount\": 250}, {\"loc\": \"KENILWORTH, IL\", \"n_donations\": 1500, \"total_amount\": 855723, \"mean_amount\": 570.482, \"median_amount\": 250}]}}, {\"mode\": 
\"vega-lite\"});\n</script>\n```\n:::\n:::\n\n\n### By month\n\nWhen do the donations come in?\n\n::: {#0d055d90 .cell execution_count=31}\n``` {.python .cell-code}\nby_month = summary_by(featured, _.date.month().name(\"month_int\"))\n# Sorta hacky, .substritute doesn't work to change dtypes (yet?)\n# so we cast to string and then do our mapping\nmonth_map = {\n    \"1\": \"Jan\",\n    \"2\": \"Feb\",\n    \"3\": \"Mar\",\n    \"4\": \"Apr\",\n    \"5\": \"May\",\n    \"6\": \"Jun\",\n    \"7\": \"Jul\",\n    \"8\": \"Aug\",\n    \"9\": \"Sep\",\n    \"10\": \"Oct\",\n    \"11\": \"Nov\",\n    \"12\": \"Dec\",\n}\nby_month = by_month.mutate(month_str=_.month_int.cast(str).substitute(month_map))\nby_month\n```\n\n::: {.cell-output .cell-output-display execution_count=44}\n```{=html}\n<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">┏━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓\n┃<span style=\"font-weight: bold\"> month_int </span>┃<span style=\"font-weight: bold\"> n_donations </span>┃<span style=\"font-weight: bold\"> total_amount </span>┃<span style=\"font-weight: bold\"> mean_amount </span>┃<span style=\"font-weight: bold\"> median_amount </span>┃<span style=\"font-weight: bold\"> month_str </span>┃\n┡━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩\n│ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">int32</span>     │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">int64</span>       │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">int64</span>        │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">float64</span>     │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">int64</span>         │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span>    
│\n├───────────┼─────────────┼──────────────┼─────────────┼───────────────┼───────────┤\n│      <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">NULL</span> │        <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1514</span> │       <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">250297</span> │  <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">165.321664</span> │           <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">100</span> │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">NULL</span>      │\n│         <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1</span> │      <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">348979</span> │    <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">174837854</span> │  <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">500.998209</span> │           <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">124</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">Jan      </span> │\n│         <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">2</span> │      <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">581646</span> │    <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">255997655</span> │  <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">440.126219</span> │           <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">100</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">Feb      </span> │\n│         <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3</span> │     <span 
style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1042577</span> │    <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">430906797</span> │  <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">413.309326</span> │            <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">81</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">Mar      </span> │\n│         <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">4</span> │     <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1088244</span> │    <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">299252692</span> │  <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">274.986760</span> │            <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">50</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">Apr      </span> │\n│         <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">5</span> │     <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1374247</span> │    <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">387317192</span> │  <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">281.839576</span> │            <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">48</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">May      </span> │\n│         <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">6</span> │     <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1667285</span> │    <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">465305247</span> │ 
 <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">279.079610</span> │            <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">44</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">Jun      </span> │\n│         <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">7</span> │     <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1607053</span> │    <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">320528605</span> │  <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">199.451172</span> │            <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">35</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">Jul      </span> │\n│         <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">8</span> │     <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">2023466</span> │    <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">473544182</span> │  <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">234.026261</span> │            <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">35</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">Aug      </span> │\n│         <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">9</span> │     <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">2583847</span> │    <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">697888624</span> │  <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">270.096729</span> │            <span style=\"color: #008080; text-decoration-color: #008080; font-weight: 
bold\">38</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">Sep      </span> │\n│         <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span> │           <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span> │            <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span> │           <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span> │             <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span> │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>         │\n└───────────┴─────────────┴──────────────┴─────────────┴───────────────┴───────────┘\n</pre>\n```\n:::\n:::\n\n\n::: {#7002ddb8 .cell execution_count=32}\n``` {.python .cell-code}\nmonths_in_order = list(month_map.values())\nalt.Chart(by_month.execute()).mark_bar().encode(\n    x=alt.X(\"month_str:O\", sort=months_in_order),\n    y=\"n_donations:Q\",\n)\n```\n\n::: {.cell-output .cell-output-display execution_count=45}\n```{=html}\n\n<style>\n  #altair-viz-fd0cec36d6a04996a5559215e6fce4df.vega-embed {\n    width: 100%;\n    display: flex;\n  }\n\n  #altair-viz-fd0cec36d6a04996a5559215e6fce4df.vega-embed details,\n  #altair-viz-fd0cec36d6a04996a5559215e6fce4df.vega-embed details summary {\n    position: relative;\n  }\n</style>\n<div id=\"altair-viz-fd0cec36d6a04996a5559215e6fce4df\"></div>\n<script type=\"text/javascript\">\n  var VEGA_DEBUG = (typeof VEGA_DEBUG == \"undefined\") ? 
{} : VEGA_DEBUG;\n  (function(spec, embedOpt){\n    let outputDiv = document.currentScript.previousElementSibling;\n    if (outputDiv.id !== \"altair-viz-fd0cec36d6a04996a5559215e6fce4df\") {\n      outputDiv = document.getElementById(\"altair-viz-fd0cec36d6a04996a5559215e6fce4df\");\n    }\n    const paths = {\n      \"vega\": \"https://cdn.jsdelivr.net/npm/vega@5?noext\",\n      \"vega-lib\": \"https://cdn.jsdelivr.net/npm/vega-lib?noext\",\n      \"vega-lite\": \"https://cdn.jsdelivr.net/npm/vega-lite@5.16.3?noext\",\n      \"vega-embed\": \"https://cdn.jsdelivr.net/npm/vega-embed@6?noext\",\n    };\n\n    function maybeLoadScript(lib, version) {\n      var key = `${lib.replace(\"-\", \"\")}_version`;\n      return (VEGA_DEBUG[key] == version) ?\n        Promise.resolve(paths[lib]) :\n        new Promise(function(resolve, reject) {\n          var s = document.createElement('script');\n          document.getElementsByTagName(\"head\")[0].appendChild(s);\n          s.async = true;\n          s.onload = () => {\n            VEGA_DEBUG[key] = version;\n            return resolve(paths[lib]);\n          };\n          s.onerror = () => reject(`Error loading script: ${paths[lib]}`);\n          s.src = paths[lib];\n        });\n    }\n\n    function showError(err) {\n      outputDiv.innerHTML = `<div class=\"error\" style=\"color:red;\">${err}</div>`;\n      throw err;\n    }\n\n    function displayChart(vegaEmbed) {\n      vegaEmbed(outputDiv, spec, embedOpt)\n        .catch(err => showError(`Javascript Error: ${err.message}<br>This usually means there's a typo in your chart specification. 
See the javascript console for the full traceback.`));\n    }\n\n    if(typeof define === \"function\" && define.amd) {\n      requirejs.config({paths});\n      require([\"vega-embed\"], displayChart, err => showError(`Error loading script: ${err.message}`));\n    } else {\n      maybeLoadScript(\"vega\", \"5\")\n        .then(() => maybeLoadScript(\"vega-lite\", \"5.16.3\"))\n        .then(() => maybeLoadScript(\"vega-embed\", \"6\"))\n        .catch(showError)\n        .then(() => displayChart(vegaEmbed));\n    }\n  })({\"config\": {\"view\": {\"continuousWidth\": 300, \"continuousHeight\": 300}}, \"data\": {\"name\": \"data-f45f98a3a06c83bbafc4d3d94cb29c0b\"}, \"mark\": {\"type\": \"bar\"}, \"encoding\": {\"x\": {\"field\": \"month_str\", \"sort\": [\"Jan\", \"Feb\", \"Mar\", \"Apr\", \"May\", \"Jun\", \"Jul\", \"Aug\", \"Sep\", \"Oct\", \"Nov\", \"Dec\"], \"type\": \"ordinal\"}, \"y\": {\"field\": \"n_donations\", \"type\": \"quantitative\"}}, \"$schema\": \"https://vega.github.io/schema/vega-lite/v5.16.3.json\", \"datasets\": {\"data-f45f98a3a06c83bbafc4d3d94cb29c0b\": [{\"month_int\": null, \"n_donations\": 1514, \"total_amount\": 250297, \"mean_amount\": 165.3216644649934, \"median_amount\": 100, \"month_str\": null}, {\"month_int\": 1.0, \"n_donations\": 348979, \"total_amount\": 174837854, \"mean_amount\": 500.9982090612902, \"median_amount\": 122, \"month_str\": \"Jan\"}, {\"month_int\": 2.0, \"n_donations\": 581646, \"total_amount\": 255997655, \"mean_amount\": 440.126219384299, \"median_amount\": 100, \"month_str\": \"Feb\"}, {\"month_int\": 3.0, \"n_donations\": 1042577, \"total_amount\": 430906797, \"mean_amount\": 413.3093258339672, \"median_amount\": 80, \"month_str\": \"Mar\"}, {\"month_int\": 4.0, \"n_donations\": 1088244, \"total_amount\": 299252692, \"mean_amount\": 274.98676032213365, \"median_amount\": 50, \"month_str\": \"Apr\"}, {\"month_int\": 5.0, \"n_donations\": 1374247, \"total_amount\": 387317192, \"mean_amount\": 281.83957614606396, 
\"median_amount\": 48, \"month_str\": \"May\"}, {\"month_int\": 6.0, \"n_donations\": 1667285, \"total_amount\": 465305247, \"mean_amount\": 279.07960966481437, \"median_amount\": 44, \"month_str\": \"Jun\"}, {\"month_int\": 7.0, \"n_donations\": 1607053, \"total_amount\": 320528605, \"mean_amount\": 199.45117242555162, \"median_amount\": 35, \"month_str\": \"Jul\"}, {\"month_int\": 8.0, \"n_donations\": 2023466, \"total_amount\": 473544182, \"mean_amount\": 234.02626088108227, \"median_amount\": 35, \"month_str\": \"Aug\"}, {\"month_int\": 9.0, \"n_donations\": 2583847, \"total_amount\": 697888624, \"mean_amount\": 270.0967294116099, \"median_amount\": 38, \"month_str\": \"Sep\"}, {\"month_int\": 10.0, \"n_donations\": 3686024, \"total_amount\": 850820707, \"mean_amount\": 230.82343115508743, \"median_amount\": 29, \"month_str\": \"Oct\"}, {\"month_int\": 11.0, \"n_donations\": 2545616, \"total_amount\": 285143995, \"mean_amount\": 112.01375030640914, \"median_amount\": 25, \"month_str\": \"Nov\"}, {\"month_int\": 12.0, \"n_donations\": 2119311, \"total_amount\": 283081648, \"mean_amount\": 133.57249030463203, \"median_amount\": 25, \"month_str\": \"Dec\"}]}}, {\"mode\": \"vega-lite\"});\n</script>\n```\n:::\n:::\n\n\n## Conclusion\n\nThanks for following along! I hope you've learned something about Ibis, and\nmaybe even about campaign finance.\n\nIbis is a great tool for exploring data. I now find myself reaching for it\nwhen in the past I would have reached for pandas.\n\nSome of the highlights for me:\n\n- Fast, lazy execution, a great display format, and good type hinting/editor support for a great REPL experience.\n- Very well thought-out API and semantics (e.g. `isinstance(val, NumericValue)`?? That's beautiful!)\n- Fast and fairly complete string support, since I work with a lot of text data.\n- Extremely responsive maintainers. 
Sometimes I've submitted multiple feature requests and bug reports in a single day, and a PR has been merged by the next day.\n- Escape hatch to SQL. I didn't have to use that here, but if something isn't supported, you can always fall back to SQL.\n\nCheck out [The Ibis Website](https://ibis-project.org/) for more information.\n\n",
+    "markdown": "---\ntitle: \"Exploring campaign finance data\"\nauthor: \"Nick Crews\"\ndate: \"2023-03-24\"\ncategories:\n    - blog\n    - data engineering\n    - case study\n    - duckdb\n    - performance\n---\n\nHi! My name is [Nick Crews](https://www.linkedin.com/in/nicholas-b-crews/),\nand I'm a data engineer that looks at public campaign finance data.\n\nIn this post, I'll walk through how I use Ibis to explore public campaign contribution\ndata from the Federal Election Commission (FEC). We'll do some loading,\ncleaning, featurizing, and visualization. There will be filtering, sorting, grouping,\nand aggregation.\n\n## Downloading The Data\n\n::: {#c4ae1c47 .cell execution_count=1}\n``` {.python .cell-code}\nfrom pathlib import Path\nfrom zipfile import ZipFile\nfrom urllib.request import urlretrieve\n\n# Download and unzip the 2018 individual contributions data\nurl = \"https://cg-519a459a-0ea3-42c2-b7bc-fa1143481f74.s3-us-gov-west-1.amazonaws.com/bulk-downloads/2018/indiv18.zip\"\nzip_path = Path(\"indiv18.zip\")\ncsv_path = Path(\"indiv18.csv\")\n\nif not zip_path.exists():\n    urlretrieve(url, zip_path)\n\nif not csv_path.exists():\n    with ZipFile(zip_path) as zip_file, csv_path.open(\"w\") as csv_file:\n        for line in zip_file.open(\"itcont.txt\"):\n            csv_file.write(line.decode())\n```\n:::\n\n\n## Loading the data\n\nNow that we have our raw data in a .csv format, let's load it into Ibis,\nusing the duckdb backend.\n\nNote that a 4.3 GB .csv would be near the limit of what pandas could\nhandle on my laptop with 16GB of RAM. In pandas, typically every time\nyou perform a transformation on the data, a copy of the data is made.\nI could only do a few transformations before I ran out of memory.\n\nWith Ibis, this problem is solved in two different ways.\n\nFirst, because they are designed to work with very large datasets,\nmany (all?) 
SQL backends support out of core operations.\nThe data lives on disk, and are only loaded in a streaming fashion\nwhen needed, and then written back to disk as the operation is performed.\n\nSecond, unless you explicitly ask for it, Ibis makes use of lazy\nevaluation. This means that when you ask for a result, the\nresult is not persisted in memory. Only the original source\ndata is persisted. Everything else is derived from this on the fly.\n\n::: {#70cdc5f3 .cell execution_count=2}\n``` {.python .cell-code}\nimport ibis\nfrom ibis import _\n\nibis.options.interactive = True\n\n# The raw .csv file doesn't have column names, so we will add them in the next step.\nraw = ibis.read_csv(csv_path)\nraw\n```\n\n::: {.cell-output .cell-output-display execution_count=2}\n```{=html}\n<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">┏━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┓\n┃<span style=\"font-weight: bold\"> C00401224 </span>┃<span style=\"font-weight: bold\"> A      </span>┃<span style=\"font-weight: bold\"> M6     </span>┃<span style=\"font-weight: bold\"> P      </span>┃<span style=\"font-weight: bold\"> 201804059101866001 </span>┃<span style=\"font-weight: bold\"> 24T    </span>┃<span style=\"font-weight: bold\"> IND    </span>┃<span style=\"font-weight: bold\"> STOUFFER, LEIGH   </span>┃<span style=\"font-weight: bold\"> AMSTELVEEN   </span>┃<span style=\"font-weight: bold\"> ZZ     </span>┃<span style=\"font-weight: bold\"> 1187RC    </span>┃<span style=\"font-weight: bold\"> MYSELF            </span>┃<span style=\"font-weight: bold\"> SELF EMPLOYED           </span>┃<span 
style=\"font-weight: bold\"> 05172017 </span>┃<span style=\"font-weight: bold\"> 10    </span>┃<span style=\"font-weight: bold\"> C00458000 </span>┃<span style=\"font-weight: bold\"> SA11AI_81445687 </span>┃<span style=\"font-weight: bold\"> 1217152 </span>┃<span style=\"font-weight: bold\"> column18 </span>┃<span style=\"font-weight: bold\"> EARMARKED FOR PROGRESSIVE CHANGE CAMPAIGN COMMITTEE (C00458000) </span>┃<span style=\"font-weight: bold\"> 4050820181544765358 </span>┃\n┡━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━━╇━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━┩\n│ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span>    │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span> │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span> │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span> │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">int64</span>              │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span> │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span> │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span>            │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span>       │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span> │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span>    │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span>            │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span>                  │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span>   │ 
<span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">int64</span> │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span>    │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span>          │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">int64</span>   │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span>   │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span>                                                          │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">int64</span>               │\n├───────────┼────────┼────────┼────────┼────────────────────┼────────┼────────┼───────────────────┼──────────────┼────────┼───────────┼───────────────────┼─────────────────────────┼──────────┼───────┼───────────┼─────────────────┼─────────┼──────────┼─────────────────────────────────────────────────────────────────┼─────────────────────┤\n│ <span style=\"color: #008000; text-decoration-color: #008000\">C00401224</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">A     </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">M6    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">P     </span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">201804059101867748</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">24T   </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">IND   </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">STRAWS, JOYCE    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">OCOEE       </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">FL    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">34761    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">SILVERSEA CRUISES</span> │ <span 
style=\"color: #008000; text-decoration-color: #008000\">RESERVATIONS SUPERVISOR</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">05182017</span> │    <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">10</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">C00000935</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">SA11AI_81592336</span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1217152</span> │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">NULL</span>     │ <span style=\"color: #008000; text-decoration-color: #008000\">EARMARKED FOR DCCC (C00000935)                                 </span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">4050820181544770597</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">C00401224</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">A     </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">M6    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">P     </span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">201804059101867748</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">24T   </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">IND   </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">STRAWS, JOYCE    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">OCOEE       </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">FL    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">34761    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">SILVERSEA CRUISES</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">RESERVATIONS SUPERVISOR</span> │ <span style=\"color: 
#008000; text-decoration-color: #008000\">05192017</span> │    <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">15</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">C00000935</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">SA11AI_81627562</span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1217152</span> │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">NULL</span>     │ <span style=\"color: #008000; text-decoration-color: #008000\">EARMARKED FOR DCCC (C00000935)                                 </span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">4050820181544770598</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">C00401224</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">A     </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">M6    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">P     </span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">201804059101865942</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">24T   </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">IND   </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">STOTT, JIM       </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">CAPE NEDDICK</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">ME    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">039020760</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">NONE             </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">NONE                   </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">05132017</span> │    <span style=\"color: #008080; 
text-decoration-color: #008080; font-weight: bold\">35</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">C00000935</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">SA11AI_81047921</span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1217152</span> │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">NULL</span>     │ <span style=\"color: #008000; text-decoration-color: #008000\">EARMARKED FOR DCCC (C00000935)                                 </span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">4050820181544765179</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">C00401224</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">A     </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">M6    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">P     </span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">201804059101865942</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">24T   </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">IND   </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">STOTT, JIM       </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">CAPE NEDDICK</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">ME    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">039020760</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">NONE             </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">NONE                   </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">05152017</span> │    <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">35</span> │ <span style=\"color: #008000; 
text-decoration-color: #008000\">C00000935</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">SA11AI_81209209</span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1217152</span> │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">NULL</span>     │ <span style=\"color: #008000; text-decoration-color: #008000\">EARMARKED FOR DCCC (C00000935)                                 </span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">4050820181544765180</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">C00401224</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">A     </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">M6    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">P     </span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">201804059101865942</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">24T   </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">IND   </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">STOTT, JIM       </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">CAPE NEDDICK</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">ME    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">039020760</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">NONE             </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">NONE                   </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">05192017</span> │     <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">5</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">C00000935</span> │ <span style=\"color: #008000; text-decoration-color: 
#008000\">SA11AI_81605223</span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1217152</span> │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">NULL</span>     │ <span style=\"color: #008000; text-decoration-color: #008000\">EARMARKED FOR DCCC (C00000935)                                 </span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">4050820181544765181</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">C00401224</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">A     </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">M6    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">P     </span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">201804059101865943</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">24T   </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">IND   </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">STOTT, JIM       </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">CAPE NEDDICK</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">ME    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">039020760</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">NONE             </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">NONE                   </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">05242017</span> │    <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">15</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">C00000935</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">SA11AI_82200022</span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: 
bold\">1217152</span> │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">NULL</span>     │ <span style=\"color: #008000; text-decoration-color: #008000\">EARMARKED FOR DCCC (C00000935)                                 </span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">4050820181544765182</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">C00401224</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">A     </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">M6    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">P     </span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">201804059101865943</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">24T   </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">IND   </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">STOTT, JIM       </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">CAPE NEDDICK</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">ME    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">03902    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">NOT EMPLOYED     </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">NOT EMPLOYED           </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">05292017</span> │   <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">100</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">C00213512</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">SA11AI_82589834</span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1217152</span> │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">NULL</span>     │ <span 
style=\"color: #008000; text-decoration-color: #008000\">EARMARKED FOR NANCY PELOSI FOR CONGRESS (C00213512)            </span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">4050820181544765184</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">C00401224</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">A     </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">M6    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">P     </span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">201804059101865944</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">24T   </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">IND   </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">STOTT, JIM       </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">CAPE NEDDICK</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">ME    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">039020760</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">NONE             </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">NONE                   </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">05302017</span> │    <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">35</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">C00000935</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">SA11AI_82643727</span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1217152</span> │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">NULL</span>     │ <span style=\"color: #008000; text-decoration-color: #008000\">EARMARKED FOR DCCC (C00000935)                          
       </span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">4050820181544765185</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">C00401224</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">A     </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">M6    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">P     </span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">201804059101867050</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">24T   </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">IND   </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">STRANGE, WINIFRED</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">ANNA MSRIA  </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">FL    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">34216    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">NOT EMPLOYED     </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">NOT EMPLOYED           </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">05162017</span> │    <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">25</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">C00000935</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">SA11AI_81325918</span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1217152</span> │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">NULL</span>     │ <span style=\"color: #008000; text-decoration-color: #008000\">EARMARKED FOR DCCC (C00000935)                                 </span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: 
bold\">4050820181544768505</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">C00401224</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">A     </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">M6    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">P     </span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">201804059101867051</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">24T   </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">IND   </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">STRANGE, WINIFRED</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">ANNA MSRIA  </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">FL    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">34216    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">NOT EMPLOYED     </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">NOT EMPLOYED           </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">05232017</span> │    <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">25</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">C00000935</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">SA11AI_81991189</span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1217152</span> │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">NULL</span>     │ <span style=\"color: #008000; text-decoration-color: #008000\">EARMARKED FOR DCCC (C00000935)                                 </span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">4050820181544768506</span> │\n│ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>   
      │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>      │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>      │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>      │                  <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span> │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>      │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>      │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>                 │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>            │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>      │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>         │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>                 │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>                       │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>        │     <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span> │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>         │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>               │       <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span> │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>        │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>                                                               │                   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span> 
│\n└───────────┴────────┴────────┴────────┴────────────────────┴────────┴────────┴───────────────────┴──────────────┴────────┴───────────┴───────────────────┴─────────────────────────┴──────────┴───────┴───────────┴─────────────────┴─────────┴──────────┴─────────────────────────────────────────────────────────────────┴─────────────────────┘\n</pre>\n```\n:::\n:::\n\n\n::: {#9efc5c77 .cell execution_count=3}\n``` {.python .cell-code}\n# For a more comprehensive description of the columns and their meaning, see\n# https://www.fec.gov/campaign-finance-data/contributions-individuals-file-description/\ncolumns = {\n    \"CMTE_ID\": \"keep\",  # Committee ID\n    \"AMNDT_IND\": \"drop\",  # Amendment indicator. A = amendment, N = new, T = termination\n    \"RPT_TP\": \"drop\",  # Report type (monthly, quarterly, etc)\n    \"TRANSACTION_PGI\": \"keep\",  # Primary/general indicator\n    \"IMAGE_NUM\": \"drop\",  # Image number\n    \"TRANSACTION_TP\": \"drop\",  # Transaction type\n    \"ENTITY_TP\": \"keep\",  # Entity type\n    \"NAME\": \"drop\",  # Contributor name\n    \"CITY\": \"keep\",  # Contributor city\n    \"STATE\": \"keep\",  # Contributor state\n    \"ZIP_CODE\": \"drop\",  # Contributor zip code\n    \"EMPLOYER\": \"drop\",  # Contributor employer\n    \"OCCUPATION\": \"drop\",  # Contributor occupation\n    \"TRANSACTION_DT\": \"keep\",  # Transaction date\n    \"TRANSACTION_AMT\": \"keep\",  # Transaction amount\n    # Other ID. For individual contributions will be null. For contributions from\n    # other FEC committees, will be the committee ID of the other committee.\n    \"OTHER_ID\": \"drop\",\n    \"TRAN_ID\": \"drop\",  # Transaction ID\n    \"FILE_NUM\": \"drop\",  # File number, unique number assigned to each report filed with the FEC\n    \"MEMO_CD\": \"drop\",  # Memo code\n    \"MEMO_TEXT\": \"drop\",  # Memo text\n    \"SUB_ID\": \"drop\",  # Submission ID. 
Unique number assigned to each transaction.\n}\n\nrenaming = dict(zip(columns.keys(), raw.columns))\nto_keep = [k for k, v in columns.items() if v == \"keep\"]\nkept = raw.rename(renaming)[to_keep]\nkept\n```\n\n::: {.cell-output .cell-output-display execution_count=3}\n```{=html}\n<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">┏━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓\n┃<span style=\"font-weight: bold\"> CMTE_ID   </span>┃<span style=\"font-weight: bold\"> TRANSACTION_PGI </span>┃<span style=\"font-weight: bold\"> ENTITY_TP </span>┃<span style=\"font-weight: bold\"> CITY         </span>┃<span style=\"font-weight: bold\"> STATE  </span>┃<span style=\"font-weight: bold\"> TRANSACTION_DT </span>┃<span style=\"font-weight: bold\"> TRANSACTION_AMT </span>┃\n┡━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩\n│ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span>    │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span>          │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span>    │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span>       │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span> │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span>         │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">int64</span>           │\n├───────────┼─────────────────┼───────────┼──────────────┼────────┼────────────────┼─────────────────┤\n│ <span style=\"color: #008000; text-decoration-color: #008000\">C00401224</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">P              </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">IND      </span> │ <span style=\"color: #008000; 
text-decoration-color: #008000\">OCOEE       </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">FL    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">05182017      </span> │              <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">10</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">C00401224</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">P              </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">IND      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">OCOEE       </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">FL    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">05192017      </span> │              <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">15</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">C00401224</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">P              </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">IND      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">CAPE NEDDICK</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">ME    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">05132017      </span> │              <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">35</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">C00401224</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">P              </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">IND      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">CAPE NEDDICK</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">ME    </span> │ <span style=\"color: 
#008000; text-decoration-color: #008000\">05152017      </span> │              <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">35</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">C00401224</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">P              </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">IND      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">CAPE NEDDICK</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">ME    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">05192017      </span> │               <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">5</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">C00401224</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">P              </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">IND      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">CAPE NEDDICK</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">ME    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">05242017      </span> │              <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">15</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">C00401224</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">P              </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">IND      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">CAPE NEDDICK</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">ME    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">05292017      </span> │             <span style=\"color: #008080; text-decoration-color: #008080; font-weight: 
bold\">100</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">C00401224</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">P              </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">IND      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">CAPE NEDDICK</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">ME    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">05302017      </span> │              <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">35</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">C00401224</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">P              </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">IND      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">ANNA MSRIA  </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">FL    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">05162017      </span> │              <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">25</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">C00401224</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">P              </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">IND      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">ANNA MSRIA  </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">FL    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">05232017      </span> │              <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">25</span> │\n│ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>         │ <span style=\"color: #7f7f7f; text-decoration-color: 
#7f7f7f\">…</span>               │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>         │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>            │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>      │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>              │               <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span> │\n└───────────┴─────────────────┴───────────┴──────────────┴────────┴────────────────┴─────────────────┘\n</pre>\n```\n:::\n:::\n\n\n::: {#50e6655a .cell execution_count=4}\n``` {.python .cell-code}\n# 21 million rows\nkept.count()\n```\n\n::: {.cell-output .cell-output-display}\n```{=html}\n<script type=\"application/vnd.jupyter.widget-view+json\">\n{\"model_id\":\"c555021fe66b479dbde9787832c212b9\",\"version_major\":2,\"version_minor\":0,\"quarto_mimetype\":\"application/vnd.jupyter.widget-view+json\"}\n</script>\n```\n:::\n\n::: {.cell-output .cell-output-display}\n```{=html}\n<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"></pre>\n```\n:::\n\n::: {.cell-output .cell-output-display execution_count=4}\n\n::: {.ansi-escaped-output}\n```{=html}\n<pre>┌──────────┐\n│ <span class=\"ansi-cyan-fg ansi-bold\">21730730</span> │\n└──────────┘</pre>\n```\n:::\n\n:::\n:::\n\n\nHuh, what's up with those timings? Previewing the head only took a fraction of a second,\nbut finding the number of rows took 10 seconds.\n\nThat's because duckdb is scanning the .csv file on the fly every time we access it.\nSo we only have to read the first few lines to get that preview,\nbut we have to read the whole file to get the number of rows.\n\nNote that this isn't a feature of Ibis, but a feature of Duckdb. This what I think is\none of the strengths of Ibis: Ibis itself doesn't have to implement any of the\noptimimizations or features of the backends. 
Those backends can focus on what they do\nbest, and Ibis can get those things for free.\n\nSo, let's tell duckdb to actually read in the file to its native format so later accesses\nwill be faster. This will be a ~20 seconds that we'll only have to pay once.\n\n::: {#0ac3b6dd .cell execution_count=5}\n``` {.python .cell-code}\nkept = kept.cache()\nkept\n```\n\n::: {.cell-output .cell-output-display}\n```{=html}\n<script type=\"application/vnd.jupyter.widget-view+json\">\n{\"model_id\":\"25fdb26f53744bfdb6c6fad84a62bdc6\",\"version_major\":2,\"version_minor\":0,\"quarto_mimetype\":\"application/vnd.jupyter.widget-view+json\"}\n</script>\n```\n:::\n\n::: {.cell-output .cell-output-display execution_count=5}\n```{=html}\n<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">┏━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓\n┃<span style=\"font-weight: bold\"> CMTE_ID   </span>┃<span style=\"font-weight: bold\"> TRANSACTION_PGI </span>┃<span style=\"font-weight: bold\"> ENTITY_TP </span>┃<span style=\"font-weight: bold\"> CITY         </span>┃<span style=\"font-weight: bold\"> STATE  </span>┃<span style=\"font-weight: bold\"> TRANSACTION_DT </span>┃<span style=\"font-weight: bold\"> TRANSACTION_AMT </span>┃\n┡━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩\n│ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span>    │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span>          │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span>    │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span>       │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span> │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span>         │ <span style=\"color: #7f7f7f; 
text-decoration-color: #7f7f7f\">int64</span>           │\n├───────────┼─────────────────┼───────────┼──────────────┼────────┼────────────────┼─────────────────┤\n│ <span style=\"color: #008000; text-decoration-color: #008000\">C00401224</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">P              </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">IND      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">OCOEE       </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">FL    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">05182017      </span> │              <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">10</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">C00401224</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">P              </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">IND      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">OCOEE       </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">FL    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">05192017      </span> │              <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">15</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">C00401224</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">P              </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">IND      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">CAPE NEDDICK</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">ME    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">05132017      </span> │              <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">35</span> 
│\n│ <span style=\"color: #008000; text-decoration-color: #008000\">C00401224</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">P              </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">IND      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">CAPE NEDDICK</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">ME    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">05152017      </span> │              <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">35</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">C00401224</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">P              </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">IND      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">CAPE NEDDICK</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">ME    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">05192017      </span> │               <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">5</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">C00401224</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">P              </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">IND      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">CAPE NEDDICK</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">ME    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">05242017      </span> │              <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">15</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">C00401224</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">P          
    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">IND      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">CAPE NEDDICK</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">ME    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">05292017      </span> │             <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">100</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">C00401224</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">P              </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">IND      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">CAPE NEDDICK</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">ME    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">05302017      </span> │              <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">35</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">C00401224</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">P              </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">IND      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">ANNA MSRIA  </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">FL    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">05162017      </span> │              <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">25</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">C00401224</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">P              </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">IND      </span> │ <span style=\"color: #008000; text-decoration-color: 
#008000\">ANNA MSRIA  </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">FL    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">05232017      </span> │              <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">25</span> │\n│ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>         │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>               │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>         │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>            │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>      │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>              │               <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span> │\n└───────────┴─────────────────┴───────────┴──────────────┴────────┴────────────────┴─────────────────┘\n</pre>\n```\n:::\n:::\n\n\nLook, now accessing it only takes a fraction of a second!\n\n::: {#b00c7c83 .cell execution_count=6}\n``` {.python .cell-code}\nkept.count()\n```\n\n::: {.cell-output .cell-output-display}\n```{=html}\n<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"></pre>\n```\n:::\n\n::: {.cell-output .cell-output-display execution_count=6}\n\n::: {.ansi-escaped-output}\n```{=html}\n<pre>┌──────────┐\n│ <span class=\"ansi-cyan-fg ansi-bold\">21730730</span> │\n└──────────┘</pre>\n```\n:::\n\n:::\n:::\n\n\n### Committees Data\n\nThe contributions only list an opaque `CMTE_ID` column. We want to know which actual\ncommittee this is. 
Let's load the committees table so we can lookup from\ncommittee ID to committee name.\n\n::: {#7813b8f5 .cell execution_count=7}\n``` {.python .cell-code}\ndef read_committees():\n    committees_url = \"https://cg-519a459a-0ea3-42c2-b7bc-fa1143481f74.s3-us-gov-west-1.amazonaws.com/bulk-downloads/2018/committee_summary_2018.csv\"\n    # This just creates a view, it doesn't actually fetch the data yet\n    tmp = ibis.read_csv(committees_url)\n    tmp = tmp[\"CMTE_ID\", \"CMTE_NM\"]\n    # The raw table contains multiple rows for each committee id, so lets pick\n    # an arbitrary row for each committee id as the representative name.\n    deduped = tmp.group_by(\"CMTE_ID\").agg(CMTE_NM=_.CMTE_NM.arbitrary())\n    return deduped\n\n\ncomms = read_committees().cache()\ncomms\n```\n\n::: {.cell-output .cell-output-display execution_count=7}\n```{=html}\n<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">┏━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n┃<span style=\"font-weight: bold\"> CMTE_ID   </span>┃<span style=\"font-weight: bold\"> CMTE_NM                                                                          </span>┃\n┡━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n│ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span>    │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span>                                                                           │\n├───────────┼──────────────────────────────────────────────────────────────────────────────────┤\n│ <span style=\"color: #008000; text-decoration-color: #008000\">C00414318</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">LOEBSACK FOR CONGRESS                                                           </span> │\n│ <span style=\"color: #008000; 
text-decoration-color: #008000\">C00678292</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">COMMITTEE TO ELECT NANCY DAILEY SLOTNICK                                        </span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">C00034678</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">AMERICAN WATERWAYS OPERATORS-PAC                                                </span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">C00078287</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">CNA FINANCIAL CORPORATION CITIZENS FOR GOOD GOVERNMENT                          </span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">C00112680</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">AMERICANS FOR DEMOCRATIC ACTION INC POLITICAL ACTION COMMITTEE                  </span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">C00017194</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">INTERNATIONAL UNION OF OPERATING ENGINEERS LO 825 POLITICAL ACTION AND EDUCATIO…</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">C00462234</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">SMART FOR CONGRESS                                                              </span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">C00101410</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">CSRA INC. 
PAC                                                                   </span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">C70004239</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">SEIU LOCAL 32BJ                                                                 </span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">C70001979</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">INTERNATIONAL BROTHERHOOD OF TEAMSTERS                                          </span> │\n│ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>         │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>                                                                                │\n└───────────┴──────────────────────────────────────────────────────────────────────────────────┘\n</pre>\n```\n:::\n:::\n\n\nNow add the committee name to the contributions table:\n\n::: {#3040661a .cell execution_count=8}\n``` {.python .cell-code}\ntogether = kept.left_join(comms, \"CMTE_ID\").drop(\"CMTE_ID\", \"CMTE_ID_right\")\ntogether\n```\n\n::: {.cell-output .cell-output-display execution_count=8}\n```{=html}\n<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">┏━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━┓\n┃<span style=\"font-weight: bold\"> TRANSACTION_PGI </span>┃<span style=\"font-weight: bold\"> ENTITY_TP </span>┃<span style=\"font-weight: bold\"> CITY         </span>┃<span style=\"font-weight: bold\"> STATE  </span>┃<span style=\"font-weight: bold\"> TRANSACTION_DT </span>┃<span style=\"font-weight: bold\"> TRANSACTION_AMT </span>┃<span style=\"font-weight: bold\"> CMTE_NM </span>┃\n┡━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━┩\n│ <span style=\"color: #7f7f7f; 
text-decoration-color: #7f7f7f\">string</span>          │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span>    │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span>       │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span> │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span>         │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">int64</span>           │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span>  │\n├─────────────────┼───────────┼──────────────┼────────┼────────────────┼─────────────────┼─────────┤\n│ <span style=\"color: #008000; text-decoration-color: #008000\">P              </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">IND      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">OCOEE       </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">FL    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">05182017      </span> │              <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">10</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">ACTBLUE</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">P              </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">IND      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">OCOEE       </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">FL    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">05192017      </span> │              <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">15</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">ACTBLUE</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">P              </span> │ <span style=\"color: 
#008000; text-decoration-color: #008000\">IND      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">CAPE NEDDICK</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">ME    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">05132017      </span> │              <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">35</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">ACTBLUE</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">P              </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">IND      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">CAPE NEDDICK</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">ME    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">05152017      </span> │              <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">35</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">ACTBLUE</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">P              </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">IND      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">CAPE NEDDICK</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">ME    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">05192017      </span> │               <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">5</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">ACTBLUE</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">P              </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">IND      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">CAPE NEDDICK</span> │ <span style=\"color: 
#008000; text-decoration-color: #008000\">ME    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">05242017      </span> │              <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">15</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">ACTBLUE</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">P              </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">IND      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">CAPE NEDDICK</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">ME    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">05292017      </span> │             <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">100</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">ACTBLUE</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">P              </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">IND      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">CAPE NEDDICK</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">ME    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">05302017      </span> │              <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">35</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">ACTBLUE</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">P              </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">IND      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">ANNA MSRIA  </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">FL    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">05162017      </span> │              <span 
style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">25</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">ACTBLUE</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">P              </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">IND      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">ANNA MSRIA  </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">FL    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">05232017      </span> │              <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">25</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">ACTBLUE</span> │\n│ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>               │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>         │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>            │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>      │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>              │               <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span> │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>       │\n└─────────────────┴───────────┴──────────────┴────────┴────────────────┴─────────────────┴─────────┘\n</pre>\n```\n:::\n:::\n\n\n## Cleaning\n\nFirst, let's drop any contributions that don't have a committee name. There are only 6 of them.\n\n::: {#9e9b4d4e .cell execution_count=9}\n``` {.python .cell-code}\n# We can do this fearlessly, no .copy() needed, because\n# everything in Ibis is immutable. 
If we did this in pandas,\n# we might start modifying the original DataFrame accidentally!\ncleaned = together\n\nhas_name = cleaned.CMTE_NM.notnull()\ncleaned = cleaned[has_name]\nhas_name.value_counts()\n```\n\n::: {.cell-output .cell-output-display execution_count=9}\n```{=html}\n<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">┏━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┓\n┃<span style=\"font-weight: bold\"> NotNull(CMTE_NM) </span>┃<span style=\"font-weight: bold\"> NotNull(CMTE_NM)_count </span>┃\n┡━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━┩\n│ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">boolean</span>          │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">int64</span>                  │\n├──────────────────┼────────────────────────┤\n│ False            │                      <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">6</span> │\n│ True             │               <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">21730724</span> │\n└──────────────────┴────────────────────────┘\n</pre>\n```\n:::\n:::\n\n\nLet's look at the `ENTITY_TP` column. 
This represents the type of entity that\nmade the contribution:\n\n::: {#db705864 .cell execution_count=10}\n``` {.python .cell-code}\ntogether.ENTITY_TP.value_counts()\n```\n\n::: {.cell-output .cell-output-display execution_count=10}\n```{=html}\n<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">┏━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓\n┃<span style=\"font-weight: bold\"> ENTITY_TP </span>┃<span style=\"font-weight: bold\"> ENTITY_TP_count </span>┃\n┡━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩\n│ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span>    │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">int64</span>           │\n├───────────┼─────────────────┤\n│ <span style=\"color: #008000; text-decoration-color: #008000\">CCM      </span> │             <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">698</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">COM      </span> │             <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">867</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">CAN      </span> │           <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">13659</span> │\n│ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">NULL</span>      │            <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">5289</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">IND      </span> │        <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">21687992</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">ORG      </span> │           <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">18555</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">PAC      
</span> │            <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3621</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">PTY      </span> │              <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">49</span> │\n└───────────┴─────────────────┘\n</pre>\n```\n:::\n:::\n\n\nWe only care about contributions from individuals.\n\nOnce we filter on this column, the contents of it are irrelevant, so let's drop it.\n\n::: {#a7e12254 .cell execution_count=11}\n``` {.python .cell-code}\ncleaned = together[_.ENTITY_TP == \"IND\"].drop(\"ENTITY_TP\")\n```\n:::\n\n\nIt looks like the `TRANSACTION_DT` column was a raw string like \"MMDDYYYY\",\nso let's convert that to a proper date type.\n\n::: {#8ca09d94 .cell execution_count=12}\n``` {.python .cell-code}\nfrom ibis.expr.types import StringValue, DateValue\n\n\ndef mmddyyyy_to_date(val: StringValue) -> DateValue:\n    return val.cast(str).lpad(8, \"0\").nullif(\"\").to_timestamp(\"%m%d%Y\").date()\n\n\ncleaned = cleaned.mutate(date=mmddyyyy_to_date(_.TRANSACTION_DT)).drop(\"TRANSACTION_DT\")\ncleaned\n```\n\n::: {.cell-output .cell-output-display execution_count=12}\n```{=html}\n<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">┏━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┓\n┃<span style=\"font-weight: bold\"> TRANSACTION_PGI </span>┃<span style=\"font-weight: bold\"> CITY         </span>┃<span style=\"font-weight: bold\"> STATE  </span>┃<span style=\"font-weight: bold\"> TRANSACTION_AMT </span>┃<span style=\"font-weight: bold\"> CMTE_NM                     </span>┃<span style=\"font-weight: bold\"> date       </span>┃\n┡━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━┩\n│ <span style=\"color: #7f7f7f; text-decoration-color: 
#7f7f7f\">string</span>          │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span>       │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span> │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">int64</span>           │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span>                      │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">date</span>       │\n├─────────────────┼──────────────┼────────┼─────────────────┼─────────────────────────────┼────────────┤\n│ <span style=\"color: #008000; text-decoration-color: #008000\">P              </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">NEW YORK    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">NY    </span> │               <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">PROGRESSIVE TURNOUT PROJECT</span> │ <span style=\"color: #800080; text-decoration-color: #800080\">2017-03-19</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">P              </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">NEW YORK    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">NY    </span> │              <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">35</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">PROGRESSIVE TURNOUT PROJECT</span> │ <span style=\"color: #800080; text-decoration-color: #800080\">2017-03-21</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">P              </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">NEW YORK    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">NY    </span> │               <span style=\"color: #008080; text-decoration-color: #008080; 
font-weight: bold\">3</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">PROGRESSIVE TURNOUT PROJECT</span> │ <span style=\"color: #800080; text-decoration-color: #800080\">2017-03-21</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">P              </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">LAGUNA BEACH</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">CA    </span> │              <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">15</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">PROGRESSIVE TURNOUT PROJECT</span> │ <span style=\"color: #800080; text-decoration-color: #800080\">2017-03-26</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">P              </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">DALLAS      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">TX    </span> │              <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">15</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">PROGRESSIVE TURNOUT PROJECT</span> │ <span style=\"color: #800080; text-decoration-color: #800080\">2017-04-26</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">P              </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">BELOIT      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">WI    </span> │               <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">PROGRESSIVE TURNOUT PROJECT</span> │ <span style=\"color: #800080; text-decoration-color: #800080\">2017-05-03</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">P              </span> │ <span style=\"color: #008000; text-decoration-color: 
#008000\">EAST HAMPTON</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">NY    </span> │               <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">5</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">PROGRESSIVE TURNOUT PROJECT</span> │ <span style=\"color: #800080; text-decoration-color: #800080\">2017-04-05</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">P              </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">BETHESDA    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">MD    </span> │              <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">15</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">PROGRESSIVE TURNOUT PROJECT</span> │ <span style=\"color: #800080; text-decoration-color: #800080\">2017-02-16</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">P              </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">BETHESDA    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">MD    </span> │             <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">250</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">PROGRESSIVE TURNOUT PROJECT</span> │ <span style=\"color: #800080; text-decoration-color: #800080\">2017-02-26</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">P              </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">BETHESDA    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">MD    </span> │              <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">15</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">PROGRESSIVE TURNOUT PROJECT</span> │ <span style=\"color: #800080; 
text-decoration-color: #800080\">2017-03-21</span> │\n│ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>               │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>            │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>      │               <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span> │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>                           │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>          │\n└─────────────────┴──────────────┴────────┴─────────────────┴─────────────────────────────┴────────────┘\n</pre>\n```\n:::\n:::\n\n\nThe `TRANSACTION_PGI` column represents the type (primary, general, etc) of election,\nand the year. But it seems to be not very consistent:\n\n::: {#26b5cd7c .cell execution_count=13}\n``` {.python .cell-code}\ncleaned.TRANSACTION_PGI.topk(10)\n```\n\n::: {.cell-output .cell-output-display execution_count=13}\n```{=html}\n<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">┏━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓\n┃<span style=\"font-weight: bold\"> TRANSACTION_PGI </span>┃<span style=\"font-weight: bold\"> CountStar() </span>┃\n┡━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩\n│ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span>          │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">int64</span>       │\n├─────────────────┼─────────────┤\n│ <span style=\"color: #008000; text-decoration-color: #008000\">P              </span> │    <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">17013596</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">G2018          </span> │     <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">2095123</span> │\n│ <span style=\"color: #008000; 
text-decoration-color: #008000\">P2018          </span> │     <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1677183</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">P2020          </span> │      <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">208501</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">O2018          </span> │      <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">161874</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">S2017          </span> │      <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">124336</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">G2017          </span> │       <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">98401</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">P2022          </span> │       <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">91136</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">P2017          </span> │       <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">61153</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">R2017          </span> │       <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">54281</span> │\n└─────────────────┴─────────────┘\n</pre>\n```\n:::\n:::\n\n\n::: {#80fa93f0 .cell execution_count=14}\n``` {.python .cell-code}\ndef get_election_type(pgi: StringValue) -> StringValue:\n    \"\"\"Use the first letter of the TRANSACTION_PGI column to determine the election type\n\n    If the first letter is not one of the known election stage, then return null.\n    \"\"\"\n    election_types = {\n        \"P\": \"primary\",\n        \"G\": \"general\",\n        \"O\": 
\"other\",\n        \"C\": \"convention\",\n        \"R\": \"runoff\",\n        \"S\": \"special\",\n        \"E\": \"recount\",\n    }\n    first_letter = pgi[0]\n    return first_letter.substitute(election_types, else_=ibis.null())\n\n\ncleaned = cleaned.mutate(election_type=get_election_type(_.TRANSACTION_PGI)).drop(\n    \"TRANSACTION_PGI\"\n)\ncleaned\n```\n\n::: {.cell-output .cell-output-display execution_count=14}\n```{=html}\n<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">┏━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓\n┃<span style=\"font-weight: bold\"> CITY        </span>┃<span style=\"font-weight: bold\"> STATE  </span>┃<span style=\"font-weight: bold\"> TRANSACTION_AMT </span>┃<span style=\"font-weight: bold\"> CMTE_NM                                                  </span>┃<span style=\"font-weight: bold\"> date       </span>┃<span style=\"font-weight: bold\"> election_type </span>┃\n┡━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩\n│ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span>      │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span> │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">int64</span>           │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span>                                                   │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">date</span>       │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span>        │\n├─────────────┼────────┼─────────────────┼──────────────────────────────────────────────────────────┼────────────┼───────────────┤\n│ <span style=\"color: #008000; text-decoration-color: #008000\">BURR RIDGE </span> │ <span 
style=\"color: #008000; text-decoration-color: #008000\">IL    </span> │             <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">100</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">TCF FINANCIAL CORPORATION PAC                           </span> │ <span style=\"color: #800080; text-decoration-color: #800080\">2017-03-17</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">primary      </span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">EDINA      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">MN    </span> │             <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">150</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">TCF FINANCIAL CORPORATION PAC                           </span> │ <span style=\"color: #800080; text-decoration-color: #800080\">2017-03-17</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">primary      </span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">ORONO      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">MN    </span> │             <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">416</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">TCF FINANCIAL CORPORATION PAC                           </span> │ <span style=\"color: #800080; text-decoration-color: #800080\">2017-03-17</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">primary      </span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">SHAKOPEE   </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">MN    </span> │             <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">125</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">TCF FINANCIAL CORPORATION PAC               
            </span> │ <span style=\"color: #800080; text-decoration-color: #800080\">2017-03-17</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">primary      </span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">MINNEAPOLIS</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">MN    </span> │             <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">125</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">TCF FINANCIAL CORPORATION PAC                           </span> │ <span style=\"color: #800080; text-decoration-color: #800080\">2017-03-17</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">primary      </span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">LAKE ELMO  </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">MN    </span> │              <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">75</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">TCF FINANCIAL CORPORATION PAC                           </span> │ <span style=\"color: #800080; text-decoration-color: #800080\">2017-03-17</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">primary      </span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">NASHVILLE  </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">TN    </span> │            <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">5000</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">MAKING A RESPONSIBLE STAND FOR HOUSEHOLDS IN AMERICA PAC</span> │ <span style=\"color: #800080; text-decoration-color: #800080\">2017-03-08</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">primary      </span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">BRIDGEVILLE</span> │ <span 
style=\"color: #008000; text-decoration-color: #008000\">PA    </span> │              <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">95</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">CONSOL ENERGY INC. PAC                                  </span> │ <span style=\"color: #800080; text-decoration-color: #800080\">2017-03-31</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">primary      </span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">MCMURRAY   </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">PA    </span> │             <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">112</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">CONSOL ENERGY INC. PAC                                  </span> │ <span style=\"color: #800080; text-decoration-color: #800080\">2017-03-31</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">primary      </span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">PITTSBURGH </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">PA    </span> │             <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">103</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">CONSOL ENERGY INC. 
PAC                                  </span> │ <span style=\"color: #800080; text-decoration-color: #800080\">2017-03-31</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">primary      </span> │\n│ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>           │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>      │               <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span> │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>                                                        │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>          │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>             │\n└─────────────┴────────┴─────────────────┴──────────────────────────────────────────────────────────┴────────────┴───────────────┘\n</pre>\n```\n:::\n:::\n\n\nThat worked well! Only 664 of the roughly 21.7 million rows ended up null in the resulting column, so we were\nable to determine the election type for nearly every transaction.\n\n::: {#149a0f14 .cell execution_count=15}\n``` {.python .cell-code}\ncleaned.election_type.topk(10)\n```\n\n::: {.cell-output .cell-output-display execution_count=15}\n```{=html}\n<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">┏━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓\n┃<span style=\"font-weight: bold\"> election_type </span>┃<span style=\"font-weight: bold\"> CountStar() </span>┃\n┡━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩\n│ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span>        │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">int64</span>       │\n├───────────────┼─────────────┤\n│ <span style=\"color: #008000; text-decoration-color: #008000\">primary      </span> │    <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">19061953</span> │\n│ <span style=\"color: #008000; text-decoration-color: 
#008000\">general      </span> │     <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">2216685</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">other        </span> │      <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">161965</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">special      </span> │      <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">149572</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">runoff       </span> │       <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">69637</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">convention   </span> │       <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">22453</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">recount      </span> │        <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">5063</span> │\n│ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">NULL</span>          │         <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">664</span> │\n└───────────────┴─────────────┘\n</pre>\n```\n:::\n:::\n\n\nAbout 1/20 of transactions are negative. These could represent refunds, or they\ncould be data entry errors. 
Let's drop them to keep it simple.\n\n::: {#7b0f4736 .cell execution_count=16}\n``` {.python .cell-code}\nabove_zero = cleaned.TRANSACTION_AMT > 0\ncleaned = cleaned[above_zero]\nabove_zero.value_counts()\n```\n\n::: {.cell-output .cell-output-display execution_count=16}\n```{=html}\n<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n┃<span style=\"font-weight: bold\"> Greater(TRANSACTION_AMT, 0) </span>┃<span style=\"font-weight: bold\"> Greater(TRANSACTION_AMT, 0)_count </span>┃\n┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n│ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">boolean</span>                     │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">int64</span>                             │\n├─────────────────────────────┼───────────────────────────────────┤\n│ False                       │                           <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1018183</span> │\n│ True                        │                          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">20669809</span> │\n└─────────────────────────────┴───────────────────────────────────┘\n</pre>\n```\n:::\n:::\n\n\n## Adding Features\n\nNow that the data is cleaned up to a usable format, let's add some features.\n\nFirst, it's useful to categorize donations by size, placing them into buckets\nof small, medium, large, etc.\n\n::: {#4ff18f22 .cell execution_count=17}\n``` {.python .cell-code}\nedges = [\n    10,\n    50,\n    100,\n    500,\n    1000,\n    5000,\n]\nlabels = [\n    \"<10\",\n    \"10-50\",\n    \"50-100\",\n    \"100-500\",\n    \"500-1000\",\n    \"1000-5000\",\n    \"5000+\",\n]\n\n\ndef bucketize(vals, edges, str_labels):\n    # Uses Ibis's .bucket() method to create a categorical 
column\n    int_labels = vals.bucket(edges, include_under=True, include_over=True)\n    # Map the integer labels to the string labels\n    int_to_str = {str(i): s for i, s in enumerate(str_labels)}\n    return int_labels.cast(str).substitute(int_to_str)\n\n\nfeatured = cleaned.mutate(amount_bucket=bucketize(_.TRANSACTION_AMT, edges, labels))\nfeatured\n```\n\n::: {.cell-output .cell-output-display execution_count=17}\n```{=html}\n<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">┏━━━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓\n┃<span style=\"font-weight: bold\"> CITY          </span>┃<span style=\"font-weight: bold\"> STATE  </span>┃<span style=\"font-weight: bold\"> TRANSACTION_AMT </span>┃<span style=\"font-weight: bold\"> CMTE_NM                  </span>┃<span style=\"font-weight: bold\"> date       </span>┃<span style=\"font-weight: bold\"> election_type </span>┃<span style=\"font-weight: bold\"> amount_bucket </span>┃\n┡━━━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩\n│ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span>        │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span> │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">int64</span>           │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span>                   │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">date</span>       │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span>        │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span>        │\n├───────────────┼────────┼─────────────────┼──────────────────────────┼────────────┼───────────────┼───────────────┤\n│ <span style=\"color: #008000; text-decoration-color: 
#008000\">RIVERSIDE    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">CA    </span> │             <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">150</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">MARK TAKANO FOR CONGRESS</span> │ <span style=\"color: #800080; text-decoration-color: #800080\">2017-07-29</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">primary      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">100-500      </span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">RIVERSIDE    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">CA    </span> │             <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">150</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">MARK TAKANO FOR CONGRESS</span> │ <span style=\"color: #800080; text-decoration-color: #800080\">2017-08-31</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">primary      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">100-500      </span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">SAN JACINTO  </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">CA    </span> │              <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">25</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">MARK TAKANO FOR CONGRESS</span> │ <span style=\"color: #800080; text-decoration-color: #800080\">2017-07-31</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">primary      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">10-50        </span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">RIVERSIDE    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">CA    </span> │        
      <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">25</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">MARK TAKANO FOR CONGRESS</span> │ <span style=\"color: #800080; text-decoration-color: #800080\">2017-08-22</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">primary      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">10-50        </span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">MORENO VALLEY</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">CA    </span> │              <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">25</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">MARK TAKANO FOR CONGRESS</span> │ <span style=\"color: #800080; text-decoration-color: #800080\">2017-09-16</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">primary      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">10-50        </span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">MORENO VALLEY</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">CA    </span> │               <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">5</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">MARK TAKANO FOR CONGRESS</span> │ <span style=\"color: #800080; text-decoration-color: #800080\">2017-09-15</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">primary      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">&lt;10          </span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">RIVERSIDE    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">CA    </span> │             <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">100</span> │ <span 
style=\"color: #008000; text-decoration-color: #008000\">MARK TAKANO FOR CONGRESS</span> │ <span style=\"color: #800080; text-decoration-color: #800080\">2017-07-31</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">primary      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">100-500      </span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">RIVERSIDE    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">CA    </span> │             <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">100</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">MARK TAKANO FOR CONGRESS</span> │ <span style=\"color: #800080; text-decoration-color: #800080\">2017-08-31</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">primary      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">100-500      </span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">RIVERSIDE    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">CA    </span> │             <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">100</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">MARK TAKANO FOR CONGRESS</span> │ <span style=\"color: #800080; text-decoration-color: #800080\">2017-09-30</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">primary      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">100-500      </span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">RIVERSIDE    </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">CA    </span> │             <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">250</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">MARK TAKANO FOR CONGRESS</span> │ <span style=\"color: 
#800080; text-decoration-color: #800080\">2017-08-30</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">primary      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">100-500      </span> │\n│ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>             │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>      │               <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span> │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>                        │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>          │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>             │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>             │\n└───────────────┴────────┴─────────────────┴──────────────────────────┴────────────┴───────────────┴───────────────┘\n</pre>\n```\n:::\n:::\n\n\n## Analysis\n\n### By donation size\n\nOne thing we can look at is the donation breakdown by size:\n- Are most donations small or large?\n- Where do politicians/committees get most of their money from? 
Large or small donations?\n\nWe also will compare performance of Ibis vs pandas during this groupby.\n\n::: {#ae7c190a .cell execution_count=18}\n``` {.python .cell-code}\ndef summary_by(table, by):\n    return table.group_by(by).agg(\n        n_donations=_.count(),\n        total_amount=_.TRANSACTION_AMT.sum(),\n        mean_amount=_.TRANSACTION_AMT.mean(),\n        median_amount=_.TRANSACTION_AMT.approx_median(),\n    )\n\n\ndef summary_by_pandas(df, by):\n    return df.groupby(by, as_index=False).agg(\n        n_donations=(\"election_type\", \"count\"),\n        total_amount=(\"TRANSACTION_AMT\", \"sum\"),\n        mean_amount=(\"TRANSACTION_AMT\", \"mean\"),\n        median_amount=(\"TRANSACTION_AMT\", \"median\"),\n    )\n\n\n# persist the input data so the following timings of the group_by are accurate.\nsubset = featured[\"election_type\", \"amount_bucket\", \"TRANSACTION_AMT\"]\nsubset = subset.cache()\npandas_subset = subset.execute()\n```\n:::\n\n\nLet's take a look at what we are actually computing:\n\n::: {#c1a800c0 .cell execution_count=19}\n``` {.python .cell-code}\nby_type_and_bucket = summary_by(subset, [\"election_type\", \"amount_bucket\"])\nby_type_and_bucket\n```\n\n::: {.cell-output .cell-output-display execution_count=19}\n```{=html}\n<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">┏━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓\n┃<span style=\"font-weight: bold\"> election_type </span>┃<span style=\"font-weight: bold\"> amount_bucket </span>┃<span style=\"font-weight: bold\"> n_donations </span>┃<span style=\"font-weight: bold\"> total_amount </span>┃<span style=\"font-weight: bold\"> mean_amount </span>┃<span style=\"font-weight: bold\"> median_amount </span>┃\n┡━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩\n│ <span style=\"color: #7f7f7f; text-decoration-color: 
#7f7f7f\">string</span>        │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span>        │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">int64</span>       │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">int64</span>        │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">float64</span>     │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">float64</span>       │\n├───────────────┼───────────────┼─────────────┼──────────────┼─────────────┼───────────────┤\n│ <span style=\"color: #008000; text-decoration-color: #008000\">special      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">5000+        </span> │         <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">129</span> │       <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">788712</span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">6114.046512</span> │        <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">5400.0</span> │\n│ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">NULL</span>          │ <span style=\"color: #008000; text-decoration-color: #008000\">1000-5000    </span> │         <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">116</span> │       <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">228657</span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1971.181034</span> │        <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1300.0</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">other        </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">100-500      </span> │         <span style=\"color: #008080; text-decoration-color: #008080; font-weight: 
bold\">630</span> │       <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">117988</span> │  <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">187.282540</span> │         <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">192.0</span> │\n│ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">NULL</span>          │ <span style=\"color: #008000; text-decoration-color: #008000\">&lt;10          </span> │          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">24</span> │          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">108</span> │    <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">4.500000</span> │           <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">5.0</span> │\n│ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">NULL</span>          │ <span style=\"color: #008000; text-decoration-color: #008000\">10-50        </span> │         <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">151</span> │         <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3167</span> │   <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">20.973510</span> │          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">25.0</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">convention   </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">5000+        </span> │         <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">219</span> │      <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1590300</span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">7261.643836</span> │ 
       <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">8100.0</span> │\n│ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">NULL</span>          │ <span style=\"color: #008000; text-decoration-color: #008000\">50-100       </span> │          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">36</span> │         <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1880</span> │   <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">52.222222</span> │          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">50.0</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">special      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">500-1000     </span> │        <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">7811</span> │      <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">4003293</span> │  <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">512.519908</span> │         <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">500.0</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">runoff       </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">100-500      </span> │       <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">18193</span> │      <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3088289</span> │  <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">169.751498</span> │         <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">100.0</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">convention   </span> │ <span style=\"color: #008000; 
text-decoration-color: #008000\">500-1000     </span> │        <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1824</span> │       <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">945321</span> │  <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">518.268092</span> │         <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">500.0</span> │\n│ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>             │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>             │           <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span> │            <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span> │           <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span> │             <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span> │\n└───────────────┴───────────────┴─────────────┴──────────────┴─────────────┴───────────────┘\n</pre>\n```\n:::\n:::\n\n\nOK, now let's do our timings.\n\nOne interesting thing to pay attention to here is the execution time for the following\ngroupby. Before, we could get away with lazy execution: because we only wanted to preview\nthe first few rows, we only had to compute the first few rows, so all our previews were\nvery fast.\n\nBut now, as soon as we do a groupby, we have to actually go through the whole dataset\nin order to compute the aggregate per group. So this is going to be slower. BUT,\nduckdb is still quite fast. It only takes milliseconds to groupby-agg all 20 million rows!\n\n::: {#bf433983 .cell execution_count=20}\n``` {.python .cell-code}\n%timeit summary_by(subset, [\"election_type\", \"amount_bucket\"]).execute()  # .execute() so we actually fetch the data\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n84.4 ms ± 325 μs per loop (mean ± std. dev. 
of 7 runs, 10 loops each)\n```\n:::\n:::\n\n\nNow let's try the same thing in pandas:\n\n::: {#cdccea7d .cell execution_count=21}\n``` {.python .cell-code}\n%timeit summary_by_pandas(pandas_subset, [\"election_type\", \"amount_bucket\"])\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n3.75 s ± 32.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n```\n:::\n:::\n\n\nIt takes about 4 seconds, which is more than 40 times slower than duckdb.\nAt this scale, it again doesn't matter,\nbut you could imagine with a dataset much larger than this, it would matter.\n\nLet's also think about memory usage:\n\n::: {#395df2f1 .cell execution_count=22}\n``` {.python .cell-code}\npandas_subset.memory_usage(deep=True).sum() / 1e9  # GB\n```\n\n::: {.cell-output .cell-output-display execution_count=22}\n```\nnp.float64(2.451874995)\n```\n:::\n:::\n\n\nThe source dataframe is a couple of gigabytes, so probably during the groupby,\nthe peak memory usage is going to be a bit higher than this. You could use a profiler\nsuch as [FIL](https://github.com/pythonspeed/filprofiler) if you wanted an exact number,\nI was too lazy to use that here.\n\nAgain, this works on my laptop at this dataset size, but much larger than this and I'd\nstart having problems. Duckdb on the other hand is designed around working out of core\nso it should scale to datasets into the hundreds of gigabytes, much larger than your\ncomputer's RAM.\n\n### Back to analysis\n\nOK, let's plot the result of that groupby.\n\nSurprise! (Or maybe not...) Most donations are small. But most of the money comes\nfrom donations larger than $1000.\n\nWell if that's the case, why do politicians spend so much time soliciting small\ndonations? 
One explanation is that they can use the number of donations\nas a marketing pitch, to show how popular they are, and thus how viable of a\ncandidate they are.\n\nThis also might explain whose interests are being served by our politicians.\n\n::: {#d0950c01 .cell execution_count=23}\n``` {.python .cell-code}\nimport altair as alt\n\n# Do some bookkeeping so the buckets are displayed smallest to largest on the charts\nbucket_col = alt.Column(\"amount_bucket:N\", sort=labels)\n\nn_by_bucket = (\n    alt.Chart(by_type_and_bucket.execute())\n    .mark_bar()\n    .encode(\n        x=bucket_col,\n        y=\"n_donations:Q\",\n        color=\"election_type:N\",\n    )\n)\ntotal_by_bucket = (\n    alt.Chart(by_type_and_bucket.execute())\n    .mark_bar()\n    .encode(\n        x=bucket_col,\n        y=\"total_amount:Q\",\n        color=\"election_type:N\",\n    )\n)\nn_by_bucket | total_by_bucket\n```\n\n::: {.cell-output .cell-output-display execution_count=23}\n```{=html}\n\n<style>\n  #altair-viz-79ec15eedd5449aba40cc26f9180234e.vega-embed {\n    width: 100%;\n    display: flex;\n  }\n\n  #altair-viz-79ec15eedd5449aba40cc26f9180234e.vega-embed details,\n  #altair-viz-79ec15eedd5449aba40cc26f9180234e.vega-embed details summary {\n    position: relative;\n  }\n</style>\n<div id=\"altair-viz-79ec15eedd5449aba40cc26f9180234e\"></div>\n<script type=\"text/javascript\">\n  var VEGA_DEBUG = (typeof VEGA_DEBUG == \"undefined\") ? 
{} : VEGA_DEBUG;\n  (function(spec, embedOpt){\n    let outputDiv = document.currentScript.previousElementSibling;\n    if (outputDiv.id !== \"altair-viz-79ec15eedd5449aba40cc26f9180234e\") {\n      outputDiv = document.getElementById(\"altair-viz-79ec15eedd5449aba40cc26f9180234e\");\n    }\n\n    const paths = {\n      \"vega\": \"https://cdn.jsdelivr.net/npm/vega@5?noext\",\n      \"vega-lib\": \"https://cdn.jsdelivr.net/npm/vega-lib?noext\",\n      \"vega-lite\": \"https://cdn.jsdelivr.net/npm/vega-lite@5.20.1?noext\",\n      \"vega-embed\": \"https://cdn.jsdelivr.net/npm/vega-embed@6?noext\",\n    };\n\n    function maybeLoadScript(lib, version) {\n      var key = `${lib.replace(\"-\", \"\")}_version`;\n      return (VEGA_DEBUG[key] == version) ?\n        Promise.resolve(paths[lib]) :\n        new Promise(function(resolve, reject) {\n          var s = document.createElement('script');\n          document.getElementsByTagName(\"head\")[0].appendChild(s);\n          s.async = true;\n          s.onload = () => {\n            VEGA_DEBUG[key] = version;\n            return resolve(paths[lib]);\n          };\n          s.onerror = () => reject(`Error loading script: ${paths[lib]}`);\n          s.src = paths[lib];\n        });\n    }\n\n    function showError(err) {\n      outputDiv.innerHTML = `<div class=\"error\" style=\"color:red;\">${err}</div>`;\n      throw err;\n    }\n\n    function displayChart(vegaEmbed) {\n      vegaEmbed(outputDiv, spec, embedOpt)\n        .catch(err => showError(`Javascript Error: ${err.message}<br>This usually means there's a typo in your chart specification. 
See the javascript console for the full traceback.`));\n    }\n\n    if(typeof define === \"function\" && define.amd) {\n      requirejs.config({paths});\n      let deps = [\"vega-embed\"];\n      require(deps, displayChart, err => showError(`Error loading script: ${err.message}`));\n    } else {\n      maybeLoadScript(\"vega\", \"5\")\n        .then(() => maybeLoadScript(\"vega-lite\", \"5.20.1\"))\n        .then(() => maybeLoadScript(\"vega-embed\", \"6\"))\n        .catch(showError)\n        .then(() => displayChart(vegaEmbed));\n    }\n  })({\"config\": {\"view\": {\"continuousWidth\": 300, \"continuousHeight\": 300}}, \"hconcat\": [{\"data\": {\"name\": \"data-97feed1dc52ea1a8f29e5f305302c418\"}, \"mark\": {\"type\": \"bar\"}, \"encoding\": {\"color\": {\"field\": \"election_type\", \"type\": \"nominal\"}, \"x\": {\"field\": \"amount_bucket\", \"sort\": [\"<10\", \"10-50\", \"50-100\", \"100-500\", \"500-1000\", \"1000-5000\", \"5000+\"], \"type\": \"nominal\"}, \"y\": {\"field\": \"n_donations\", \"type\": \"quantitative\"}}}, {\"data\": {\"name\": \"data-a9ac8e74ea2f41fb4431d4c04f9b24b3\"}, \"mark\": {\"type\": \"bar\"}, \"encoding\": {\"color\": {\"field\": \"election_type\", \"type\": \"nominal\"}, \"x\": {\"field\": \"amount_bucket\", \"sort\": [\"<10\", \"10-50\", \"50-100\", \"100-500\", \"500-1000\", \"1000-5000\", \"5000+\"], \"type\": \"nominal\"}, \"y\": {\"field\": \"total_amount\", \"type\": \"quantitative\"}}}], \"$schema\": \"https://vega.github.io/schema/vega-lite/v5.20.1.json\", \"datasets\": {\"data-97feed1dc52ea1a8f29e5f305302c418\": [{\"election_type\": \"special\", \"amount_bucket\": \"5000+\", \"n_donations\": 129, \"total_amount\": 788712, \"mean_amount\": 6114.046511627907, \"median_amount\": 5400.0}, {\"election_type\": \"convention\", \"amount_bucket\": \"5000+\", \"n_donations\": 219, \"total_amount\": 1590300, \"mean_amount\": 7261.643835616438, \"median_amount\": 8100.0}, {\"election_type\": \"other\", \"amount_bucket\": 
\"100-500\", \"n_donations\": 630, \"total_amount\": 117988, \"mean_amount\": 187.2825396825397, \"median_amount\": 192.0}, {\"election_type\": null, \"amount_bucket\": \"<10\", \"n_donations\": 24, \"total_amount\": 108, \"mean_amount\": 4.5, \"median_amount\": 5.0}, {\"election_type\": null, \"amount_bucket\": \"10-50\", \"n_donations\": 151, \"total_amount\": 3167, \"mean_amount\": 20.973509933774835, \"median_amount\": 25.0}, {\"election_type\": null, \"amount_bucket\": \"1000-5000\", \"n_donations\": 116, \"total_amount\": 228657, \"mean_amount\": 1971.1810344827586, \"median_amount\": 1300.0}, {\"election_type\": null, \"amount_bucket\": \"50-100\", \"n_donations\": 36, \"total_amount\": 1880, \"mean_amount\": 52.22222222222222, \"median_amount\": 50.0}, {\"election_type\": \"runoff\", \"amount_bucket\": \"100-500\", \"n_donations\": 18193, \"total_amount\": 3088289, \"mean_amount\": 169.75149782883526, \"median_amount\": 100.0}, {\"election_type\": \"special\", \"amount_bucket\": \"500-1000\", \"n_donations\": 7811, \"total_amount\": 4003293, \"mean_amount\": 512.5199078223019, \"median_amount\": 500.0}, {\"election_type\": \"convention\", \"amount_bucket\": \"500-1000\", \"n_donations\": 1824, \"total_amount\": 945321, \"mean_amount\": 518.2680921052631, \"median_amount\": 500.0}, {\"election_type\": \"general\", \"amount_bucket\": \"100-500\", \"n_donations\": 700821, \"total_amount\": 123174568, \"mean_amount\": 175.75753009684357, \"median_amount\": 150.0}, {\"election_type\": null, \"amount_bucket\": \"500-1000\", \"n_donations\": 89, \"total_amount\": 48290, \"mean_amount\": 542.5842696629213, \"median_amount\": 500.0}, {\"election_type\": \"primary\", \"amount_bucket\": \"500-1000\", \"n_donations\": 634677, \"total_amount\": 334630687, \"mean_amount\": 527.2456493618014, \"median_amount\": 500.0}, {\"election_type\": \"general\", \"amount_bucket\": \"5000+\", \"n_donations\": 3125, \"total_amount\": 44496373, \"mean_amount\": 14238.83936, 
\"median_amount\": 7525.0}, {\"election_type\": \"runoff\", \"amount_bucket\": \"10-50\", \"n_donations\": 20166, \"total_amount\": 461107, \"mean_amount\": 22.865565803828225, \"median_amount\": 25.0}, {\"election_type\": \"runoff\", \"amount_bucket\": \"50-100\", \"n_donations\": 11578, \"total_amount\": 585827, \"mean_amount\": 50.59828986007946, \"median_amount\": 50.0}, {\"election_type\": \"runoff\", \"amount_bucket\": \"1000-5000\", \"n_donations\": 5196, \"total_amount\": 9601993, \"mean_amount\": 1847.958622016936, \"median_amount\": 1919.0}, {\"election_type\": \"runoff\", \"amount_bucket\": \"<10\", \"n_donations\": 10191, \"total_amount\": 49621, \"mean_amount\": 4.869100186439015, \"median_amount\": 5.0}, {\"election_type\": \"recount\", \"amount_bucket\": \"500-1000\", \"n_donations\": 494, \"total_amount\": 250960, \"mean_amount\": 508.0161943319838, \"median_amount\": 500.0}, {\"election_type\": \"primary\", \"amount_bucket\": \"5000+\", \"n_donations\": 44085, \"total_amount\": 1558371116, \"mean_amount\": 35349.237064761255, \"median_amount\": 10000.0}, {\"election_type\": \"general\", \"amount_bucket\": \"500-1000\", \"n_donations\": 174182, \"total_amount\": 91015697, \"mean_amount\": 522.5321617618354, \"median_amount\": 500.0}, {\"election_type\": null, \"amount_bucket\": \"100-500\", \"n_donations\": 195, \"total_amount\": 46746, \"mean_amount\": 239.72307692307692, \"median_amount\": 250.0}, {\"election_type\": \"other\", \"amount_bucket\": \"1000-5000\", \"n_donations\": 235, \"total_amount\": 548212, \"mean_amount\": 2332.817021276596, \"median_amount\": 2633.0}, {\"election_type\": \"other\", \"amount_bucket\": \"<10\", \"n_donations\": 10993, \"total_amount\": 25816, \"mean_amount\": 2.3484035295187846, \"median_amount\": 1.0}, {\"election_type\": \"other\", \"amount_bucket\": \"10-50\", \"n_donations\": 2644, \"total_amount\": 64297, \"mean_amount\": 24.318078668683814, \"median_amount\": 23.0}, {\"election_type\": \"other\", 
\"amount_bucket\": \"50-100\", \"n_donations\": 451, \"total_amount\": 27149, \"mean_amount\": 60.19733924611973, \"median_amount\": 50.0}, {\"election_type\": \"recount\", \"amount_bucket\": \"5000+\", \"n_donations\": 26, \"total_amount\": 1888024, \"mean_amount\": 72616.30769230769, \"median_amount\": 101450.0}, {\"election_type\": \"primary\", \"amount_bucket\": \"100-500\", \"n_donations\": 3636287, \"total_amount\": 637353634, \"mean_amount\": 175.27594329050484, \"median_amount\": 150.0}, {\"election_type\": null, \"amount_bucket\": \"5000+\", \"n_donations\": 48, \"total_amount\": 1622455, \"mean_amount\": 33801.145833333336, \"median_amount\": 21731.0}, {\"election_type\": \"special\", \"amount_bucket\": \"1000-5000\", \"n_donations\": 7935, \"total_amount\": 13493154, \"mean_amount\": 1700.4604914933836, \"median_amount\": 1001.0}, {\"election_type\": \"convention\", \"amount_bucket\": \"1000-5000\", \"n_donations\": 2822, \"total_amount\": 4977314, \"mean_amount\": 1763.7540751240256, \"median_amount\": 1459.0}, {\"election_type\": \"convention\", \"amount_bucket\": \"10-50\", \"n_donations\": 6848, \"total_amount\": 141604, \"mean_amount\": 20.678154205607477, \"median_amount\": 25.0}, {\"election_type\": \"convention\", \"amount_bucket\": \"50-100\", \"n_donations\": 2966, \"total_amount\": 153281, \"mean_amount\": 51.67936614969656, \"median_amount\": 50.0}, {\"election_type\": \"convention\", \"amount_bucket\": \"<10\", \"n_donations\": 945, \"total_amount\": 4660, \"mean_amount\": 4.931216931216931, \"median_amount\": 5.0}, {\"election_type\": \"special\", \"amount_bucket\": \"10-50\", \"n_donations\": 51066, \"total_amount\": 1134616, \"mean_amount\": 22.21861904202405, \"median_amount\": 25.0}, {\"election_type\": \"special\", \"amount_bucket\": \"50-100\", \"n_donations\": 22859, \"total_amount\": 1177660, \"mean_amount\": 51.518439126820944, \"median_amount\": 50.0}, {\"election_type\": \"special\", \"amount_bucket\": \"<10\", \"n_donations\": 
25115, \"total_amount\": 122898, \"mean_amount\": 4.893410312562214, \"median_amount\": 5.0}, {\"election_type\": \"recount\", \"amount_bucket\": \"100-500\", \"n_donations\": 2232, \"total_amount\": 413753, \"mean_amount\": 185.37320788530465, \"median_amount\": 200.0}, {\"election_type\": \"general\", \"amount_bucket\": \"1000-5000\", \"n_donations\": 246101, \"total_amount\": 460025242, \"mean_amount\": 1869.2538510611496, \"median_amount\": 1960.0}, {\"election_type\": \"general\", \"amount_bucket\": \"10-50\", \"n_donations\": 660787, \"total_amount\": 14411588, \"mean_amount\": 21.809732939661345, \"median_amount\": 25.0}, {\"election_type\": \"general\", \"amount_bucket\": \"50-100\", \"n_donations\": 304363, \"total_amount\": 16184312, \"mean_amount\": 53.174374020495264, \"median_amount\": 50.0}, {\"election_type\": \"general\", \"amount_bucket\": \"<10\", \"n_donations\": 115873, \"total_amount\": 536742, \"mean_amount\": 4.632157620843509, \"median_amount\": 5.0}, {\"election_type\": \"runoff\", \"amount_bucket\": \"5000+\", \"n_donations\": 37, \"total_amount\": 211400, \"mean_amount\": 5713.513513513513, \"median_amount\": 5400.0}, {\"election_type\": \"other\", \"amount_bucket\": \"500-1000\", \"n_donations\": 119, \"total_amount\": 62535, \"mean_amount\": 525.5042016806723, \"median_amount\": 500.0}, {\"election_type\": \"primary\", \"amount_bucket\": \"<10\", \"n_donations\": 2423728, \"total_amount\": 10080721, \"mean_amount\": 4.159179990493983, \"median_amount\": 5.0}, {\"election_type\": \"primary\", \"amount_bucket\": \"50-100\", \"n_donations\": 2663933, \"total_amount\": 155426540, \"mean_amount\": 58.34476317535013, \"median_amount\": 50.0}, {\"election_type\": \"primary\", \"amount_bucket\": \"10-50\", \"n_donations\": 8115403, \"total_amount\": 187666251, \"mean_amount\": 23.12469892129818, \"median_amount\": 25.0}, {\"election_type\": \"primary\", \"amount_bucket\": \"1000-5000\", \"n_donations\": 684755, \"total_amount\": 1231394874, 
\"mean_amount\": 1798.2999379340056, \"median_amount\": 1007.0}, {\"election_type\": \"convention\", \"amount_bucket\": \"100-500\", \"n_donations\": 6350, \"total_amount\": 1097843, \"mean_amount\": 172.88866141732282, \"median_amount\": 138.0}, {\"election_type\": \"special\", \"amount_bucket\": \"100-500\", \"n_donations\": 34497, \"total_amount\": 5943498, \"mean_amount\": 172.29028611183583, \"median_amount\": 118.0}, {\"election_type\": \"runoff\", \"amount_bucket\": \"500-1000\", \"n_donations\": 4117, \"total_amount\": 2110393, \"mean_amount\": 512.6045664318679, \"median_amount\": 500.0}, {\"election_type\": \"recount\", \"amount_bucket\": \"1000-5000\", \"n_donations\": 589, \"total_amount\": 1113150, \"mean_amount\": 1889.8981324278438, \"median_amount\": 1965.0}, {\"election_type\": \"recount\", \"amount_bucket\": \"<10\", \"n_donations\": 110, \"total_amount\": 569, \"mean_amount\": 5.172727272727273, \"median_amount\": 5.0}, {\"election_type\": \"recount\", \"amount_bucket\": \"10-50\", \"n_donations\": 883, \"total_amount\": 20860, \"mean_amount\": 23.62400906002265, \"median_amount\": 25.0}, {\"election_type\": \"recount\", \"amount_bucket\": \"50-100\", \"n_donations\": 712, \"total_amount\": 38450, \"mean_amount\": 54.002808988764045, \"median_amount\": 50.0}, {\"election_type\": \"other\", \"amount_bucket\": \"5000+\", \"n_donations\": 48, \"total_amount\": 1901300, \"mean_amount\": 39610.416666666664, \"median_amount\": 16950.0}], \"data-a9ac8e74ea2f41fb4431d4c04f9b24b3\": [{\"election_type\": \"other\", \"amount_bucket\": \"100-500\", \"n_donations\": 630, \"total_amount\": 117988, \"mean_amount\": 187.2825396825397, \"median_amount\": 192.0}, {\"election_type\": \"convention\", \"amount_bucket\": \"5000+\", \"n_donations\": 219, \"total_amount\": 1590300, \"mean_amount\": 7261.643835616438, \"median_amount\": 8100.0}, {\"election_type\": null, \"amount_bucket\": \"1000-5000\", \"n_donations\": 116, \"total_amount\": 228657, \"mean_amount\": 
1971.1810344827586, \"median_amount\": 1300.0}, {\"election_type\": \"special\", \"amount_bucket\": \"5000+\", \"n_donations\": 129, \"total_amount\": 788712, \"mean_amount\": 6114.046511627907, \"median_amount\": 5400.0}, {\"election_type\": null, \"amount_bucket\": \"<10\", \"n_donations\": 24, \"total_amount\": 108, \"mean_amount\": 4.5, \"median_amount\": 5.0}, {\"election_type\": null, \"amount_bucket\": \"10-50\", \"n_donations\": 151, \"total_amount\": 3167, \"mean_amount\": 20.973509933774835, \"median_amount\": 25.0}, {\"election_type\": null, \"amount_bucket\": \"50-100\", \"n_donations\": 36, \"total_amount\": 1880, \"mean_amount\": 52.22222222222222, \"median_amount\": 50.0}, {\"election_type\": \"runoff\", \"amount_bucket\": \"100-500\", \"n_donations\": 18193, \"total_amount\": 3088289, \"mean_amount\": 169.75149782883526, \"median_amount\": 101.0}, {\"election_type\": \"special\", \"amount_bucket\": \"500-1000\", \"n_donations\": 7811, \"total_amount\": 4003293, \"mean_amount\": 512.5199078223019, \"median_amount\": 500.0}, {\"election_type\": \"convention\", \"amount_bucket\": \"500-1000\", \"n_donations\": 1824, \"total_amount\": 945321, \"mean_amount\": 518.2680921052631, \"median_amount\": 500.0}, {\"election_type\": \"general\", \"amount_bucket\": \"100-500\", \"n_donations\": 700821, \"total_amount\": 123174568, \"mean_amount\": 175.75753009684357, \"median_amount\": 150.0}, {\"election_type\": null, \"amount_bucket\": \"500-1000\", \"n_donations\": 89, \"total_amount\": 48290, \"mean_amount\": 542.5842696629213, \"median_amount\": 500.0}, {\"election_type\": \"primary\", \"amount_bucket\": \"500-1000\", \"n_donations\": 634677, \"total_amount\": 334630687, \"mean_amount\": 527.2456493618014, \"median_amount\": 500.0}, {\"election_type\": \"general\", \"amount_bucket\": \"5000+\", \"n_donations\": 3125, \"total_amount\": 44496373, \"mean_amount\": 14238.83936, \"median_amount\": 7554.0}, {\"election_type\": \"runoff\", \"amount_bucket\": 
\"1000-5000\", \"n_donations\": 5196, \"total_amount\": 9601993, \"mean_amount\": 1847.958622016936, \"median_amount\": 1892.0}, {\"election_type\": \"runoff\", \"amount_bucket\": \"10-50\", \"n_donations\": 20166, \"total_amount\": 461107, \"mean_amount\": 22.865565803828225, \"median_amount\": 25.0}, {\"election_type\": \"runoff\", \"amount_bucket\": \"50-100\", \"n_donations\": 11578, \"total_amount\": 585827, \"mean_amount\": 50.59828986007946, \"median_amount\": 50.0}, {\"election_type\": \"runoff\", \"amount_bucket\": \"<10\", \"n_donations\": 10191, \"total_amount\": 49621, \"mean_amount\": 4.869100186439015, \"median_amount\": 5.0}, {\"election_type\": \"recount\", \"amount_bucket\": \"500-1000\", \"n_donations\": 494, \"total_amount\": 250960, \"mean_amount\": 508.0161943319838, \"median_amount\": 500.0}, {\"election_type\": \"primary\", \"amount_bucket\": \"5000+\", \"n_donations\": 44085, \"total_amount\": 1558371116, \"mean_amount\": 35349.237064761255, \"median_amount\": 10000.0}, {\"election_type\": \"general\", \"amount_bucket\": \"500-1000\", \"n_donations\": 174182, \"total_amount\": 91015697, \"mean_amount\": 522.5321617618354, \"median_amount\": 500.0}, {\"election_type\": \"other\", \"amount_bucket\": \"1000-5000\", \"n_donations\": 235, \"total_amount\": 548212, \"mean_amount\": 2332.817021276596, \"median_amount\": 2633.0}, {\"election_type\": \"recount\", \"amount_bucket\": \"5000+\", \"n_donations\": 26, \"total_amount\": 1888024, \"mean_amount\": 72616.30769230769, \"median_amount\": 101450.0}, {\"election_type\": \"other\", \"amount_bucket\": \"10-50\", \"n_donations\": 2644, \"total_amount\": 64297, \"mean_amount\": 24.318078668683814, \"median_amount\": 23.0}, {\"election_type\": \"other\", \"amount_bucket\": \"50-100\", \"n_donations\": 451, \"total_amount\": 27149, \"mean_amount\": 60.19733924611973, \"median_amount\": 50.0}, {\"election_type\": null, \"amount_bucket\": \"100-500\", \"n_donations\": 195, \"total_amount\": 46746, 
\"mean_amount\": 239.72307692307692, \"median_amount\": 250.0}, {\"election_type\": \"other\", \"amount_bucket\": \"<10\", \"n_donations\": 10993, \"total_amount\": 25816, \"mean_amount\": 2.3484035295187846, \"median_amount\": 1.0}, {\"election_type\": \"primary\", \"amount_bucket\": \"100-500\", \"n_donations\": 3636287, \"total_amount\": 637353634, \"mean_amount\": 175.27594329050484, \"median_amount\": 150.0}, {\"election_type\": \"special\", \"amount_bucket\": \"1000-5000\", \"n_donations\": 7935, \"total_amount\": 13493154, \"mean_amount\": 1700.4604914933836, \"median_amount\": 1000.0}, {\"election_type\": \"convention\", \"amount_bucket\": \"10-50\", \"n_donations\": 6848, \"total_amount\": 141604, \"mean_amount\": 20.678154205607477, \"median_amount\": 25.0}, {\"election_type\": \"convention\", \"amount_bucket\": \"1000-5000\", \"n_donations\": 2822, \"total_amount\": 4977314, \"mean_amount\": 1763.7540751240256, \"median_amount\": 1445.0}, {\"election_type\": \"convention\", \"amount_bucket\": \"50-100\", \"n_donations\": 2966, \"total_amount\": 153281, \"mean_amount\": 51.67936614969656, \"median_amount\": 50.0}, {\"election_type\": \"convention\", \"amount_bucket\": \"<10\", \"n_donations\": 945, \"total_amount\": 4660, \"mean_amount\": 4.931216931216931, \"median_amount\": 5.0}, {\"election_type\": null, \"amount_bucket\": \"5000+\", \"n_donations\": 48, \"total_amount\": 1622455, \"mean_amount\": 33801.145833333336, \"median_amount\": 21731.0}, {\"election_type\": \"special\", \"amount_bucket\": \"50-100\", \"n_donations\": 22859, \"total_amount\": 1177660, \"mean_amount\": 51.518439126820944, \"median_amount\": 50.0}, {\"election_type\": \"special\", \"amount_bucket\": \"10-50\", \"n_donations\": 51066, \"total_amount\": 1134616, \"mean_amount\": 22.21861904202405, \"median_amount\": 25.0}, {\"election_type\": \"special\", \"amount_bucket\": \"<10\", \"n_donations\": 25115, \"total_amount\": 122898, \"mean_amount\": 4.893410312562214, 
\"median_amount\": 5.0}, {\"election_type\": \"recount\", \"amount_bucket\": \"100-500\", \"n_donations\": 2232, \"total_amount\": 413753, \"mean_amount\": 185.37320788530465, \"median_amount\": 200.0}, {\"election_type\": \"general\", \"amount_bucket\": \"1000-5000\", \"n_donations\": 246101, \"total_amount\": 460025242, \"mean_amount\": 1869.2538510611496, \"median_amount\": 1980.0}, {\"election_type\": \"general\", \"amount_bucket\": \"50-100\", \"n_donations\": 304363, \"total_amount\": 16184312, \"mean_amount\": 53.174374020495264, \"median_amount\": 50.0}, {\"election_type\": \"general\", \"amount_bucket\": \"10-50\", \"n_donations\": 660787, \"total_amount\": 14411588, \"mean_amount\": 21.809732939661345, \"median_amount\": 25.0}, {\"election_type\": \"general\", \"amount_bucket\": \"<10\", \"n_donations\": 115873, \"total_amount\": 536742, \"mean_amount\": 4.632157620843509, \"median_amount\": 5.0}, {\"election_type\": \"other\", \"amount_bucket\": \"500-1000\", \"n_donations\": 119, \"total_amount\": 62535, \"mean_amount\": 525.5042016806723, \"median_amount\": 500.0}, {\"election_type\": \"runoff\", \"amount_bucket\": \"5000+\", \"n_donations\": 37, \"total_amount\": 211400, \"mean_amount\": 5713.513513513513, \"median_amount\": 5400.0}, {\"election_type\": \"primary\", \"amount_bucket\": \"10-50\", \"n_donations\": 8115403, \"total_amount\": 187666251, \"mean_amount\": 23.12469892129818, \"median_amount\": 25.0}, {\"election_type\": \"primary\", \"amount_bucket\": \"<10\", \"n_donations\": 2423728, \"total_amount\": 10080721, \"mean_amount\": 4.159179990493983, \"median_amount\": 5.0}, {\"election_type\": \"primary\", \"amount_bucket\": \"50-100\", \"n_donations\": 2663933, \"total_amount\": 155426540, \"mean_amount\": 58.34476317535013, \"median_amount\": 50.0}, {\"election_type\": \"primary\", \"amount_bucket\": \"1000-5000\", \"n_donations\": 684755, \"total_amount\": 1231394874, \"mean_amount\": 1798.2999379340056, \"median_amount\": 1012.0}, 
{\"election_type\": \"runoff\", \"amount_bucket\": \"500-1000\", \"n_donations\": 4117, \"total_amount\": 2110393, \"mean_amount\": 512.6045664318679, \"median_amount\": 500.0}, {\"election_type\": \"special\", \"amount_bucket\": \"100-500\", \"n_donations\": 34497, \"total_amount\": 5943498, \"mean_amount\": 172.29028611183583, \"median_amount\": 118.0}, {\"election_type\": \"convention\", \"amount_bucket\": \"100-500\", \"n_donations\": 6350, \"total_amount\": 1097843, \"mean_amount\": 172.88866141732282, \"median_amount\": 135.0}, {\"election_type\": \"recount\", \"amount_bucket\": \"1000-5000\", \"n_donations\": 589, \"total_amount\": 1113150, \"mean_amount\": 1889.8981324278438, \"median_amount\": 1965.0}, {\"election_type\": \"recount\", \"amount_bucket\": \"<10\", \"n_donations\": 110, \"total_amount\": 569, \"mean_amount\": 5.172727272727273, \"median_amount\": 5.0}, {\"election_type\": \"recount\", \"amount_bucket\": \"10-50\", \"n_donations\": 883, \"total_amount\": 20860, \"mean_amount\": 23.62400906002265, \"median_amount\": 25.0}, {\"election_type\": \"recount\", \"amount_bucket\": \"50-100\", \"n_donations\": 712, \"total_amount\": 38450, \"mean_amount\": 54.002808988764045, \"median_amount\": 50.0}, {\"election_type\": \"other\", \"amount_bucket\": \"5000+\", \"n_donations\": 48, \"total_amount\": 1901300, \"mean_amount\": 39610.416666666664, \"median_amount\": 16950.0}]}}, {\"mode\": \"vega-lite\"});\n</script>\n```\n:::\n:::\n\n\n### By election stage\n\nLet's look at how donations break down by election stage. 
Do people donate\ndifferently for primary elections vs general elections?\n\nLet's ignore everything but primary and general elections, since they are the\nmost common, and arguably the most important.\n\n::: {#ff87b11e .cell execution_count=24}\n``` {.python .cell-code}\ngb2 = by_type_and_bucket[_.election_type.isin((\"primary\", \"general\"))]\nn_donations_per_election_type = _.n_donations.sum().over(group_by=\"election_type\")\nfrac = _.n_donations / n_donations_per_election_type\ngb2 = gb2.mutate(frac_n_donations_per_election_type=frac)\ngb2\n```\n\n::: {.cell-output .cell-output-display execution_count=24}\n```{=html}\n<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">┏━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n┃<span style=\"font-weight: bold\"> election_type </span>┃<span style=\"font-weight: bold\"> amount_bucket </span>┃<span style=\"font-weight: bold\"> n_donations </span>┃<span style=\"font-weight: bold\"> total_amount </span>┃<span style=\"font-weight: bold\"> mean_amount  </span>┃<span style=\"font-weight: bold\"> median_amount </span>┃<span style=\"font-weight: bold\"> frac_n_donations_per_election_type </span>┃\n┡━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n│ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span>        │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span>        │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">int64</span>       │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">int64</span>        │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">float64</span>      │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">float64</span>       │ <span style=\"color: #7f7f7f; 
text-decoration-color: #7f7f7f\">float64</span>                            │\n├───────────────┼───────────────┼─────────────┼──────────────┼──────────────┼───────────────┼────────────────────────────────────┤\n│ <span style=\"color: #008000; text-decoration-color: #008000\">general      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">5000+        </span> │        <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3125</span> │     <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">44496373</span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">14238.839360</span> │        <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">7524.0</span> │                           <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">0.001417</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">general      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">100-500      </span> │      <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">700821</span> │    <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">123174568</span> │   <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">175.757530</span> │         <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">149.0</span> │                           <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">0.317796</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">general      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">500-1000     </span> │      <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">174182</span> │     <span style=\"color: #008080; text-decoration-color: #008080; font-weight: 
bold\">91015697</span> │   <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">522.532162</span> │         <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">500.0</span> │                           <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">0.078985</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">general      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">1000-5000    </span> │      <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">246101</span> │    <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">460025242</span> │  <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1869.253851</span> │        <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1964.0</span> │                           <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">0.111598</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">general      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">10-50        </span> │      <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">660787</span> │     <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">14411588</span> │    <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">21.809733</span> │          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">25.0</span> │                           <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">0.299642</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">general      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">50-100       </span> │      <span style=\"color: 
#008080; text-decoration-color: #008080; font-weight: bold\">304363</span> │     <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">16184312</span> │    <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">53.174374</span> │          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">50.0</span> │                           <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">0.138017</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">general      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">&lt;10          </span> │      <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">115873</span> │       <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">536742</span> │     <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">4.632158</span> │           <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">5.0</span> │                           <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">0.052544</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">primary      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">500-1000     </span> │      <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">634677</span> │    <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">334630687</span> │   <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">527.245649</span> │         <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">500.0</span> │                           <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">0.034867</span> │\n│ <span style=\"color: #008000; 
text-decoration-color: #008000\">primary      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">100-500      </span> │     <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3636287</span> │    <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">637353634</span> │   <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">175.275943</span> │         <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">150.0</span> │                           <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">0.199765</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">primary      </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">5000+        </span> │       <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">44085</span> │   <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1558371116</span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">35349.237065</span> │       <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">10000.0</span> │                           <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">0.002422</span> │\n│ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>             │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>             │           <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span> │            <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span> │            <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span> │             <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span> │                                  <span style=\"color: #7f7f7f; 
text-decoration-color: #7f7f7f\">…</span> │\n└───────────────┴───────────────┴─────────────┴──────────────┴──────────────┴───────────────┴────────────────────────────────────┘\n</pre>\n```\n:::\n:::\n\n\nIt looks like primary elections get a larger proportion of small donations.\n\n::: {#7a9eb10d .cell execution_count=25}\n``` {.python .cell-code}\nalt.Chart(gb2.execute()).mark_bar().encode(\n    x=\"election_type:O\",\n    y=\"frac_n_donations_per_election_type:Q\",\n    color=bucket_col,\n)\n```\n\n::: {.cell-output .cell-output-display execution_count=25}\n```{=html}\n\n<style>\n  #altair-viz-f918a064456a4c78aa2284dfeacb9cd1.vega-embed {\n    width: 100%;\n    display: flex;\n  }\n\n  #altair-viz-f918a064456a4c78aa2284dfeacb9cd1.vega-embed details,\n  #altair-viz-f918a064456a4c78aa2284dfeacb9cd1.vega-embed details summary {\n    position: relative;\n  }\n</style>\n<div id=\"altair-viz-f918a064456a4c78aa2284dfeacb9cd1\"></div>\n<script type=\"text/javascript\">\n  var VEGA_DEBUG = (typeof VEGA_DEBUG == \"undefined\") ? 
{} : VEGA_DEBUG;\n  (function(spec, embedOpt){\n    let outputDiv = document.currentScript.previousElementSibling;\n    if (outputDiv.id !== \"altair-viz-f918a064456a4c78aa2284dfeacb9cd1\") {\n      outputDiv = document.getElementById(\"altair-viz-f918a064456a4c78aa2284dfeacb9cd1\");\n    }\n\n    const paths = {\n      \"vega\": \"https://cdn.jsdelivr.net/npm/vega@5?noext\",\n      \"vega-lib\": \"https://cdn.jsdelivr.net/npm/vega-lib?noext\",\n      \"vega-lite\": \"https://cdn.jsdelivr.net/npm/vega-lite@5.20.1?noext\",\n      \"vega-embed\": \"https://cdn.jsdelivr.net/npm/vega-embed@6?noext\",\n    };\n\n    function maybeLoadScript(lib, version) {\n      var key = `${lib.replace(\"-\", \"\")}_version`;\n      return (VEGA_DEBUG[key] == version) ?\n        Promise.resolve(paths[lib]) :\n        new Promise(function(resolve, reject) {\n          var s = document.createElement('script');\n          document.getElementsByTagName(\"head\")[0].appendChild(s);\n          s.async = true;\n          s.onload = () => {\n            VEGA_DEBUG[key] = version;\n            return resolve(paths[lib]);\n          };\n          s.onerror = () => reject(`Error loading script: ${paths[lib]}`);\n          s.src = paths[lib];\n        });\n    }\n\n    function showError(err) {\n      outputDiv.innerHTML = `<div class=\"error\" style=\"color:red;\">${err}</div>`;\n      throw err;\n    }\n\n    function displayChart(vegaEmbed) {\n      vegaEmbed(outputDiv, spec, embedOpt)\n        .catch(err => showError(`Javascript Error: ${err.message}<br>This usually means there's a typo in your chart specification. 
See the javascript console for the full traceback.`));\n    }\n\n    if(typeof define === \"function\" && define.amd) {\n      requirejs.config({paths});\n      let deps = [\"vega-embed\"];\n      require(deps, displayChart, err => showError(`Error loading script: ${err.message}`));\n    } else {\n      maybeLoadScript(\"vega\", \"5\")\n        .then(() => maybeLoadScript(\"vega-lite\", \"5.20.1\"))\n        .then(() => maybeLoadScript(\"vega-embed\", \"6\"))\n        .catch(showError)\n        .then(() => displayChart(vegaEmbed));\n    }\n  })({\"config\": {\"view\": {\"continuousWidth\": 300, \"continuousHeight\": 300}}, \"data\": {\"name\": \"data-b0c61205d1cd1fea4e9925ce2d1732a7\"}, \"mark\": {\"type\": \"bar\"}, \"encoding\": {\"color\": {\"field\": \"amount_bucket\", \"sort\": [\"<10\", \"10-50\", \"50-100\", \"100-500\", \"500-1000\", \"1000-5000\", \"5000+\"], \"type\": \"nominal\"}, \"x\": {\"field\": \"election_type\", \"type\": \"ordinal\"}, \"y\": {\"field\": \"frac_n_donations_per_election_type\", \"type\": \"quantitative\"}}, \"$schema\": \"https://vega.github.io/schema/vega-lite/v5.20.1.json\", \"datasets\": {\"data-b0c61205d1cd1fea4e9925ce2d1732a7\": [{\"election_type\": \"general\", \"amount_bucket\": \"100-500\", \"n_donations\": 700821, \"total_amount\": 123174568, \"mean_amount\": 175.75753009684357, \"median_amount\": 149.0, \"frac_n_donations_per_election_type\": 0.3177963334802553}, {\"election_type\": \"general\", \"amount_bucket\": \"5000+\", \"n_donations\": 3125, \"total_amount\": 44496373, \"mean_amount\": 14238.83936, \"median_amount\": 7540.0, \"frac_n_donations_per_election_type\": 0.0014170716090496688}, {\"election_type\": \"general\", \"amount_bucket\": \"500-1000\", \"n_donations\": 174182, \"total_amount\": 91015697, \"mean_amount\": 522.5321617618354, \"median_amount\": 500.0, \"frac_n_donations_per_election_type\": 0.0789850774423966}, {\"election_type\": \"general\", \"amount_bucket\": \"1000-5000\", \"n_donations\": 246101, 
\"total_amount\": 460025242, \"mean_amount\": 1869.2538510611496, \"median_amount\": 1970.0, \"frac_n_donations_per_election_type\": 0.1115976768187944}, {\"election_type\": \"general\", \"amount_bucket\": \"50-100\", \"n_donations\": 304363, \"total_amount\": 16184312, \"mean_amount\": 53.174374020495264, \"median_amount\": 50.0, \"frac_n_donations_per_election_type\": 0.13801733316645898}, {\"election_type\": \"general\", \"amount_bucket\": \"10-50\", \"n_donations\": 660787, \"total_amount\": 14411588, \"mean_amount\": 21.809732939661345, \"median_amount\": 25.0, \"frac_n_donations_per_election_type\": 0.2996423991453131}, {\"election_type\": \"general\", \"amount_bucket\": \"<10\", \"n_donations\": 115873, \"total_amount\": 536742, \"mean_amount\": 4.632157620843509, \"median_amount\": 5.0, \"frac_n_donations_per_election_type\": 0.052544108337731925}, {\"election_type\": \"primary\", \"amount_bucket\": \"500-1000\", \"n_donations\": 634677, \"total_amount\": 334630687, \"mean_amount\": 527.2456493618014, \"median_amount\": 500.0, \"frac_n_donations_per_election_type\": 0.03486686823197312}, {\"election_type\": \"primary\", \"amount_bucket\": \"100-500\", \"n_donations\": 3636287, \"total_amount\": 637353634, \"mean_amount\": 175.27594329050484, \"median_amount\": 150.0, \"frac_n_donations_per_election_type\": 0.1997645096366133}, {\"election_type\": \"primary\", \"amount_bucket\": \"5000+\", \"n_donations\": 44085, \"total_amount\": 1558371116, \"mean_amount\": 35349.237064761255, \"median_amount\": 10000.0, \"frac_n_donations_per_election_type\": 0.0024218711029492714}, {\"election_type\": \"primary\", \"amount_bucket\": \"50-100\", \"n_donations\": 2663933, \"total_amount\": 155426540, \"mean_amount\": 58.34476317535013, \"median_amount\": 50.0, \"frac_n_donations_per_election_type\": 0.14634688335925966}, {\"election_type\": \"primary\", \"amount_bucket\": \"1000-5000\", \"n_donations\": 684755, \"total_amount\": 1231394874, \"mean_amount\": 
1798.2999379340056, \"median_amount\": 1007.0, \"frac_n_donations_per_election_type\": 0.037617973167744775}, {\"election_type\": \"primary\", \"amount_bucket\": \"<10\", \"n_donations\": 2423728, \"total_amount\": 10080721, \"mean_amount\": 4.159179990493983, \"median_amount\": 5.0, \"frac_n_donations_per_election_type\": 0.13315088589336582}, {\"election_type\": \"primary\", \"amount_bucket\": \"10-50\", \"n_donations\": 8115403, \"total_amount\": 187666251, \"mean_amount\": 23.12469892129818, \"median_amount\": 25.0, \"frac_n_donations_per_election_type\": 0.44583100860809405}]}}, {\"mode\": \"vega-lite\"});\n</script>\n```\n:::\n:::\n\n\n### By recipient\n\nLet's look at the top players. Who gets the most donations?\n\nFar and away it is ActBlue, which acts as a conduit for donations to Democratic\ninterests.\n\nBeto O'Rourke is the top individual politician, hats off to him!\n\n::: {#54c2af64 .cell execution_count=26}\n``` {.python .cell-code}\nby_recip = summary_by(featured, \"CMTE_NM\")\nby_recip\n```\n\n::: {.cell-output .cell-output-display execution_count=26}\n```{=html}\n<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓\n┃<span style=\"font-weight: bold\"> CMTE_NM                                                                 </span>┃<span style=\"font-weight: bold\"> n_donations </span>┃<span style=\"font-weight: bold\"> total_amount </span>┃<span style=\"font-weight: bold\"> mean_amount </span>┃<span style=\"font-weight: bold\"> median_amount </span>┃\n┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩\n│ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span>                                                                  │ <span 
style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">int64</span>       │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">int64</span>        │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">float64</span>     │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">float64</span>       │\n├─────────────────────────────────────────────────────────────────────────┼─────────────┼──────────────┼─────────────┼───────────────┤\n│ <span style=\"color: #008000; text-decoration-color: #008000\">COMCAST CORPORATION &amp; NBCUNIVERSAL POLITICAL ACTION COMMITTEE - FEDERAL</span> │       <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">73359</span> │      <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3810028</span> │   <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">51.936749</span> │          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">38.0</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">COMMITTEE TO ELECT SAMEENA MUSTAFA                                     </span> │         <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">191</span> │        <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">77893</span> │  <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">407.816754</span> │         <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">250.0</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">AIR CONDITIONING CONTRACTORS OF AMERICA PAC                            </span> │         <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">206</span> │        <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">40350</span> │  <span style=\"color: #008080; 
text-decoration-color: #008080; font-weight: bold\">195.873786</span> │         <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">100.0</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">RIGHTNOW WOMEN PAC                                                     </span> │          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">85</span> │        <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">35653</span> │  <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">419.447059</span> │         <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">125.0</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">CAROL O'BRIEN FOR CONGRESS                                             </span> │         <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">169</span> │       <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">120377</span> │  <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">712.289941</span> │         <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">500.0</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">REPUBLICAN EXECUTIVE COMMITTEE OF VOLUSIA COUNTY                       </span> │         <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">349</span> │       <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">158987</span> │  <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">455.550143</span> │         <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">250.0</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">ELLISON FOR CONGRESS                                                   
</span> │        <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">7718</span> │       <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">972480</span> │  <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">126.001555</span> │          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">25.0</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">DWIGHT EVANS FOR CONGRESS                                              </span> │        <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1164</span> │       <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">613535</span> │  <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">527.091924</span> │         <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">250.0</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">SAAD FOR CONGRESS                                                      </span> │         <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">979</span> │       <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">580008</span> │  <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">592.449438</span> │         <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">304.0</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">BEN CLINE FOR CONGRESS, INC.                                           
</span> │         <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">897</span> │       <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">635999</span> │  <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">709.028986</span> │         <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">500.0</span> │\n│ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>                                                                       │           <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span> │            <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span> │           <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span> │             <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span> │\n└─────────────────────────────────────────────────────────────────────────┴─────────────┴──────────────┴─────────────┴───────────────┘\n</pre>\n```\n:::\n:::\n\n\n::: {#fca3108c .cell execution_count=27}\n``` {.python .cell-code}\ntop_recip = by_recip.order_by(ibis.desc(\"n_donations\")).head(10)\nalt.Chart(top_recip.execute()).mark_bar().encode(\n    x=alt.X(\"CMTE_NM:O\", sort=\"-y\"),\n    y=\"n_donations:Q\",\n)\n```\n\n::: {.cell-output .cell-output-display execution_count=27}\n```{=html}\n\n<style>\n  #altair-viz-19b632b4b1a64023854f5a4b194aac9e.vega-embed {\n    width: 100%;\n    display: flex;\n  }\n\n  #altair-viz-19b632b4b1a64023854f5a4b194aac9e.vega-embed details,\n  #altair-viz-19b632b4b1a64023854f5a4b194aac9e.vega-embed details summary {\n    position: relative;\n  }\n</style>\n<div id=\"altair-viz-19b632b4b1a64023854f5a4b194aac9e\"></div>\n<script type=\"text/javascript\">\n  var VEGA_DEBUG = (typeof VEGA_DEBUG == \"undefined\") ? 
{} : VEGA_DEBUG;\n  (function(spec, embedOpt){\n    let outputDiv = document.currentScript.previousElementSibling;\n    if (outputDiv.id !== \"altair-viz-19b632b4b1a64023854f5a4b194aac9e\") {\n      outputDiv = document.getElementById(\"altair-viz-19b632b4b1a64023854f5a4b194aac9e\");\n    }\n\n    const paths = {\n      \"vega\": \"https://cdn.jsdelivr.net/npm/vega@5?noext\",\n      \"vega-lib\": \"https://cdn.jsdelivr.net/npm/vega-lib?noext\",\n      \"vega-lite\": \"https://cdn.jsdelivr.net/npm/vega-lite@5.20.1?noext\",\n      \"vega-embed\": \"https://cdn.jsdelivr.net/npm/vega-embed@6?noext\",\n    };\n\n    function maybeLoadScript(lib, version) {\n      var key = `${lib.replace(\"-\", \"\")}_version`;\n      return (VEGA_DEBUG[key] == version) ?\n        Promise.resolve(paths[lib]) :\n        new Promise(function(resolve, reject) {\n          var s = document.createElement('script');\n          document.getElementsByTagName(\"head\")[0].appendChild(s);\n          s.async = true;\n          s.onload = () => {\n            VEGA_DEBUG[key] = version;\n            return resolve(paths[lib]);\n          };\n          s.onerror = () => reject(`Error loading script: ${paths[lib]}`);\n          s.src = paths[lib];\n        });\n    }\n\n    function showError(err) {\n      outputDiv.innerHTML = `<div class=\"error\" style=\"color:red;\">${err}</div>`;\n      throw err;\n    }\n\n    function displayChart(vegaEmbed) {\n      vegaEmbed(outputDiv, spec, embedOpt)\n        .catch(err => showError(`Javascript Error: ${err.message}<br>This usually means there's a typo in your chart specification. 
See the javascript console for the full traceback.`));\n    }\n\n    if(typeof define === \"function\" && define.amd) {\n      requirejs.config({paths});\n      let deps = [\"vega-embed\"];\n      require(deps, displayChart, err => showError(`Error loading script: ${err.message}`));\n    } else {\n      maybeLoadScript(\"vega\", \"5\")\n        .then(() => maybeLoadScript(\"vega-lite\", \"5.20.1\"))\n        .then(() => maybeLoadScript(\"vega-embed\", \"6\"))\n        .catch(showError)\n        .then(() => displayChart(vegaEmbed));\n    }\n  })({\"config\": {\"view\": {\"continuousWidth\": 300, \"continuousHeight\": 300}}, \"data\": {\"name\": \"data-b61be6be068af56f659da9a91995911d\"}, \"mark\": {\"type\": \"bar\"}, \"encoding\": {\"x\": {\"field\": \"CMTE_NM\", \"sort\": \"-y\", \"type\": \"ordinal\"}, \"y\": {\"field\": \"n_donations\", \"type\": \"quantitative\"}}, \"$schema\": \"https://vega.github.io/schema/vega-lite/v5.20.1.json\", \"datasets\": {\"data-b61be6be068af56f659da9a91995911d\": [{\"CMTE_NM\": \"ACTBLUE\", \"n_donations\": 5820888, \"total_amount\": 693057213, \"mean_amount\": 119.06382892094814, \"median_amount\": 25.0}, {\"CMTE_NM\": \"DCCC\", \"n_donations\": 1315476, \"total_amount\": 124802082, \"mean_amount\": 94.87218466927561, \"median_amount\": 25.0}, {\"CMTE_NM\": \"REPUBLICAN NATIONAL COMMITTEE\", \"n_donations\": 570561, \"total_amount\": 131525422, \"mean_amount\": 230.5194746924518, \"median_amount\": 50.0}, {\"CMTE_NM\": \"END CITIZENS UNITED\", \"n_donations\": 489710, \"total_amount\": 13654987, \"mean_amount\": 27.8838230789651, \"median_amount\": 15.0}, {\"CMTE_NM\": \"DSCC\", \"n_donations\": 347493, \"total_amount\": 67844824, \"mean_amount\": 195.2408365060591, \"median_amount\": 35.0}, {\"CMTE_NM\": \"PROGRESSIVE TURNOUT PROJECT\", \"n_donations\": 313433, \"total_amount\": 9251647, \"mean_amount\": 29.517144014829327, \"median_amount\": 15.0}, {\"CMTE_NM\": \"DNC SERVICES CORP./DEM. 
NAT'L COMMITTEE\", \"n_donations\": 280264, \"total_amount\": 70156788, \"mean_amount\": 250.32393743042275, \"median_amount\": 50.0}, {\"CMTE_NM\": \"BETO FOR TEXAS\", \"n_donations\": 280027, \"total_amount\": 44914966, \"mean_amount\": 160.39512618426082, \"median_amount\": 50.0}, {\"CMTE_NM\": \"NRSC\", \"n_donations\": 203124, \"total_amount\": 55384644, \"mean_amount\": 272.66420511608675, \"median_amount\": 50.0}, {\"CMTE_NM\": \"NRCC\", \"n_donations\": 178176, \"total_amount\": 38646560, \"mean_amount\": 216.90104166666666, \"median_amount\": 50.0}]}}, {\"mode\": \"vega-lite\"});\n</script>\n```\n:::\n:::\n\n\n### By Location\n\nWhere are the largest donations coming from?\n\n::: {#d50af21d .cell execution_count=28}\n``` {.python .cell-code}\nf2 = featured.mutate(loc=_.CITY + \", \" + _.STATE).drop(\"CITY\", \"STATE\")\nby_loc = summary_by(f2, \"loc\")\n# Drop the places with a small number of donations so we're\n# resistant to outliers for the mean\nby_loc = by_loc[_.n_donations > 1000]\nby_loc\n```\n\n::: {.cell-output .cell-output-display}\n```{=html}\n<script type=\"application/vnd.jupyter.widget-view+json\">\n{\"model_id\":\"8435117ea8ec42f9889527c9c1c5f2e6\",\"version_major\":2,\"version_minor\":0,\"quarto_mimetype\":\"application/vnd.jupyter.widget-view+json\"}\n</script>\n```\n:::\n\n::: {.cell-output .cell-output-display execution_count=28}\n```{=html}\n<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">┏━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓\n┃<span style=\"font-weight: bold\"> loc              </span>┃<span style=\"font-weight: bold\"> n_donations </span>┃<span style=\"font-weight: bold\"> total_amount </span>┃<span style=\"font-weight: bold\"> mean_amount </span>┃<span style=\"font-weight: bold\"> median_amount </span>┃\n┡━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩\n│ <span 
style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span>           │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">int64</span>       │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">int64</span>        │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">float64</span>     │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">float64</span>       │\n├──────────────────┼─────────────┼──────────────┼─────────────┼───────────────┤\n│ <span style=\"color: #008000; text-decoration-color: #008000\">BRYN MAWR, PA   </span> │        <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">6854</span> │      <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">2494882</span> │  <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">364.003793</span> │         <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">100.0</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">PITTSBURGH, PA  </span> │       <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">74208</span> │     <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">14358578</span> │  <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">193.490971</span> │          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">42.0</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">GREENSBORO, NC  </span> │       <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">15107</span> │      <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">4748771</span> │  <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">314.342424</span> │          <span style=\"color: #008080; text-decoration-color: #008080; 
font-weight: bold\">50.0</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">KEY WEST, FL    </span> │        <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">6150</span> │      <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1239006</span> │  <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">201.464390</span> │          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">25.0</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">YONKERS, NY     </span> │        <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">5327</span> │       <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">436812</span> │   <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">81.999625</span> │          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">25.0</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">DOWNINGTOWN, PA </span> │        <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3781</span> │       <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">388424</span> │  <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">102.730495</span> │          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">25.0</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">JACKSONVILLE, FL</span> │       <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">35733</span> │      <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">7408221</span> │  <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">207.321552</span> │          <span style=\"color: #008080; 
text-decoration-color: #008080; font-weight: bold\">47.0</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">AUSTIN, TX      </span> │      <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">189865</span> │     <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">33315922</span> │  <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">175.471635</span> │          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">38.0</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">LITHONIA, GA    </span> │        <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1110</span> │        <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">62765</span> │   <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">56.545045</span> │          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">25.0</span> │\n│ <span style=\"color: #008000; text-decoration-color: #008000\">TUCSON, AZ      </span> │       <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">88808</span> │     <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">12633841</span> │  <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">142.260168</span> │          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">25.0</span> │\n│ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>                │           <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span> │            <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span> │           <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span> │             <span style=\"color: #7f7f7f; text-decoration-color: 
#7f7f7f\">…</span> │\n└──────────────────┴─────────────┴──────────────┴─────────────┴───────────────┘\n</pre>\n```\n:::\n:::\n\n\n::: {#dc9ed5b1 .cell execution_count=29}\n``` {.python .cell-code}\ndef top_by(col):\n    top = by_loc.order_by(ibis.desc(col)).head(10)\n    return (\n        alt.Chart(top.execute())\n        .mark_bar()\n        .encode(\n            x=alt.X('loc:O', sort=\"-y\"),\n            y=col,\n        )\n    )\n\n\ntop_by(\"n_donations\") | top_by(\"total_amount\") | top_by(\"mean_amount\") | top_by(\n    \"median_amount\"\n)\n```\n\n::: {.cell-output .cell-output-display execution_count=29}\n```{=html}\n\n<style>\n  #altair-viz-fc32ed46a40644338011db4bb4c7f154.vega-embed {\n    width: 100%;\n    display: flex;\n  }\n\n  #altair-viz-fc32ed46a40644338011db4bb4c7f154.vega-embed details,\n  #altair-viz-fc32ed46a40644338011db4bb4c7f154.vega-embed details summary {\n    position: relative;\n  }\n</style>\n<div id=\"altair-viz-fc32ed46a40644338011db4bb4c7f154\"></div>\n<script type=\"text/javascript\">\n  var VEGA_DEBUG = (typeof VEGA_DEBUG == \"undefined\") ? 
{} : VEGA_DEBUG;\n  (function(spec, embedOpt){\n    let outputDiv = document.currentScript.previousElementSibling;\n    if (outputDiv.id !== \"altair-viz-fc32ed46a40644338011db4bb4c7f154\") {\n      outputDiv = document.getElementById(\"altair-viz-fc32ed46a40644338011db4bb4c7f154\");\n    }\n\n    const paths = {\n      \"vega\": \"https://cdn.jsdelivr.net/npm/vega@5?noext\",\n      \"vega-lib\": \"https://cdn.jsdelivr.net/npm/vega-lib?noext\",\n      \"vega-lite\": \"https://cdn.jsdelivr.net/npm/vega-lite@5.20.1?noext\",\n      \"vega-embed\": \"https://cdn.jsdelivr.net/npm/vega-embed@6?noext\",\n    };\n\n    function maybeLoadScript(lib, version) {\n      var key = `${lib.replace(\"-\", \"\")}_version`;\n      return (VEGA_DEBUG[key] == version) ?\n        Promise.resolve(paths[lib]) :\n        new Promise(function(resolve, reject) {\n          var s = document.createElement('script');\n          document.getElementsByTagName(\"head\")[0].appendChild(s);\n          s.async = true;\n          s.onload = () => {\n            VEGA_DEBUG[key] = version;\n            return resolve(paths[lib]);\n          };\n          s.onerror = () => reject(`Error loading script: ${paths[lib]}`);\n          s.src = paths[lib];\n        });\n    }\n\n    function showError(err) {\n      outputDiv.innerHTML = `<div class=\"error\" style=\"color:red;\">${err}</div>`;\n      throw err;\n    }\n\n    function displayChart(vegaEmbed) {\n      vegaEmbed(outputDiv, spec, embedOpt)\n        .catch(err => showError(`Javascript Error: ${err.message}<br>This usually means there's a typo in your chart specification. 
See the javascript console for the full traceback.`));\n    }\n\n    if(typeof define === \"function\" && define.amd) {\n      requirejs.config({paths});\n      let deps = [\"vega-embed\"];\n      require(deps, displayChart, err => showError(`Error loading script: ${err.message}`));\n    } else {\n      maybeLoadScript(\"vega\", \"5\")\n        .then(() => maybeLoadScript(\"vega-lite\", \"5.20.1\"))\n        .then(() => maybeLoadScript(\"vega-embed\", \"6\"))\n        .catch(showError)\n        .then(() => displayChart(vegaEmbed));\n    }\n  })({\"config\": {\"view\": {\"continuousWidth\": 300, \"continuousHeight\": 300}}, \"hconcat\": [{\"data\": {\"name\": \"data-d5ce674e380a65ea1c42bae2e57542dc\"}, \"mark\": {\"type\": \"bar\"}, \"encoding\": {\"x\": {\"field\": \"loc\", \"sort\": \"-y\", \"type\": \"ordinal\"}, \"y\": {\"field\": \"n_donations\", \"type\": \"quantitative\"}}}, {\"data\": {\"name\": \"data-718c91cb63f3d1d66e2d2fd1b2ffaf0b\"}, \"mark\": {\"type\": \"bar\"}, \"encoding\": {\"x\": {\"field\": \"loc\", \"sort\": \"-y\", \"type\": \"ordinal\"}, \"y\": {\"field\": \"total_amount\", \"type\": \"quantitative\"}}}, {\"data\": {\"name\": \"data-80b3a2d25f15791cad9e16130ca4f60a\"}, \"mark\": {\"type\": \"bar\"}, \"encoding\": {\"x\": {\"field\": \"loc\", \"sort\": \"-y\", \"type\": \"ordinal\"}, \"y\": {\"field\": \"mean_amount\", \"type\": \"quantitative\"}}}, {\"data\": {\"name\": \"data-329a0a513b4f90981272e83322174bdb\"}, \"mark\": {\"type\": \"bar\"}, \"encoding\": {\"x\": {\"field\": \"loc\", \"sort\": \"-y\", \"type\": \"ordinal\"}, \"y\": {\"field\": \"median_amount\", \"type\": \"quantitative\"}}}], \"$schema\": \"https://vega.github.io/schema/vega-lite/v5.20.1.json\", \"datasets\": {\"data-d5ce674e380a65ea1c42bae2e57542dc\": [{\"loc\": \"NEW YORK, NY\", \"n_donations\": 695091, \"total_amount\": 444600108, \"mean_amount\": 639.6286356750411, \"median_amount\": 50.0}, {\"loc\": \"WASHINGTON, DC\", \"n_donations\": 401498, \"total_amount\": 
124456508, \"mean_amount\": 309.9803934266173, \"median_amount\": 57.0}, {\"loc\": \"HOUSTON, TX\", \"n_donations\": 251960, \"total_amount\": 83026989, \"mean_amount\": 329.5244840450865, \"median_amount\": 50.0}, {\"loc\": \"LOS ANGELES, CA\", \"n_donations\": 245263, \"total_amount\": 89881980, \"mean_amount\": 366.4718282007478, \"median_amount\": 49.0}, {\"loc\": \"SAN FRANCISCO, CA\", \"n_donations\": 238117, \"total_amount\": 189799961, \"mean_amount\": 797.086982449804, \"median_amount\": 50.0}, {\"loc\": \"PHILADELPHIA, PA\", \"n_donations\": 222938, \"total_amount\": 36054977, \"mean_amount\": 161.72647552234253, \"median_amount\": 62.0}, {\"loc\": \"CHICAGO, IL\", \"n_donations\": 212527, \"total_amount\": 108119674, \"mean_amount\": 508.7338267608351, \"median_amount\": 40.0}, {\"loc\": \"SEATTLE, WA\", \"n_donations\": 197671, \"total_amount\": 52867387, \"mean_amount\": 267.4514066302088, \"median_amount\": 36.0}, {\"loc\": \"AUSTIN, TX\", \"n_donations\": 189865, \"total_amount\": 33315922, \"mean_amount\": 175.4716351091565, \"median_amount\": 39.0}, {\"loc\": \"ARLINGTON, VA\", \"n_donations\": 163168, \"total_amount\": 23382868, \"mean_amount\": 143.30547656403218, \"median_amount\": 50.0}], \"data-718c91cb63f3d1d66e2d2fd1b2ffaf0b\": [{\"loc\": \"NEW YORK, NY\", \"n_donations\": 695091, \"total_amount\": 444600108, \"mean_amount\": 639.6286356750411, \"median_amount\": 50.0}, {\"loc\": \"SAN FRANCISCO, CA\", \"n_donations\": 238117, \"total_amount\": 189799961, \"mean_amount\": 797.086982449804, \"median_amount\": 50.0}, {\"loc\": \"LAS VEGAS, NV\", \"n_donations\": 65940, \"total_amount\": 153467387, \"mean_amount\": 2327.37923870185, \"median_amount\": 45.0}, {\"loc\": \"WASHINGTON, DC\", \"n_donations\": 401498, \"total_amount\": 124456508, \"mean_amount\": 309.9803934266173, \"median_amount\": 57.0}, {\"loc\": \"CHICAGO, IL\", \"n_donations\": 212527, \"total_amount\": 108119674, \"mean_amount\": 508.7338267608351, \"median_amount\": 40.0}, 
{\"loc\": \"LOS ANGELES, CA\", \"n_donations\": 245263, \"total_amount\": 89881980, \"mean_amount\": 366.4718282007478, \"median_amount\": 49.0}, {\"loc\": \"HOUSTON, TX\", \"n_donations\": 251960, \"total_amount\": 83026989, \"mean_amount\": 329.5244840450865, \"median_amount\": 50.0}, {\"loc\": \"DALLAS, TX\", \"n_donations\": 154038, \"total_amount\": 66558403, \"mean_amount\": 432.09080226956985, \"median_amount\": 57.0}, {\"loc\": \"SEATTLE, WA\", \"n_donations\": 197671, \"total_amount\": 52867387, \"mean_amount\": 267.4514066302088, \"median_amount\": 36.0}, {\"loc\": \"BOSTON, MA\", \"n_donations\": 82925, \"total_amount\": 47592049, \"mean_amount\": 573.9167802230932, \"median_amount\": 58.0}], \"data-80b3a2d25f15791cad9e16130ca4f60a\": [{\"loc\": \"LAKE FOREST, IL\", \"n_donations\": 5636, \"total_amount\": 37486362, \"mean_amount\": 6651.235273243435, \"median_amount\": 100.0}, {\"loc\": \"MOUNT VERNON, OH\", \"n_donations\": 1431, \"total_amount\": 5605857, \"mean_amount\": 3917.4402515723273, \"median_amount\": 46.0}, {\"loc\": \"LOS ALTOS HILLS, CA\", \"n_donations\": 4098, \"total_amount\": 10367629, \"mean_amount\": 2529.92410932162, \"median_amount\": 325.0}, {\"loc\": \"PALM BEACH, FL\", \"n_donations\": 7140, \"total_amount\": 17212425, \"mean_amount\": 2410.703781512605, \"median_amount\": 255.0}, {\"loc\": \"LAS VEGAS, NV\", \"n_donations\": 65940, \"total_amount\": 153467387, \"mean_amount\": 2327.37923870185, \"median_amount\": 45.0}, {\"loc\": \"RHINEBECK, NY\", \"n_donations\": 3014, \"total_amount\": 5942571, \"mean_amount\": 1971.6559389515594, \"median_amount\": 47.0}, {\"loc\": \"JOPLIN, MO\", \"n_donations\": 1839, \"total_amount\": 3617186, \"mean_amount\": 1966.9309407286569, \"median_amount\": 50.0}, {\"loc\": \"BALA CYNWYD, PA\", \"n_donations\": 3668, \"total_amount\": 6949933, \"mean_amount\": 1894.7472737186479, \"median_amount\": 100.0}, {\"loc\": \"CARMEL, IN\", \"n_donations\": 10932, \"total_amount\": 20383688, 
\"mean_amount\": 1864.5890962312478, \"median_amount\": 53.0}, {\"loc\": \"WAYLAND, MA\", \"n_donations\": 5283, \"total_amount\": 9704279, \"mean_amount\": 1836.8879424569373, \"median_amount\": 50.0}], \"data-329a0a513b4f90981272e83322174bdb\": [{\"loc\": \"GLADWYNE, PA\", \"n_donations\": 1727, \"total_amount\": 1333243, \"mean_amount\": 771.9994209612044, \"median_amount\": 337.0}, {\"loc\": \"LOS ALTOS HILLS, CA\", \"n_donations\": 4098, \"total_amount\": 10367629, \"mean_amount\": 2529.92410932162, \"median_amount\": 313.0}, {\"loc\": \"MC LEAN, VA\", \"n_donations\": 4692, \"total_amount\": 3656109, \"mean_amount\": 779.2218670076726, \"median_amount\": 309.0}, {\"loc\": \"PALM BEACH, FL\", \"n_donations\": 7140, \"total_amount\": 17212425, \"mean_amount\": 2410.703781512605, \"median_amount\": 256.0}, {\"loc\": \"DOVER, MA\", \"n_donations\": 1040, \"total_amount\": 976757, \"mean_amount\": 939.189423076923, \"median_amount\": 250.0}, {\"loc\": \"MISSION HILLS, KS\", \"n_donations\": 2258, \"total_amount\": 1642339, \"mean_amount\": 727.3423383525244, \"median_amount\": 250.0}, {\"loc\": \"PARADISE VALLEY, AZ\", \"n_donations\": 8197, \"total_amount\": 7035291, \"mean_amount\": 858.2763206050994, \"median_amount\": 250.0}, {\"loc\": \"ATHERTON, CA\", \"n_donations\": 8780, \"total_amount\": 11595391, \"mean_amount\": 1320.6595671981777, \"median_amount\": 250.0}, {\"loc\": \"KENILWORTH, IL\", \"n_donations\": 1500, \"total_amount\": 855723, \"mean_amount\": 570.482, \"median_amount\": 250.0}, {\"loc\": \"SHORT HILLS, NJ\", \"n_donations\": 3555, \"total_amount\": 3396742, \"mean_amount\": 955.4829817158931, \"median_amount\": 248.0}]}}, {\"mode\": \"vega-lite\"});\n</script>\n```\n:::\n:::\n\n\n### By month\n\nWhen do the donations come in?\n\n::: {#0e69beb3 .cell execution_count=30}\n``` {.python .cell-code}\nby_month = summary_by(featured, _.date.month().name(\"month_int\"))\n# Sorta hacky, .substritute doesn't work to change dtypes (yet?)\n# so we cast 
to string and then do our mapping\nmonth_map = {\n    \"1\": \"Jan\",\n    \"2\": \"Feb\",\n    \"3\": \"Mar\",\n    \"4\": \"Apr\",\n    \"5\": \"May\",\n    \"6\": \"Jun\",\n    \"7\": \"Jul\",\n    \"8\": \"Aug\",\n    \"9\": \"Sep\",\n    \"10\": \"Oct\",\n    \"11\": \"Nov\",\n    \"12\": \"Dec\",\n}\nby_month = by_month.mutate(month_str=_.month_int.cast(str).substitute(month_map))\nby_month\n```\n\n::: {.cell-output .cell-output-display execution_count=30}\n```{=html}\n<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">┏━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓\n┃<span style=\"font-weight: bold\"> month_int </span>┃<span style=\"font-weight: bold\"> n_donations </span>┃<span style=\"font-weight: bold\"> total_amount </span>┃<span style=\"font-weight: bold\"> mean_amount </span>┃<span style=\"font-weight: bold\"> median_amount </span>┃<span style=\"font-weight: bold\"> month_str </span>┃\n┡━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩\n│ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">int32</span>     │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">int64</span>       │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">int64</span>        │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">float64</span>     │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">float64</span>       │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span>    │\n├───────────┼─────────────┼──────────────┼─────────────┼───────────────┼───────────┤\n│      <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">NULL</span> │        <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1514</span> │       <span style=\"color: #008080; text-decoration-color: #008080; font-weight: 
bold\">250297</span> │  <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">165.321664</span> │         <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">100.0</span> │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">NULL</span>      │\n│         <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1</span> │      <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">348979</span> │    <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">174837854</span> │  <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">500.998209</span> │         <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">122.0</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">Jan      </span> │\n│         <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">2</span> │      <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">581646</span> │    <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">255997655</span> │  <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">440.126219</span> │         <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">100.0</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">Feb      </span> │\n│         <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3</span> │     <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1042577</span> │    <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">430906797</span> │  <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">413.309326</span> │          <span style=\"color: #008080; text-decoration-color: #008080; 
font-weight: bold\">80.0</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">Mar      </span> │\n│         <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">4</span> │     <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1088244</span> │    <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">299252692</span> │  <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">274.986760</span> │          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">50.0</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">Apr      </span> │\n│         <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">5</span> │     <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1374247</span> │    <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">387317192</span> │  <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">281.839576</span> │          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">48.0</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">May      </span> │\n│         <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">6</span> │     <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1667285</span> │    <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">465305247</span> │  <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">279.079610</span> │          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">44.0</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">Jun      </span> │\n│         <span style=\"color: #008080; text-decoration-color: #008080; 
font-weight: bold\">7</span> │     <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1607053</span> │    <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">320528605</span> │  <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">199.451172</span> │          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">35.0</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">Jul      </span> │\n│         <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">8</span> │     <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">2023466</span> │    <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">473544182</span> │  <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">234.026261</span> │          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">35.0</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">Aug      </span> │\n│         <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">9</span> │     <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">2583847</span> │    <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">697888624</span> │  <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">270.096729</span> │          <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">38.0</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">Sep      </span> │\n│         <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span> │           <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span> │            <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span> │           <span 
style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span> │             <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span> │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">…</span>         │\n└───────────┴─────────────┴──────────────┴─────────────┴───────────────┴───────────┘\n</pre>\n```\n:::\n:::\n\n\n::: {#c7afc164 .cell execution_count=31}\n``` {.python .cell-code}\nmonths_in_order = list(month_map.values())\nalt.Chart(by_month.execute()).mark_bar().encode(\n    x=alt.X(\"month_str:O\", sort=months_in_order),\n    y=\"n_donations:Q\",\n)\n```\n\n::: {.cell-output .cell-output-display execution_count=31}\n```{=html}\n\n<style>\n  #altair-viz-c065f2367afa4c4a9d4bfd8dc055885b.vega-embed {\n    width: 100%;\n    display: flex;\n  }\n\n  #altair-viz-c065f2367afa4c4a9d4bfd8dc055885b.vega-embed details,\n  #altair-viz-c065f2367afa4c4a9d4bfd8dc055885b.vega-embed details summary {\n    position: relative;\n  }\n</style>\n<div id=\"altair-viz-c065f2367afa4c4a9d4bfd8dc055885b\"></div>\n<script type=\"text/javascript\">\n  var VEGA_DEBUG = (typeof VEGA_DEBUG == \"undefined\") ? 
{} : VEGA_DEBUG;\n  (function(spec, embedOpt){\n    let outputDiv = document.currentScript.previousElementSibling;\n    if (outputDiv.id !== \"altair-viz-c065f2367afa4c4a9d4bfd8dc055885b\") {\n      outputDiv = document.getElementById(\"altair-viz-c065f2367afa4c4a9d4bfd8dc055885b\");\n    }\n\n    const paths = {\n      \"vega\": \"https://cdn.jsdelivr.net/npm/vega@5?noext\",\n      \"vega-lib\": \"https://cdn.jsdelivr.net/npm/vega-lib?noext\",\n      \"vega-lite\": \"https://cdn.jsdelivr.net/npm/vega-lite@5.20.1?noext\",\n      \"vega-embed\": \"https://cdn.jsdelivr.net/npm/vega-embed@6?noext\",\n    };\n\n    function maybeLoadScript(lib, version) {\n      var key = `${lib.replace(\"-\", \"\")}_version`;\n      return (VEGA_DEBUG[key] == version) ?\n        Promise.resolve(paths[lib]) :\n        new Promise(function(resolve, reject) {\n          var s = document.createElement('script');\n          document.getElementsByTagName(\"head\")[0].appendChild(s);\n          s.async = true;\n          s.onload = () => {\n            VEGA_DEBUG[key] = version;\n            return resolve(paths[lib]);\n          };\n          s.onerror = () => reject(`Error loading script: ${paths[lib]}`);\n          s.src = paths[lib];\n        });\n    }\n\n    function showError(err) {\n      outputDiv.innerHTML = `<div class=\"error\" style=\"color:red;\">${err}</div>`;\n      throw err;\n    }\n\n    function displayChart(vegaEmbed) {\n      vegaEmbed(outputDiv, spec, embedOpt)\n        .catch(err => showError(`Javascript Error: ${err.message}<br>This usually means there's a typo in your chart specification. 
See the javascript console for the full traceback.`));\n    }\n\n    if(typeof define === \"function\" && define.amd) {\n      requirejs.config({paths});\n      let deps = [\"vega-embed\"];\n      require(deps, displayChart, err => showError(`Error loading script: ${err.message}`));\n    } else {\n      maybeLoadScript(\"vega\", \"5\")\n        .then(() => maybeLoadScript(\"vega-lite\", \"5.20.1\"))\n        .then(() => maybeLoadScript(\"vega-embed\", \"6\"))\n        .catch(showError)\n        .then(() => displayChart(vegaEmbed));\n    }\n  })({\"config\": {\"view\": {\"continuousWidth\": 300, \"continuousHeight\": 300}}, \"data\": {\"name\": \"data-17841cde8fd018f6a954527b38b6c974\"}, \"mark\": {\"type\": \"bar\"}, \"encoding\": {\"x\": {\"field\": \"month_str\", \"sort\": [\"Jan\", \"Feb\", \"Mar\", \"Apr\", \"May\", \"Jun\", \"Jul\", \"Aug\", \"Sep\", \"Oct\", \"Nov\", \"Dec\"], \"type\": \"ordinal\"}, \"y\": {\"field\": \"n_donations\", \"type\": \"quantitative\"}}, \"$schema\": \"https://vega.github.io/schema/vega-lite/v5.20.1.json\", \"datasets\": {\"data-17841cde8fd018f6a954527b38b6c974\": [{\"month_int\": null, \"n_donations\": 1514, \"total_amount\": 250297, \"mean_amount\": 165.3216644649934, \"median_amount\": 100.0, \"month_str\": null}, {\"month_int\": 1.0, \"n_donations\": 348979, \"total_amount\": 174837854, \"mean_amount\": 500.9982090612902, \"median_amount\": 124.0, \"month_str\": \"Jan\"}, {\"month_int\": 2.0, \"n_donations\": 581646, \"total_amount\": 255997655, \"mean_amount\": 440.126219384299, \"median_amount\": 100.0, \"month_str\": \"Feb\"}, {\"month_int\": 3.0, \"n_donations\": 1042577, \"total_amount\": 430906797, \"mean_amount\": 413.3093258339672, \"median_amount\": 80.0, \"month_str\": \"Mar\"}, {\"month_int\": 4.0, \"n_donations\": 1088244, \"total_amount\": 299252692, \"mean_amount\": 274.98676032213365, \"median_amount\": 50.0, \"month_str\": \"Apr\"}, {\"month_int\": 5.0, \"n_donations\": 1374247, \"total_amount\": 387317192, 
\"mean_amount\": 281.83957614606396, \"median_amount\": 48.0, \"month_str\": \"May\"}, {\"month_int\": 6.0, \"n_donations\": 1667285, \"total_amount\": 465305247, \"mean_amount\": 279.07960966481437, \"median_amount\": 44.0, \"month_str\": \"Jun\"}, {\"month_int\": 7.0, \"n_donations\": 1607053, \"total_amount\": 320528605, \"mean_amount\": 199.45117242555162, \"median_amount\": 35.0, \"month_str\": \"Jul\"}, {\"month_int\": 8.0, \"n_donations\": 2023466, \"total_amount\": 473544182, \"mean_amount\": 234.02626088108227, \"median_amount\": 35.0, \"month_str\": \"Aug\"}, {\"month_int\": 9.0, \"n_donations\": 2583847, \"total_amount\": 697888624, \"mean_amount\": 270.0967294116099, \"median_amount\": 38.0, \"month_str\": \"Sep\"}, {\"month_int\": 10.0, \"n_donations\": 3686024, \"total_amount\": 850820707, \"mean_amount\": 230.82343115508743, \"median_amount\": 29.0, \"month_str\": \"Oct\"}, {\"month_int\": 11.0, \"n_donations\": 2545616, \"total_amount\": 285143995, \"mean_amount\": 112.01375030640914, \"median_amount\": 25.0, \"month_str\": \"Nov\"}, {\"month_int\": 12.0, \"n_donations\": 2119311, \"total_amount\": 283081648, \"mean_amount\": 133.57249030463203, \"median_amount\": 25.0, \"month_str\": \"Dec\"}]}}, {\"mode\": \"vega-lite\"});\n</script>\n```\n:::\n:::\n\n\n## Conclusion\n\nThanks for following along! I hope you've learned something about Ibis, and\nmaybe even about campaign finance.\n\nIbis is a great tool for exploring data. I now find myself reaching for it\nwhen in the past I would have reached for pandas.\n\nSome of the highlights for me:\n\n- Fast, lazy execution, a great display format, and good type hinting/editor support for a great REPL experience.\n- Very well thought-out API and semantics (e.g. `isinstance(val, NumericValue)`?? That's beautiful!)\n- Fast and fairly complete string support, since I work with a lot of text data.\n- Extremely responsive maintainers. 
Sometimes I've submitted multiple feature requests and bug reports in a single day, and a PR has been merged by the next day.\n- Escape hatch to SQL. I didn't have to use that here, but if something isn't supported, you can always fall back to SQL.\n\nCheck out [The Ibis Website](https://ibis-project.org/) for more information.\n\n",
     "supporting": [
       "index_files"
     ],
     "filters": [],
     "includes": {
       "include-in-header": [
-        "<script src=\"https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.6/require.min.js\" integrity=\"sha512-c3Nl8+7g4LMSTdrm621y7kf9v3SDPnhxLNhcjFJbKECVnmZHTdo+IRO05sNLTH/D3vA6u1X32ehoLC7WFVdheg==\" crossorigin=\"anonymous\"></script>\n<script src=\"https://cdnjs.cloudflare.com/ajax/libs/jquery/3.5.1/jquery.min.js\" integrity=\"sha512-bLT0Qm9VnAYZDflyKcBaQ2gg0hSYNQrJ8RilYldYQ1FxQYoCLtUjuuRuZo+fjqhx/qtq/1itJ0C2ejDxltZVFg==\" crossorigin=\"anonymous\" data-relocate-top=\"true\"></script>\n<script type=\"application/javascript\">define('jquery', [],function() {return window.jQuery;})</script>\n"
+        "<script src=\"https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.6/require.min.js\" integrity=\"sha512-c3Nl8+7g4LMSTdrm621y7kf9v3SDPnhxLNhcjFJbKECVnmZHTdo+IRO05sNLTH/D3vA6u1X32ehoLC7WFVdheg==\" crossorigin=\"anonymous\"></script>\n<script src=\"https://cdnjs.cloudflare.com/ajax/libs/jquery/3.5.1/jquery.min.js\" integrity=\"sha512-bLT0Qm9VnAYZDflyKcBaQ2gg0hSYNQrJ8RilYldYQ1FxQYoCLtUjuuRuZo+fjqhx/qtq/1itJ0C2ejDxltZVFg==\" crossorigin=\"anonymous\" data-relocate-top=\"true\"></script>\n<script type=\"application/javascript\">define('jquery', [],function() {return window.jQuery;})</script>\n<script src=\"https://unpkg.com/@jupyter-widgets/html-manager@*/dist/embed-amd.js\" crossorigin=\"anonymous\"></script>\n"
+      ],
+      "include-after-body": [
+        "<script type=application/vnd.jupyter.widget-state+json>\n{\"state\":{\"0389db2e49ae4f7891db531b7a1d8822\":{\"model_module\":\"@jupyter-widgets/controls\",\"model_module_version\":\"2.0.0\",\"model_name\":\"ProgressStyleModel\",\"state\":{\"_model_module\":\"@jupyter-widgets/controls\",\"_model_module_version\":\"2.0.0\",\"_model_name\":\"ProgressStyleModel\",\"_view_count\":null,\"_view_module\":\"@jupyter-widgets/base\",\"_view_module_version\":\"2.0.0\",\"_view_name\":\"StyleView\",\"bar_color\":\"black\",\"description_width\":\"\"}},\"173d1703846746568e547f6f95747097\":{\"model_module\":\"@jupyter-widgets/base\",\"model_module_version\":\"2.0.0\",\"model_name\":\"LayoutModel\",\"state\":{\"_model_module\":\"@jupyter-widgets/base\",\"_model_module_version\":\"2.0.0\",\"_model_name\":\"LayoutModel\",\"_view_count\":null,\"_view_module\":\"@jupyter-widgets/base\",\"_view_module_version\":\"2.0.0\",\"_view_name\":\"LayoutView\",\"align_content\":null,\"align_items\":null,\"align_self\":null,\"border_bottom\":null,\"border_left\":null,\"border_right\":null,\"border_top\":null,\"bottom\":null,\"display\":null,\"flex\":null,\"flex_flow\":null,\"grid_area\":null,\"grid_auto_columns\":null,\"grid_auto_flow\":null,\"grid_auto_rows\":null,\"grid_column\":null,\"grid_gap\":null,\"grid_row\":null,\"grid_template_areas\":null,\"grid_template_columns\":null,\"grid_template_rows\":null,\"height\":null,\"justify_content\":null,\"justify_items\":null,\"left\":null,\"margin\":null,\"max_height\":null,\"max_width\":null,\"min_height\":null,\"min_width\":null,\"object_fit\":null,\"object_position\":null,\"order\":null,\"overflow\":null,\"padding\":null,\"right\":null,\"top\":null,\"visibility\":null,\"width\":\"auto\"}},\"25fdb26f53744bfdb6c6fad84a62bdc6\":{\"model_module\":\"@jupyter-widgets/controls\",\"model_module_version\":\"2.0.0\",\"model_name\":\"FloatProgressModel\",\"state\":{\"_dom_classes\":[],\"_model_module\":\"@jupyter-widgets/controls\",\"_model_module_versi
on\":\"2.0.0\",\"_model_name\":\"FloatProgressModel\",\"_view_count\":null,\"_view_module\":\"@jupyter-widgets/controls\",\"_view_module_version\":\"2.0.0\",\"_view_name\":\"ProgressView\",\"bar_style\":\"\",\"description\":\"\",\"description_allow_html\":false,\"layout\":\"IPY_MODEL_93ac02e0821a436bbcdf5d1ac03992b5\",\"max\":100,\"min\":0,\"orientation\":\"horizontal\",\"style\":\"IPY_MODEL_d727cb1ab6b3463187431a147febc1e8\",\"tabbable\":null,\"tooltip\":null,\"value\":100}},\"27a3b4410dbf49959cc31fc5b5129cb1\":{\"model_module\":\"@jupyter-widgets/base\",\"model_module_version\":\"2.0.0\",\"model_name\":\"LayoutModel\",\"state\":{\"_model_module\":\"@jupyter-widgets/base\",\"_model_module_version\":\"2.0.0\",\"_model_name\":\"LayoutModel\",\"_view_count\":null,\"_view_module\":\"@jupyter-widgets/base\",\"_view_module_version\":\"2.0.0\",\"_view_name\":\"LayoutView\",\"align_content\":null,\"align_items\":null,\"align_self\":null,\"border_bottom\":null,\"border_left\":null,\"border_right\":null,\"border_top\":null,\"bottom\":null,\"display\":null,\"flex\":null,\"flex_flow\":null,\"grid_area\":null,\"grid_auto_columns\":null,\"grid_auto_flow\":null,\"grid_auto_rows\":null,\"grid_column\":null,\"grid_gap\":null,\"grid_row\":null,\"grid_template_areas\":null,\"grid_template_columns\":null,\"grid_template_rows\":null,\"height\":null,\"justify_content\":null,\"justify_items\":null,\"left\":null,\"margin\":null,\"max_height\":null,\"max_width\":null,\"min_height\":null,\"min_width\":null,\"object_fit\":null,\"object_position\":null,\"order\":null,\"overflow\":null,\"padding\":null,\"right\":null,\"top\":null,\"visibility\":null,\"width\":\"auto\"}},\"8435117ea8ec42f9889527c9c1c5f2e6\":{\"model_module\":\"@jupyter-widgets/controls\",\"model_module_version\":\"2.0.0\",\"model_name\":\"FloatProgressModel\",\"state\":{\"_dom_classes\":[],\"_model_module\":\"@jupyter-widgets/controls\",\"_model_module_version\":\"2.0.0\",\"_model_name\":\"FloatProgressModel\",\"_view_count\":n
ull,\"_view_module\":\"@jupyter-widgets/controls\",\"_view_module_version\":\"2.0.0\",\"_view_name\":\"ProgressView\",\"bar_style\":\"\",\"description\":\"\",\"description_allow_html\":false,\"layout\":\"IPY_MODEL_27a3b4410dbf49959cc31fc5b5129cb1\",\"max\":100,\"min\":0,\"orientation\":\"horizontal\",\"style\":\"IPY_MODEL_9014329d5fe74117b5976e74aa1b4501\",\"tabbable\":null,\"tooltip\":null,\"value\":100}},\"9014329d5fe74117b5976e74aa1b4501\":{\"model_module\":\"@jupyter-widgets/controls\",\"model_module_version\":\"2.0.0\",\"model_name\":\"ProgressStyleModel\",\"state\":{\"_model_module\":\"@jupyter-widgets/controls\",\"_model_module_version\":\"2.0.0\",\"_model_name\":\"ProgressStyleModel\",\"_view_count\":null,\"_view_module\":\"@jupyter-widgets/base\",\"_view_module_version\":\"2.0.0\",\"_view_name\":\"StyleView\",\"bar_color\":\"black\",\"description_width\":\"\"}},\"93ac02e0821a436bbcdf5d1ac03992b5\":{\"model_module\":\"@jupyter-widgets/base\",\"model_module_version\":\"2.0.0\",\"model_name\":\"LayoutModel\",\"state\":{\"_model_module\":\"@jupyter-widgets/base\",\"_model_module_version\":\"2.0.0\",\"_model_name\":\"LayoutModel\",\"_view_count\":null,\"_view_module\":\"@jupyter-widgets/base\",\"_view_module_version\":\"2.0.0\",\"_view_name\":\"LayoutView\",\"align_content\":null,\"align_items\":null,\"align_self\":null,\"border_bottom\":null,\"border_left\":null,\"border_right\":null,\"border_top\":null,\"bottom\":null,\"display\":null,\"flex\":null,\"flex_flow\":null,\"grid_area\":null,\"grid_auto_columns\":null,\"grid_auto_flow\":null,\"grid_auto_rows\":null,\"grid_column\":null,\"grid_gap\":null,\"grid_row\":null,\"grid_template_areas\":null,\"grid_template_columns\":null,\"grid_template_rows\":null,\"height\":null,\"justify_content\":null,\"justify_items\":null,\"left\":null,\"margin\":null,\"max_height\":null,\"max_width\":null,\"min_height\":null,\"min_width\":null,\"object_fit\":null,\"object_position\":null,\"order\":null,\"overflow\":null,\"padding\":n
ull,\"right\":null,\"top\":null,\"visibility\":null,\"width\":\"auto\"}},\"c555021fe66b479dbde9787832c212b9\":{\"model_module\":\"@jupyter-widgets/controls\",\"model_module_version\":\"2.0.0\",\"model_name\":\"FloatProgressModel\",\"state\":{\"_dom_classes\":[],\"_model_module\":\"@jupyter-widgets/controls\",\"_model_module_version\":\"2.0.0\",\"_model_name\":\"FloatProgressModel\",\"_view_count\":null,\"_view_module\":\"@jupyter-widgets/controls\",\"_view_module_version\":\"2.0.0\",\"_view_name\":\"ProgressView\",\"bar_style\":\"\",\"description\":\"\",\"description_allow_html\":false,\"layout\":\"IPY_MODEL_173d1703846746568e547f6f95747097\",\"max\":100,\"min\":0,\"orientation\":\"horizontal\",\"style\":\"IPY_MODEL_0389db2e49ae4f7891db531b7a1d8822\",\"tabbable\":null,\"tooltip\":null,\"value\":100}},\"d727cb1ab6b3463187431a147febc1e8\":{\"model_module\":\"@jupyter-widgets/controls\",\"model_module_version\":\"2.0.0\",\"model_name\":\"ProgressStyleModel\",\"state\":{\"_model_module\":\"@jupyter-widgets/controls\",\"_model_module_version\":\"2.0.0\",\"_model_name\":\"ProgressStyleModel\",\"_view_count\":null,\"_view_module\":\"@jupyter-widgets/base\",\"_view_module_version\":\"2.0.0\",\"_view_name\":\"StyleView\",\"bar_color\":\"black\",\"description_width\":\"\"}}},\"version_major\":2,\"version_minor\":0}\n</script>\n"
       ]
     }
   }
diff --git a/docs/posts/campaign-finance/index.qmd b/docs/posts/campaign-finance/index.qmd
index 7a623f93cc5e..32251103bfcd 100644
--- a/docs/posts/campaign-finance/index.qmd
+++ b/docs/posts/campaign-finance/index.qmd
@@ -214,7 +214,7 @@ from ibis.expr.types import StringValue, DateValue
 
 
 def mmddyyyy_to_date(val: StringValue) -> DateValue:
-    return val.cast(str).lpad(8, "0").to_timestamp("%m%d%Y").date()
+    return val.cast(str).lpad(8, "0").nullif("").to_timestamp("%m%d%Y").date()
 
 
 cleaned = cleaned.mutate(date=mmddyyyy_to_date(_.TRANSACTION_DT)).drop("TRANSACTION_DT")