From a7b291422b21ba280149d0809f0aa5074d372867 Mon Sep 17 00:00:00 2001 From: n-shenoy <51956619+n-shenoy@users.noreply.github.com> Date: Tue, 19 Apr 2022 19:27:46 +0530 Subject: [PATCH] Updated comments --- ...quencing Cycles and Bases-checkpoint.ipynb | 503 ++++++++++++++++++ seq_processing.py | 4 + 2 files changed, 507 insertions(+) create mode 100644 .ipynb_checkpoints/Identifying Poor Quality Sequencing Cycles and Bases-checkpoint.ipynb diff --git a/.ipynb_checkpoints/Identifying Poor Quality Sequencing Cycles and Bases-checkpoint.ipynb b/.ipynb_checkpoints/Identifying Poor Quality Sequencing Cycles and Bases-checkpoint.ipynb new file mode 100644 index 0000000..5df8015 --- /dev/null +++ b/.ipynb_checkpoints/Identifying Poor Quality Sequencing Cycles and Bases-checkpoint.ipynb @@ -0,0 +1,503 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "e26d48f0-53af-4b2b-9142-d16c75359dba", + "metadata": {}, + "source": [ + "

Identifying Poor Quality Sequencing Cycles and Corresponding Bases from DNA Sequencing Reads

\n", + "

Navami Shenoy

\n", + "

Dec 27, 2021

" + ] + }, + { + "cell_type": "markdown", + "id": "5ad65013-b27b-4e38-8c79-687033ff970a", + "metadata": {}, + "source": [ + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "1d7cd926-a2bd-4b8f-9d47-6d8cbff9ec17", + "metadata": {}, + "source": [ + "### Objective\n", + "This Python project focuses on identifying poor quality sequencing cycles and deducing the corresponding unidentified bases (i.e. bases reported as 'N' during the sequencer reads). The following raw sequence data has been sourced from Ajay _et al_ (2011) and contains the first 1000 reads from the whole-genome sequence derived from a human male individual. " + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "6670a3ec-f15a-4523-ad56-5206d7aa726d", + "metadata": {}, + "outputs": [], + "source": [ + "# contains functions used in this notebook\n", + "from seq_processing import *" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "43135b7c-c4bc-428b-bf19-1453265af3a6", + "metadata": {}, + "outputs": [], + "source": [ + "import collections\n", + "import urllib\n", + "\n", + "%matplotlib inline\n", + "import matplotlib.pyplot as plt" + ] + }, + { + "cell_type": "markdown", + "id": "b7d3d539-c0e8-494f-9a25-db0bbba8861e", + "metadata": {}, + "source": [ + "\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "6b1843a6-12b0-4707-abbb-d6a9930bffa7", + "metadata": {}, + "source": [ + "### Methods\n", + "The FASTQ file is parsed using functions obtained from the seq_processing module found in this repository. 
" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "edec8009-c0bf-404e-8a89-643b19b8090e", + "metadata": {}, + "outputs": [], + "source": [ + "# retrieving raw sequence data in FASTQ file format\n", + "urllib.request.urlretrieve(\"https://d28rh4a8wq0iu5.cloudfront.net/ads1/data/ERR037900_1.first1000.fastq\", \"ERR037900_1.first1000.fastq\")\n", + "\n", + "# parsing the FASTQ file and storing the sequences and phred base qualities in two separate lists:\n", + "seqs , base_quals = readFASTQ(\"ERR037900_1.first1000.fastq\")" + ] + }, + { + "cell_type": "markdown", + "id": "b968cb97-95fe-4e64-b6f3-b16ceac84709", + "metadata": {}, + "source": [ + "\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "7ae0a4cb-01e0-44d0-9eee-7c98ac5f1917", + "metadata": {}, + "source": [ + "The list 'seqs' contains the sequencings reads with the corresponding base qualities stored in the 'base_quals' list. Let's examine whether the file has been parsed accurately:" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "3398e210-2fd6-416c-a21f-d5e9f7859561", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(['TAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCNAACCCTAACCCTAACCCTAACCCTAACCCTAAC',\n", + " 'TAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCNAACCCTAACCCTAACCCTAACCCTNACCCTAAC'],\n", + " ['HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHGFHHHFHFFHHHHHGHHFHEH@4#55554455HGFBF<@C>7EEF@FBEDDD<=C@54455C/7=CGHEGEB;C############'])" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "seqs[:2] , base_quals[:2]" + ] + }, + { + "cell_type": "markdown", + "id": "a2f0786f-a17c-4f71-b85c-0bc4ad08f95e", + "metadata": {}, + "source": [ + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "786d08cb-98d7-4021-977f-48e6d581ad20", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(1000, 100)" + ] + }, + 
"execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(seqs) , len(seqs[0])" + ] + }, + { + "cell_type": "markdown", + "id": "854e45b1-0ffa-4147-b6ec-3b21141fc9fc", + "metadata": {}, + "source": [ + "Indeed, this dataset contains a total of 1000 reads with each read being 100 bases long." + ] + }, + { + "cell_type": "markdown", + "id": "9438cdb3-4994-4c40-b0d0-b407273075bd", + "metadata": {}, + "source": [ + "\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "85b1def8-00db-4f81-9874-0f757c944d4f", + "metadata": {}, + "source": [ + "It is worth checking the base composition of the sequencing reads as it will give us a rough indication of the ratios of A:T and G:C and the proportion of no-confidence bases or N's in the data:" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "af111661-7b86-44dc-96a1-c91d00bdfd29", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Counter({'T': 22476, 'A': 24057, 'C': 29665, 'N': 914, 'G': 22888})" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "count = collections.Counter()\n", + "for seq in seqs:\n", + " count.update(seq)\n", + "count" + ] + }, + { + "cell_type": "markdown", + "id": "3912d205-cec1-4a81-96a4-c1e061a8ea1f", + "metadata": {}, + "source": [ + "We can see that the number of N's in the data is quite high. Additionally, while the total number of A's and T's are close to each other, the number of C's is significantly higher than the number of G's. Both of these observations suggest the presence of poor quality sequencing cycles likely to have resulted from some low confidence reads of G's. 
\n", + "\n", + "We can locate the sequencing cycles that contain the poor quality reads by plotting the base quality scores against the sequencing cycles:" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "876baf12-f25e-4336-ac78-8d22e27bdebc", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZEAAAEWCAYAAACnlKo3AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAABBIUlEQVR4nO3dd3hc1bX38e9PI42ai9xxxYCNiTFgY4NNCT1gWoAXEiC0EBJCQgJcklBSLgmBe0NyExIIJRA6BHAogRCa6b3YYNwo7rhhy5blIslWW+8fZ48Zy5I8ljUaS7M+zzOPZvZp+8wZzZpdzt4yM5xzzrmWyMl0BpxzzrVfHkScc861mAcR55xzLeZBxDnnXIt5EHHOOddiHkScc861mAcRl1GSXpH03fD8DEnPZzpPbvvUnj4fyZ/rjs6DyHZO0nxJVZLWSVol6T+SBmY6X+lgZg+Y2ZGJ15JM0pBM5sltPxp+Ptz2wYNI+3C8mXUC+gLLgBsznJ+sJyk303loSBH/n3Ztyj9w7YiZrQceAYYn0iQdK+lDSWskLZT066RlBZLul7RSUrmk9yX1Ccu6SrpD0lJJiyVdIynW2HElFUq6O5SEZkr6maRFScs3KTGEda8Jz7tJekpSadj+KUkDmjjOtyW9EZ6/FpI/CqWwUyVNl3R80vp5klZIGtXIvnqGY5VLKpP0euILVtJASY+FPK2U9NeQniPpl5IWSFou6V5JXcOyweE8z5P0OfBSSP+OpI/DuT0naceQLknXh/2skTRN0ogmzvsVSf8r6b2w7hOSuictHyfprXAuH0k6pMG210p6E6gEdm5k/5eHa7xW0qeSDk863yskzQnvw4QGxz0rvBcrJf0ilIqPaHiNw+tDGnwm+kl6NLzH8yRdlLTs1+FY94Y8zZA0Jml5U9dn4+cjvDZJF0iaFd6bmyQpLItJ+mP4fMyT9KOwfqPBv7FjSoqHz84eSev1llQpqVd4fYKkKeG6zZE0von9N/o56Qg8iLQjkoqAU4F3kpIrgLOBEuBY4AeSTgzLzgG6AgOBHsAFQFVYdjdQCwwBRgFHAk3V4V4F7BIeR4X9pioHuAvYERgUjv/XLW1kZgeFp3uZWSczexi4FzgzabVjgKVm9mEju/gJsAjoBfQBfg6YokD5FLAAGAz0Bx4K23w7PA4l+jLu1EheDwa+Ahwl6YSw3/8XjvM68GBY70jgIGBXomvwTWBlM6d8NvAdotJmLXADgKT+wH+Aa4DuwE+BRxNfYsFZwPlA53BeG0kaBvwI2MfMOhNdv/lh8Y+BE8M59QNWATeF7YYDt4R99yP6/DQa/BtSFKz/DXxE9P4eDlwi6aik1b5O9L6XAE8S3uctXJ/GHAfsA+xJ9B4njvE94GhgJLB3OM+m8tvoMc2sOhw7+TN3OvCimZVK2pfoM/mzcB4H8eV7m7z/5j4n7Z+Z+WM7fhB9KNcB5UANsATYo5n1/wxcH55/B3gL2LPBOn2ADUBhUtrpwMtN7HMuMD7p9fnAoqTXBgxJen03cE0T+xoJrEp6/Qrw3fD828Abzey3H7AW6BJePwJc1sRxrgaeSN4+pO8HlAK5jWzzIvDDpNfDwnueS/TlYsDOScufAc5Lep1DVBrYETgM+AwYB+Rs4Rq/Avwu6fVwoBqIAZcD9zVY/zngnKRtr2
5m30OA5cARQF6DZR8Dhye97pt0vv9N9EWaWFYc8nREY9cYOCTxmQDGAp83ONaVwF3h+a+BFxqcb1UK16exz8eBSa8nAFeE5y8B309adkRYv7H9NnfMscDngMLrScA3w/O/Ef7Xmrimic91k5+T5j4X7eXhJZH24UQzKwEKiH5VvippBwBJYyW9HIrhq4lKGz3DdvcRfeE8JGmJpN9LyiP6kssDloZqgHKif4jeTRy/H7Aw6fWCJtbbjKQiSX8L1SJrgNeAEjVRddYcM1sCvAmcLKmE6JfmA02s/gdgNvC8pLmSrgjpA4EFZlbbyDb92PTcFhB9ofZJSkt+H3YE/pL0HpYBAvqb2UtEv65vApZLuk1Sl2ZOr+H7m0d0HXcEvpE4RjjOgURf+I1tuwkzmw1cQvTFvVzSQ5L6JeX/8aT9fgzUhfPd5JqbWQXNl6SS7Qj0a5Dnn7Pp+/hF0vNKoCBUNTV3fRrTcD+dwvOGn9km36Pmjmlm74b9HiJpN6Kg/GTSdnNSyGOTn5MUtt3ueRBpR8yszsweI/pHPzAk/4PoQz3QzLoCtxJ9QDGzGjP7jZkNB/YnKvqfTfQPtQHoaWYl4dHFzHZv4tBLif5hEgY1WF4JFCW93iHp+U+IftGPNbMuREV+EnlsgXuIqhe+AbxtZosbW8nM1prZT8xsZ6Kqk0tDW8BCYFATdeNLiP7hEwYRVS0tS9510vOFRL92S5IehWb2VsjDDWY2muiX9q5E1R5Nafj+1gArwjHua3CMYjP7XRN52oyZ/cPMDgznZsB1Sfk/usG+C8J7usk1D1WpPZJ2W0HT13whMK/Bfjub2THN5TNp26auz9ZYyqbVb831aNzSMROfubOARyxqm0xst0sKeWn2c9LeeRBpRxQ5AehG9KsRonrwMjNbH+pov5W0/qGS9gi/+tcQfTHVm9lS4Hngj5K6KGpg3UXSwU0cegJwpaJG8gFEdenJpgDfCo2Z44nq2BM6E7WDlCtqtL1qK055GZs3FP+LqI77YqL66EZJOk7SkNDQupoo8NYD7xF9wfxOUrGizgcHhM0eBP5L0k6SOgH/AzzczK/iW4nel93DMbtK+kZ4vk8oJeYRfeGuD8dvypmShocv66uJvqzqgPuB4yUdFd7fAkWN2Km2TwyTdJik/JCHqqR83Apcqy87A/QKny+IqgqPk3SgpHjIU/L3xRTgGEndQ6n4kqRl7wFrFTXoF4Z8j5C0TwpZbu76bI0JwMWS+odS6+XbcMz7gZOIAknyZ+4O4FxJh4f/of6htNJQk5+TjsCDSPvwb0nriALBtUT14TPCsh8CV0taS1SPPSFpux2IvgzWEAWdV4mquCAqkcSBmUQNqo+waRVJst8QVbHMIwo+9zVYfjFwPFG7zRlEX/QJfwYKiX5VvwM8m9IZR34N3BOqAb4JYGZVwKPATsBjzWw7FHiBqD3pbeBmM3s5fDEfT1Qt8TlR4/upYZs7w7m9Fs51PZsHzI3M7HGiX/UPhaq66URVbABdgNuJ3tsFRFVBf2gmv/cRtTN8QVRteVE4xkIg0TBbSvSr9mek/r+bD/yO6P3/gqjK8sqw7C9Epdjnw+fnHaI2AMLn60Kiku7ScB6LkvZ7H1HD+Xyiz8TDiQXhPT6OqP1rXjj234k6GDRrC9dna9we8jUV+BB4mqhUWbe1xwzX4AOiUtzrSenvAecC1xP9UHmVTUuyifWa+5y0e4nGIudSpqiL6f1mltKv4TQc/7+BXc3szC2u3A5IeoXo/fx7pvPSHEnziRqLX8h0XraWpKOBW82sRV1rJd0JLDGzX7Zuztq/7e6GKeeaE6rEziOqn3auUZIKibpqP0/UoH8V8HgL9zWYqHvuZvcjOa/Ocu2IpO8RVec8Y2avbWl9l9VEVA27iqg662Oi6t6t24n0W6Lqpz+Y2bxWzWEHkfbqrNCoOwlYbGbHSdqJ6AaeHsBk4Cwzqw4Nf/cCo4nqj081s/lhH1cS/fqsAy4ys+dC+n
iiet0Y8PcGPVacc86lWVuURC7my55EEDUwXW9mQ4h+JZwX0s8jugltCFFD1XWw8c7Z04DdgfHAzaG3R4yoD/7RRF0oTw/rOuecayNpbRMJ3RCPJepRdGnobnkYX3ZDvYeoB84tRD1Qfh3SHwH+GtY/gejO2Q3APEmzgX3DerPNbG441kNh3ZnN5alnz542ePDg1jg955zLGpMnT15hZr0apqe7Yf3PwGVE9wpAVIVVntTvfhFf3rXZn3BXqZnVKrr7ukdITx4rKnmbhQ3SxzaWCUnnEw3VwaBBg5g0aVLLz8g557KQpEZHqkhbdZak44DlZjY5XcdIlZndZmZjzGxMr16bBVLnnHMtlM6SyAHA1yUdQ3TzVBeiRvASSbmhNDIASAxbsZhoaIJFYfiBrkQN7In0hORtmkp3zjnXBtJWEjGzK81sgJkNJmoYf8nMzgBeBk4Jq51DNNIqRHfOJoYYPyWsbyH9NEn5oWfXUKJhCt4HhoYhKuLhGImB0ZxzzrWBTNxseDnR7f/XEPXfviOk3wHcFxrOy4iCAmY2Q9IEogbzWuDCMEwBkn5ENEptDLgzaSgQ55xzbSDrhj0ZM2aMecO6c85tHUmTzWxMw3S/Y90551yLeRBxzjnXYj4AY4pemLmMFes2UFFdR+WGWuK5OfQtKaRf1wIK4zEqNtRRsaGWDbX1xHNFbk4OBXkxuhTm0rUwjy4FeRTkxYjltHQuJuec2/54EEnRdc9+wqzl67Z5P/HcHPJjUQHQAAm6FOTRrTgKNLV1xvraOmrrjL0GduXgXXtzwJAedC7I2+ZjO+dca/MgkqI7v70PsRxRHM+lKD9GVU0dS8vXs2R1Feur6+hckEdxfox4bg61dUZtfT1V1fWsWV/D6qoa1q6voaq6nqqaOjbURvPiCFFvxpqqGlZVVrNmfS15MdGtKE69Gf/+aCkPvreQWI7oURyne3GckqI8+nYtpH9JIf1KCqmqqWNJeRVLyquorK7bOE9qp/wYvTsX0KdLAb0759O9U5wexXG6FOQRz80hnhsFsooNtVRsqGNlxQbmLF/HnNIKVqzbQJ8uBfQvKWRg90KG7dCFHbsXkeOlKOdcAx5EUjSwe9Emr/NiOXTZIY9hO3RuYottV1NXz+QFq3hrzkqWr1lPWUU1ZRXVvDevjC/WrKeuPgoZhXkx+pUU0CmptLJ4VSWvf7aCtRuamtm1cZ0LcundOZ/XZ61gXdK2RfEYO/cqxgyqaqKS0uCexezerwtDenVi3ooKPvh8FZ9+sZZTxgzg8qN286DjXBbwILIdy4vlMG7nHozbucdmy2rr6lm2dgNFeTFKivKIxqrcXMWGWlas28CKdVEAWru+hpq6eqpr6zGgOJ5LcX6MkqI4O/cqplenfCRhZqxZX8uClRV8snQtM5euYe6KCvJyREE8Ro7E7OXruP21udTWG7EcMbxvF/YY0JW/vTqX+Ssq+POpoyiMx9L8LjnnMsnvE3HbZENtHZ+vrKR/t0KK4tFvkrvenMfVT81kj/5dufRruzKgWyF9uxZSnP/lb5b1NXUsLKtk/spKKqtr6ZSfS1E8l/U1dcxbUcG8FRVUVtexY48iduxRRN+uhcRyRCxHdMrPZeeexV7Sca4NNXWfiAcRlxYvzFzGRQ99SGV13ca0HEWlq7xYDhXVtTT30etckEthXozlazc0urxTfi57DezKiH5d6VYcp3NB1AuuT5cC+naN2oLyYt6D3bnW4kEk8CDSdsorq5m9fB2Ly6tYXF5F5YY6aurqqakzuhTmslPPYgb3KKY4P5fK6lrWbaglPzeHwT2K6V4cRxJV1XV8XlbJ8rVRG1C9GWUVNXy0sJwPF67ik6Vrqa3f/DOcmyNGDSrhoKG9OHBoT4bt0HljSck5t/U8iAQeRDoWM6Oqpo6162spr6zhizXrWVpexbyVFbw9ZyXTFq/eWOLp2Smfgd0Lqa+P2nvWbahl/116cOGhQ9i1T/o6SDjXETQVRPynmW
vXJFEUj9pT+nQp2Ky33Mp1G3h3XhnzVlSwYGUFi8uryM3JYWD3InJzxPMzl/HElCUcObwPZ47bkXE799jY/dk5t2UeRFyH1qNTPsfs0bfJ5asqqrnrzXnc/dZ8np+5jM75uRw8rBd7D+rGgG6F9O9WSK/O+RtHHHDObcqrs5wj6i32xqwVTJy5jBc/WcaKddWbrROP5bBjjyIOGdaLQ4f1Zni/LlSH7tJF8Vy6F8czkHPn2oa3iQQeRNyWmBllFdUsLq9iYVkVZRUbWLO+ljXra5i5ZA3vzi2juq5+s+2G9O7Efjv34IAhPTlo157ekO86FG8TcS5FkujRKZ8enfLZc0DJZssrNtTy1pyVLFhZQX5ejPzcHFauq+aduSt59INF3PfOAvJzczh4114cu2dfxo/YgfxcrwpzHZOXRJxrRTV19bw/v4znpn/BszO+YNmaDfTsFOfMcTtyxtgd6dU5P9NZdK5FvDor8CDi2kp9vfHmnBXc+cY8Xv60lFiOGNGvC/vu1J19d+rBgUN6+rAwrt3wIBJ4EHGZMKd0HY9/sJj35pUxZVE51bX1FObFOGy33hy7Z1+OHrFDk+OfObc98DYR5zJol16d+OlRw4CoJ9ik+at4ZvpSnpvxBf+ZtpQrjt6NCw7eJcO5dG7rpe2uKkkFkt6T9JGkGZJ+E9LvljRP0pTwGBnSJekGSbMlTZW0d9K+zpE0KzzOSUofLWla2OYG+U851w4U5MU4cGhPrj1pD979+RF8bXgf/vzCZywsq8x01pzbaum8NXcDcJiZ7QWMBMZLGheW/czMRobHlJB2NDA0PM4HbgGQ1B24ChgL7AtcJalb2OYW4HtJ241P4/k41+piOeI3X9+dmMQv/jWdbKtedu1f2oKIRRLzyeaFR3P/IScA94bt3gFKJPUFjgImmlmZma0CJhIFpL5AFzN7x6L/vHuBE9N1Ps6lS7+SQn561DBe+6yUf09dmunsOLdV0jpIkKSYpCnAcqJA8G5YdG2osrpeUqLPY39gYdLmi0Jac+mLGklvLB/nS5okaVJpaem2npZzre7s/Qaz54CuXP3vGbw7dyWrq2oynSXnUpLWIGJmdWY2EhgA7CtpBHAlsBuwD9AduDydeQj5uM3MxpjZmF69eqX7cM5ttViO+J+T9mDt+lpOve0d9vrN8xzwu5e45ZU51DRyd7xz24s2Ga7UzMqBl4HxZrY0VFltAO4iaucAWAwMTNpsQEhrLn1AI+nOtUsj+nfljcsP485vj+Gy8cPYuVcx1z37CSfe9CbTF6/OdPaca1Ta7hOR1AuoMbNySYXA88B1wGQzWxp6Ul0PrDezKyQdC/wIOIaoEf0GM9s3NKxPBhK9tT4ARptZmaT3gIuAd4GngRvN7Onm8uX3ibj25NnpS/nVEzMoq6hmYLdCEvNvHbZbby4fv5vfrOjaTCbuE+kL3CMpRlTimWBmT0l6KQQYAVOAC8L6TxMFkNlAJXAuQAgWvwXeD+tdbWZl4fkPgbuBQuCZ8HCuwxg/oi/77dyTm1+dzRer15MjsW5DLXe/NZ83Zq/ghtNGMbxfl0xn02Uxv2PduXbo9Vml/GTCR5RX1nDdKXtw0qgBW97IuW3QVEnEp3Bzrh366tBePHvJQew5oCv//cQMyis3n//EubbgQcS5dqp7cZxrThrBug213Prq3Exnx2UpDyLOtWO77dCFE0f2564357FszfpMZ8dlIQ8izrVz/3XErtTVGze8OCvTWXFZyIOIc+3coB5FnL7vIB5+fyELVlZkOjsuy3gQca4D+PFhQ8iNifPvncxNL89m2qLV1NdnV89Llxnexde5DuKJKYu59dW5fLx0DQAlRXkcsEtPDhzak68O7cmAbkUZzqFrz3xSKuc6uBNG9ueEkf0pXbuBN2ev4PVZK3hz9gr+My0aGXj3fl04avcdOG7Pvuzcq1OGc+s6Ci+JONeBmRlzStfx0ifLeW7GMiYvWEUsR3z/oJ256PChFOT5sC
kuNT7HeuBBxGWzZWvW86fnP+PhSQsZ0rsTfzhlT0YN6rblDV3W8zvWnXP06VLAdafsyT3f2ZfKDbWcdts7LF/r95e4lttiEJH0DUmdw/NfSnosef5z51z7c/Cuvbj3vLFsqK3niQ+XZDo7rh1LpSTyKzNbK+lA4AjgDsL858659mtI706MHFjCI5MX+dzursVSCSJ14e+xwG1m9h8gnr4sOefayimjB/DpsrXMWLIm01lx7VQqQWSxpL8BpwJPhznRvS3FuQ7g+D37Ec/N4ZHJizKdFddOpRIMvgk8BxwVprntDvwsnZlyzrWNrkV5fO0rfXhiymKqa30ud7f1thhEzKwSWA4cGJJqAR/pzbkO4pTRA1hVWcPLny7PdFZcO5RK76yrgMuBK0NSHnB/OjPlnGs7Xx3ak16d83nUq7RcC6RSnXUS8HWgAsDMlgCd05kp51zbyY3lcOLIfrz0yXIuevBDJry/kCXlVZnOlmsnUhk7q9rMTJIBSCpOc56cc23sgoN3YeW6al6btYInP4ruGzl930FcPn4YJUXeGdM1LZWSyITQO6tE0veAF4Dbt7SRpAJJ70n6SNIMSb8J6TtJelfSbEkPS4qH9PzwenZYPjhpX1eG9E8lHZWUPj6kzZZ0xVaeu3Mu6NEpnz+dOpL3f3E4z11yEN85YCcmTFrI4X98lX9OWkidDyvvmtDs2FmSBAwAdgOOBAQ8Z2YTt7jjaNtiM1snKQ94A7gYuBR4zMweknQr8JGZ3SLph8CeZnaBpNOAk8zsVEnDgQeBfYF+REFs13CYz4CvAYuA94HTzWxmc/nysbOcS83MJWv45b+m8cHn5fTrWsCp+wzi1H0GskPXgkxnzWVAi8bOsijCPG1mE83sZ2b201QCSGJbM1sXXuaFhwGHAY+E9HuAE8PzE8JrwvLDQyA6AXjIzDaY2TxgNlFA2ReYbWZzzawaeCis65xrBcP7deGRC/bn1jP3Zpfenbj+hc844LqXNlZ3OQepVWd9IGmfluxcUkzSFKIuwhOBOUC5mdWGVRYB/cPz/sBCgLB8NdAjOb3BNk2lO+daSU6OGD+iL/edN5bXfnYoew8q4bJHPmKm3+HuglSCyFjgbUlzJE2VNE3S1FR2bmZ1ZjaSqEpsX6JqsTYn6XxJkyRNKi0tzUQWnGv3BvUo4uYzRtO1MI/v3z+J8srqTGfJbQdSCSJHAbsQVUMdDxwX/qYs3On+MrAfUQN9olfYAGBxeL4YGAgQlncFVianN9imqfTGjn+bmY0xszG9evXamqw755L06pzPLWeO5ovV67n4oSne4O5SumN9AVBCFDiOB0pCWrMk9ZJUEp4XEjWAf0wUTE4Jq50DPBGePxleE5a/FNpkngROC723dgKGAu8RNaQPDb294sBpYV3nXBrtPagbv/767rz6WSlPTfX2kWyXyh3rFwMPAL3D435JP05h332Bl0PV1/vARDN7iuju90slzSZq87gjrH8H0COkXwpcAWBmM4AJwEzgWeDCUE1WC/yIaFyvj4EJYV3nXJqdvPcAAJaU+4RW2S6Vmw3PA8aaWQWApOuAt4Ebm9vIzKYCoxpJn0vUPtIwfT3wjSb2dS1wbSPpTwNPb/kUnHOtKT83hxxBZXXtlld2HVoqbSLiyzlFCM+Vnuw459oDSRTFc6msrtvyyq5DS6UkchfwrqTHw+sT+bIKyjmXpYriMS+JuC0HETP7k6RX+HIo+HPN7MO05so5t90rzs+lYoOXRLLdFoOIpHHADDP7ILzuImmsmb2b9tw557ZbhXkxr85yKbWJ3AKsS3q9LqQ557JYcb5XZ7kUG9YtaZRGM6sntbYU51wHVhTPpcJLIlkvlSAyV9JFkvLC42Jgbroz5pzbvhXFY1R5SSTrpRJELgD2JxpSZBHRWFrnpzNTzrntX1HcG9Zdar2zlhMNKeKccxt5m4iD1IY9+X3okZUn6UVJpZLObIvMOee2X4XxmLeJuJSqs440szVEo/fOB4
YAP0tnppxz27/ieC7VtfXU1tVnOisug1IJIokqr2OBf5rZ6jTmxznXThTFYwBU1nhpJJulEkSekvQJMBp4UVIvwIfudC7LFcWj35eV3rie1VKZT+QKot5ZY8ysBqjE5zJ3LusV54eSiDeuZ7WUbho0s7Kk5xVARdpy5JxrFzaWRLxxPaulUp3lnHObSbSJVGzwkkg28yDinGsRb1h3kNp9Io9JOlaSBxzn3EbF+d6w7lIridwMfAuYJel3koalOU/OuXagMC9UZ3nDelZLpXfWC2Z2BrA30c2GL0h6S9K5kvLSnUHn3PYpURKp8ob1rJZSFZWkHsC3ge8CHwJ/IQoqE5vZZqCklyXNlDQjjP6LpF9LWixpSngck7TNlZJmS/pU0lFJ6eND2mxJVySl7yTp3ZD+sKT4Vp6/c66FNjase0kkq6XSJvI48DpQBBxvZl83s4fN7MdAp2Y2rQV+YmbDgXHAhZKGh2XXm9nI8Hg6HGc40UCPuwPjgZslxSTFgJuAo4HhwOlJ+7ku7GsIsAo4b6vO3jnXYvm5OeTI20SyXSolkdvNbLiZ/a+ZLQWQlA9gZmOa2sjMliam1DWztcDHQP9mjnMC8JCZbTCzecBsYN/wmG1mc82sGngIOEGSgMOAR8L29wAnpnA+zrlWIInieK6XRLJcKkHkmkbS3t6ag0gaDIwCEvOy/0jSVEl3SuoW0voDC5M2WxTSmkrvAZSbWW2DdOdcGynKj3mbSJZrMohI2kHSaKBQ0ihJe4fHIURVWymR1Al4FLgkjAZ8C7ALMBJYCvyx5dlPOQ/nS5okaVJpaWm6D+dc1vApcl1zw54cRdSYPgD4U1L6WuDnqew89N56FHjAzB4DMLNlSctvB54KLxcDA5M2HxDSaCJ9JVAiKTeURpLX34SZ3QbcBjBmzBhrbB3n3NYriseo9DvWs1qTQcTM7gHukXSymT26tTsObRZ3AB+b2Z+S0vsm2laAk4Dp4fmTwD8k/QnoBwwF3gMEDJW0E1GQOA34lpmZpJeBU4jaSc4BntjafDrnWq44nutjZ2W5JoOIpDPN7H5gsKRLGy5PDgxNOAA4C5gmaUpI+zlR76qRgBHdd/L9sL8ZkiYAM4l6dl1oZnUhLz8CngNiwJ1mNiPs73LgIUnXEHU9vmNLJ+ycaz1F+TFWVVRnOhsug5qrzioOf5vrxtskM3uDqBTR0NPNbHMtcG0j6U83tp2ZzSXqveWcy4CieIxFq7wkks2aq876W/j7m7bLjnOuPSmK52a8d5aZ8faclTzw3ue89PFy/vTNvTh6j74ZzVM2aa4664bmNjSzi1o/O8659qQ4HsvofSLL1qznW7e/w5zSCroW5tGrcz6XPTqVPQeW0L+kMGP5yibNVWdNbrNcOOfapcJ4bkbvWJ++eDVzSiv4+TG7cfZ+g1m2Zj3H/OV1/uuhKTx4/jhiOY3VqLvWtKXeWc4516TieIzqunpq6urJi7X9bBFloVH/6BF9KciLsWOPYn574ggunfARt7wymx8dNrTN85Rttjg9rqReRL2ghgMFiXQzOyyN+XLOtQNF+V9Okdu1sO2DyKrKKIh0K/5y7NWTRvXnlU9Luf6FWQzoVsQJI/sR3XHg0iGVq/4A0bhXOwG/IeqW+34a8+Scayc2zm6YoXaRVZU15MVEccgHRGN6XXPSCEb078olD0/hW7e/y6xlazOSv2yQShDpYWZ3ADVm9qqZfYdo4EPnXJb7Mohkpl1kVUU13Yrim5U0uhTk8dgP9ueaE0cwc+kajv7L6zzw7oKM5LGjSyWI1IS/S8M0uaOA7mnMk3OunSiOZ3aK3FWVURBpTCxHnDluR176ycEcOLQnv3h8Og++93kb57DjS2kUX0ldgZ8APwX+DvxXWnPlnGsXMj0x1aqKGroVNz/Bao9O+fztrNEcOqwXVz42jQnvL2x2fbd1Upke9ykzW21m083sUDMbbWZPtkXmnHPbty8b1jMTRMoqq+levOUJTfNzY9
xy5mgO2rUXlz82ledmfNEGucsOqfTOuotonKtNhLYR51wWK85wm0h5ZTUlTVRnNVSQF+O2s0Zz7A2v87dX53DU7jukOXfZIZXqrKeA/4THi0AXYF06M+Wcax8KE0EkA20iZsaqyhq6pxhEIAok3xgzkA8+L2fByoo05i57pFKd9WjS4wHgm0CT0+I657JHomE9E20ia9bXUldvlBQ13ybSUHTfCDz+YaPTD7mt1JK7g4YCvVs7I8659qcoP3PVWYkh6FNpE0nWt2sh++3cg399uBgzn6NuW20xiEhaK2lN4i/wb6I72J1zWS4eyyGWo4w0rJc1crd6qk4c1Z/5Kyv5cGF5K+cq+6RSndXZzLok/d21JTMdOuc6HkkUxWNUZKBNpDwRRLaiTSTh6BE7kJ+bw7+8SmubpdI7a+/mlpvZB62XHedce1OcoTlFyiqi+6C3pmE9oXNBHl8b3od/f7SEXx03PCODR3YUWwwiwM3A3sBUopkK9wQmAeuJuv76ECjOZbGiDM0pkiiJlGzhZsOmnDSqP09NXcqrn5ZyxPA+rZm1rJJK+F0CjDazMWY2GhgFLA43HnoAcS7LFeXHMtKwXlZRTW6O6Jyfym/hzR20a6+Nk1hNmLSQ+npvZG+JVILIMDOblnhhZtOBr6QvS8659qQonpuRhvVV4UbDlg7znhfL4f7zxrJzz2Iue2Qq3/zb20xeUOY9trZSKiF8qqS/A/eH12cQVW055xxF8djGyaHa0qqKGrq3sCorYdgOnZnw/f149INF/O8zn3DyLW8zpHcnvjF6AN8YM3Cruw9no1RKIucCM4CLw2NmSGuWpIGSXpY0U9IMSReH9O6SJkqaFf52C+mSdIOk2ZKmJjfoSzonrD9L0jlJ6aMlTQvb3CCfeca5Nlccz6ViQ2a6+LakZ1ZDOTniG2MG8tplh3LdyXvQtTCP/33mE46/8Q1K125ohZx2bKl08V1vZteb2Unhcb2ZrU9h37XAT8xsODAOuFDScOAK4EUzG0o0jMoVYf2jiW5kHAqcD9wCUdABrgLGAvsCVyUCT1jne0nbjU/lpJ1zracoHstI76zyVgoiCZ3yczl1n0E8+oP9eeSC/SirqOa7907KyLm1J2nr12ZmSxPdf81sLdHsiP2BE4DE/O33ACeG5ycA91rkHaBEUl/gKGCimZWZ2SpgIjA+LOtiZu9YVIl5b9K+nHNtJOqdlZkuvi250TAVYwZ35y+njWTqonIunTDFG92b0SadoyUNJurV9S7Qx8yWhkVfAIm+df2B5IH+F4W05tIXNZLe2PHPlzRJ0qTS0tJtOxnn3CaK8tu+Yd3MQklk29pEmnPk7jvwi2O+wjPTv+D3z32atuO0dykHEUlFLTmApE7Ao8AlZrYmeVkoQaQ9xJvZbaGL8phevXql+3DOZZXieIyaOqO6tr7Njrl2Qy219Zb2hu/zDtyJM8YO4tZX5/DU1CVpPVZ7lcrYWftLmgl8El7vJenmVHYuKY8ogDxgZo+F5GWhKorwd3lIXwwMTNp8QEhrLn1AI+nOuTZUGEbybcu2g8Tgi6nOJdJSkrjq+N0ZvWM3LntkKp8tW5vW47VHqZREridql1gJYGYfAQdtaaPQU+oO4GMz+1PSoieBRA+rc4AnktLPDr20xgGrQ7XXc8CRkrqFBvUjgefCsjWSxoVjnZ20L+dcGynOwBS5qyrDkCfb2MU3FfHcHG4+Y2+K4rlccN9k1qyvSfsx25OUqrPMrOGkxKn85DgAOAs4TNKU8DgG+B3wNUmzgCPCa4CngbnAbOB24Ifh2GXAb4H3w+PqkEZY5+9hmznAM6mcj3Ou9WRiitxESaQ1e2c1p0+XAm761igWlFVy2T/9NrlkqdxsuFDS/oCF6qmLiXpaNcvM3iAaa6sxhzeyvgEXNrGvO4E7G0mfBIzYUl6cc+lTlNf2c4qUtXEQARi7cw8u/dqu/OG5T3l37krG7tyjzY69PUulJHIB0Zd7f6I2h5E08W
XvnMs+iYmp2nI4+FXbMJfItvjOATvRq3M+f5r4mQ+PEqRys+EKMzvDzPqYWW8zO9PMVrZF5pxz27/EFLltWp1VWU0sR3QpaNngiy1VGI9x4SG78O68Mt6a41+DkFrvrN9L6iIpT9KLkkolndkWmXPObf+K4pmozqqhW1Feiwdf3Ban7TuIvl0LvDQSpFKddWS4v+M4YD4wBPhZOjPlnGs/MtGw3tpDnmyNgrwYFx46hMkLVvHqZ37zcipBJFFePBb4p5mtTmN+nHPtzMYuvm3YJlJWkbkgAvDNMQPpX1LI7575hCXlVRnLx/YglQrFpyR9AlQBP5DUi2hWQ+ecozAEkaqazYPIPyct5P35ZeTnxsjPzSGem0NeLPq714ASDhzas0XHLK+sYXDPFg2i0SriuTn89/HDuejBDznsj69wwcG78P2Ddtn4XmSTLQYRM7tC0u+Jbv6rk1RBNFiic84Rj+WQm6NGh4O/4aVZLF+zgeL8XNbX1FFTV09N3ZftCGeN25FfHPsVCvK27su3rLKaUUUl25r1bXLU7jvwwqUH87tnPuHPL8zi0Q8W8fD5+9GvpDCj+WprqXZt6AccIakgKe3eNOTHOdfOSKIwL8b6ms3Hzqqqrufk0QP4n5P22JhmZlTV1PGn5z/j72/M4/35Zdx4+iiG9umc0vHMjFUV1W3evbcxA7sXcdMZe3PmnJWcf+8kzr3rff75g/3oUpD+O+m3F6n0zroKuDE8DgV+D3w9zflyzrUjBfFYo9VZ62vqKGxQypBEUTyXXx43nLvO3YfStRs49sY3+OtLs1IaxHFdGHwxnSP4bq39dunBrWeNZk7pOn5w/+Q2HYwy01JpWD+F6A7zL8zsXGAvoGtac+Wca1cK82JUNeidlShxNAwiyQ4d1ptnLvkqXxveh/97/jOOv/ENnp2+lNdnlfLqZ6W8OXsF0xatZmFZ5cbeX6sqorGrMtmw3pgDhvTkupP35M3ZK7n80amUV7b9lMGZkEp1VpWZ1UuqldSFaNTdgVvayDmXPQrzNi+J1NQZdfW2xcbm3p0LuOlbe3PSyGX86onpXHD/B42ulxcTJ43qz367RMONbI/zn588egBLyqv448TP+PdHS/jq0J6cOKo/x+/Zj5ycjjl7dypBZJKkEqJBEScD64C305kp51z7ElVnbVqFkwgqqTaaHzG8D/sP6cHMJdG0QxLU1hmrq2pYXVXDR4vK+eekRUyYFM1Fl+5h4Fvqx4cP5dDdevPvj5bw1NSlXPzQFBatquLCQ4dkOmtpkUrvrB+Gp7dKepZoSlofxtI5t1FhXg7rG9yxvj4EkeaqsxoqiucyZnD3Rpd9Y8xALjliV+55az6T5q9i1z6dWp7hNBvRvysj+nfliqN34+KHpvDH5z9l5MASDhjSsi7N27OUemdJ+n/AgUSzEL4BeBBxzm1UmBdjxbpN2wASw6AUxltvFu6enfL5yZHDWm1/6SaJ//1/ezBz6RouevBD/nPRV9mha8GWN2xHUumddTPRSL7TgOnA9yXdlO6MOefaj8JGemclZjrcmpJIR1Scn8utZ+5NVU0dF/7jA2rqOlbPrVR+IhwGHGVmd5nZXcAxIc055wAozMvdbHrcrW0T6ciG9O7M/5y0B5MXrOLJKR1rrvZUgshsYFDS64EhzTnngKjKan1N420iRfG2Ha59e3XCyH4M6FbIv6YsznRWWlWTQUTSvyU9CXQGPpb0iqSXiWY1TO3WUudcVmisi69XZ21KEieO7M+bs1ewfG3HGX6wuZ8I/9dmuXDOtWuJIGJmG+f4SASV1mxYb+9OHNWPv748m39/tJTzDtwp09lpFU0GETN7tS0z4pxrvwriMcxgQ239xjYQbxPZ3JDenRnRvwv/+nBxhwkiafuJIOlOScslTU9K+7WkxZKmhMcxScuulDRb0qeSjkpKHx/SZku6Iil9J0nvhvSHJW2fdx45lwUSVVbJjestuU8kG5w4sj/TFq9m9vJ1mc5Kq0
hnOfNuYHwj6deb2cjweBpA0nDgNGD3sM3NkmKSYsBNwNHAcOD0sC7AdWFfQ4BVwHlpPBfnXDMK8zafU2Rjm0gWzrHRnK/v1Y8cwRMdpIE9bUHEzF4DylJc/QTgITPbYGbziHp/7Rses81srplVAw8BJyiqdD0MeCRsfw9wYmvm3zmXusYmpkrcbFiQ60EkWe8uBRwwpCf/mrK4Q8zR3lzvrGmSpjb12IZj/ijs405J3UJaf2Bh0jqLQlpT6T2AcjOrbZDe1LmcL2mSpEmlpT4nsnOtraCJ6qz83JwOO/DgtjhxZH8WllXxqyem8+LHy1izvibTWWqx5npnHRf+Xhj+3hf+nrENx7sF+C3R8Cm/Bf4IfGcb9pcSM7sNuA1gzJgx7T/0O7edKQolkeR7Rapq6rwqqwlH77EDz874ggmTFnH/O5+To2iWxyuO/kq7e8+a6521AEDS18xsVNKiKyR9AFzR+JZNM7NlieeSbgeeCi8Xs+nw8gNCGk2krwRKJOWG0kjy+s65NtZUm0iRN6o3qiiey+1nj2F9TR0ffl7OU1OXcM/bC3hj9gr+fOoo9hjQfqZsSqVNRJIOSHqxf4rbNbajvkkvTyIaiwvgSeA0SfmSdgKGAu8B7wNDQ0+sOFHj+5MWVSS+TDRhFsA5wBMtyZNzbts1Vp1VVVNHQTv7Vd3WCvJi7LdLD649aQ/uP28sFRvqOOnmN7n4oQ958qMlrK7a/qu5UhmP4DzgTkmJ0FhOClVQkh4EDgF6SloEXAUcImkkUXXWfOD7AGY2Q9IEYCZQC1xoZnVhPz8CngNiwJ1mNiMc4nLgIUnXAB8Cd6RwLs65NGisYb2xqXFd0w4c2pNnL/kqf3juU56Z/gVPTFlCbo741thB/PLY4cRzt8+bNlOZT2QysFciiJjZ6lR2bGanN5Lc5Be9mV0LXNtI+tPA042kzyXqveWcy7DG7hPZ0tS4bnMlRXGuPWkPrj5hBFMWlvPYB4u49+0FTF+8mlvOHE2fLtvfMPKpDAXfR9IdRF1wV0saLsnvyXDObdRUm0h7ayTeXsRyxOgdu3HtSXtw07f25pMv1nLcjW8wZWF5prO2mVTKR3cTVSf1C68/Ay5JU36cc+1QY9VZVTX1PuRJKzh2z77868IDiMdyuOyRj7a7e0tSCSI9zWwCUA8QekPVNb+Jcy6b5If6+uQpcquqa706q5Xs2qczlxwxlM+WreON2SsynZ1NpBJEKiT1IGoMR9I4IKV2EedcdpC02XDw3ibSur4+sh89O+VzxxvzMp2VTaQSRC4l6oK7i6Q3gXuBH6c1V865dqfhFLneJtK68nNjnDVuR175tJTZy9dmOjsbNRtEwgCIB4fH/kRdcnc3s20Z9sQ51wEV5sWoqv5y/vD1NfUeRFrZGeMGEc/N4c4352c6Kxs1G0TCvRqnm1mtmc0ws+lmtv3f/eKca3OF8djGYU9q6+qprqv36qxW1rNTPieN7M9jHyxiVUV1prMDpFad9aakv0r6qqS9E4+058w5164kt4msr63fmOZa13cO3In1NfXc8uoc6uoz31MrlTvWR4a/VyelGdFQ7M45B0QBo7I6Glg7cdOhD3vS+obt0Jlj9tiB216by0ufLOfHhw3huD37EcvQaMmp3LF+aFtkxDnXvhXEYxvHevJZDdPrxtP35pg9lnLji7O5+KEpXPXkDHbv14Xhfbtw6LDe7D+kZ5vlZYtBRFI+cDIwOHl9M7u6qW2cc9mnMC+HZauj4FHlQSStYjniuD37ccyIvjw/cxmvfracmUvWcO/bC/j7G/O45sQRnDF2xzbJSyrVWU8Q3RcyGdiQ3uw459qr5DaRyo1T426fgwZ2FDk5YvyIHRg/YgcgKgH+8IEP+MXj06mqruO7X9057XlIJYgMMLPG5kp3zrmNku8T2dgm4iWRNlWQF+PWM0fzXw9P4Zr/fMzi8iqO27Mfu/frkrZrkUoQeUvSHmY2LS05cM51CAV5sY3DnnibSO
bEc3P4y2kj6ZSfy11vzueuN+eTFxPD+3Xl7m/vQ7fieKser8kgImkaUS+sXOBcSXOJqrMEmJnt2ao5cc61a0XJJZHwtyieyu9U19pyYzlcd8qeXHrkrnz4eTlTFpbz2bK1lBTltf6xmll2XDPLnHNuE4V5MWrrjZq6+o3VWV4Syaw+XQo2aTNJh+aCyDLgAmAIMA24I4zg65xzmylImlMkURIp8Ib1Dq+5K3wPMIYogBwN/LFNcuSca5cS42Str67zNpEs0lxJZLiZ7QEQZjZ8r22y5JxrjxIBo7K6zntnZZHmSiIbB1r0aizn3JYkT5FbWVNHXkzkxbw6q6Nr7grvJWlNeKwF9kw8l7RmSzuWdKek5ZKmJ6V1lzRR0qzwt1tIl6QbJM2WNDV5gEdJ54T1Z0k6Jyl9tKRpYZsbJGVm4BjnHPDlOFlVNVFJxEsh2aHJIGJmMTPrEh6dzSw36XmXFPZ9N9DwJsUrgBfNbCjwYngNUZvL0PA4H7gFoqADXAWMBfYFrkoEnrDO95K28xsincugREkk0Sbi7SHZIW1lTTN7DShrkHwCUYM94e+JSen3WuQdoERSX+AoYKKZlZnZKmAiMD4s62Jm71g0a/29SftyzmVAYYPeWT4hVXZo6wrLPma2NDz/AugTnvcHFiattyikNZe+qJH0Rkk6X9IkSZNKS0u37Qycc40qalCd5SWR7JCxVq9QgmiTGVXM7DYzG2NmY3r16tUWh3Qu62y8T6TaSyLZpK2DyLJQFUX4uzykLwYGJq03IKQ1lz6gkXTnXIZsvE+kxttEsklbB5EngUQPq3OIhplPpJ8demmNA1aHaq/ngCMldQsN6kcCz4VlaySNC72yzk7al3MuAzZrE/EgkhXSNjqapAeBQ4CekhYR9bL6HTBB0nnAAuCbYfWngWOA2UAlcC6AmZVJ+i3wfljvajNLNNb/kKgHWCHwTHg45zLky+qsaOwsnxo3O6QtiJjZ6U0sOryRdQ24sIn93Anc2Uj6JGDEtuTROdd6YjkinptDZU2tN6xnEb+d1DnXagrDnCJenZU9PIg451pNYopc752VPTyIOOdaTWE8RkV1Hetr6n3YkyzhQcQ512oK82KsrozGbi3ykkhW8CDinGs1hfEYZRXV0XMviWQFDyLOuVZTmBdjVaUHkWziQcQ512oK8mKsDCURv08kO3gQcc61msJ4jOra+ui5l0SyggcR51yrKczLSXruQSQbeBBxzrWa5MBRGPevl2zgV9k512qS20H8PpHs4EHEOddqNimJeBDJCh5EnHOtJvkGw6J42sZ3ddsRDyLOuVbjJZHs40HEOddqkttBCrxhPSv4VXbOtZrEyL05gnjMv16ygV9l51yrSVRhFebFiGaudh2dBxHnXKvZGER8yJOs4UHEOddqEveJ+D0i2cODiHOu1SRXZ7nskJEgImm+pGmSpkiaFNK6S5ooaVb42y2kS9INkmZLmipp76T9nBPWnyXpnEyci3PuS16dlX0yWRI51MxGmtmY8PoK4EUzGwq8GF4DHA0MDY/zgVsgCjrAVcBYYF/gqkTgcc5lRuJmQy+JZI/tqTrrBOCe8Pwe4MSk9Hst8g5QIqkvcBQw0czKzGwVMBEY38Z5ds4lSbSJeEkke2QqiBjwvKTJks4PaX3MbGl4/gXQJzzvDyxM2nZRSGsqfTOSzpc0SdKk0tLS1joH51wD3iaSfTI1uM2BZrZYUm9goqRPkheamUmy1jqYmd0G3AYwZsyYVtuvc25TebEccnPkQSSLZKQkYmaLw9/lwONEbRrLQjUV4e/ysPpiYGDS5gNCWlPpzrkMKsyLke9BJGu0eRCRVCypc+I5cCQwHXgSSPSwOgd4Ijx/Ejg79NIaB6wO1V7PAUdK6hYa1I8Mac65DLrs6N04dZ+BW17RdQiZqM7qAzwehkTIBf5hZs9Keh+YIOk8YAHwzbD+08AxwGygEjgXwMzKJP0WeD+sd7WZlbXdaTjnGnPWuB0znQ
XXhmSWXU0EY8aMsUmTJmU6G845165Impx0S8ZG21MXX+ecc+2MBxHnnHMt5kHEOedci3kQcc4512IeRJxzzrWYBxHnnHMt5kHEOedci2XdfSKSSoluZmyJnsCKVsxOe5CN5wzZed7ZeM6QnefdknPe0cx6NUzMuiCyLSRNauxmm44sG88ZsvO8s/GcITvPuzXP2auznHPOtZgHEeeccy3mQWTr3JbpDGRANp4zZOd5Z+M5Q3aed6uds7eJOOecazEviTjnnGsxDyLOOedazINICiSNl/SppNmSrsh0ftJF0kBJL0uaKWmGpItDendJEyXNCn+7ZTqvrU1STNKHkp4Kr3eS9G645g9Limc6j61NUomkRyR9IuljSft19Gst6b/CZ3u6pAclFXTEay3pTknLJU1PSmv02oZZY28I5z9V0t5bcywPIlsgKQbcBBwNDAdOlzQ8s7lKm1rgJ2Y2HBgHXBjO9QrgRTMbCrwYXnc0FwMfJ72+DrjezIYAq4DzMpKr9PoL8KyZ7QbsRXT+HfZaS+oPXASMMbMRQAw4jY55re8GxjdIa+raHg0MDY/zgVu25kAeRLZsX2C2mc01s2rgIeCEDOcpLcxsqZl9EJ6vJfpS6U90vveE1e4BTsxIBtNE0gDgWODv4bWAw4BHwiod8Zy7AgcBdwCYWbWZldPBrzXRlNyFknKBImApHfBam9lrQMPpwpu6ticA91rkHaBEUt9Uj+VBZMv6AwuTXi8KaR2apMHAKOBdoI+ZLQ2LvgD6ZCpfafJn4DKgPrzuAZSbWW143RGv+U5AKXBXqMb7u6RiOvC1NrPFwP8BnxMFj9XAZDr+tU5o6tpu03ecBxG3GUmdgEeBS8xsTfIyi/qEd5h+4ZKOA5ab2eRM56WN5QJ7A7eY2SigggZVVx3wWncj+tW9E9APKGbzKp+s0JrX1oPIli0GBia9HhDSOiRJeUQB5AEzeywkL0sUb8Pf5ZnKXxocAHxd0nyiqsrDiNoKSkKVB3TMa74IWGRm74bXjxAFlY58rY8A5plZqZnVAI8RXf+Ofq0Tmrq22/Qd50Fky94HhoYeHHGihrgnM5yntAhtAXcAH5vZn5IWPQmcE56fAzzR1nlLFzO70swGmNlgomv7kpmdAbwMnBJW61DnDGBmXwALJQ0LSYcDM+nA15qoGmucpKLwWU+cc4e+1kmaurZPAmeHXlrjgNVJ1V5b5Hesp0DSMUT15jHgTjO7NrM5Sg9JBwKvA9P4sn3g50TtIhOAQUTD6H/TzBo22rV7kg4Bfmpmx0namahk0h34EDjTzDZkMHutTtJIos4EcWAucC7RD8sOe60l/QY4lagn4ofAd4nq/zvUtZb0IHAI0ZDvy4CrgH/RyLUNAfWvRFV7lcC5ZjYp5WN5EHHOOddSXp3lnHOuxTyIOOecazEPIs4551rMg4hzzrkW8yDinHOuxTyIuA5N0i/CqK1TJU2RNDbTeWqKpKcllWQ6HxB1d06MaOxcc3K3vIpz7ZOk/YDjgL3NbIOknkT3RGyXzOyYTOfBua3lJRHXkfUFViRuHDOzFWa2BEDSaEmvSpos6bmk4SBGS/ooPP6QmI9B0rcl/TWxY0lPhZsTkXSkpLclfSDpn2HsMSTNl/SbkD5N0m4hvZOku0LaVEknJ63fU9JgRfN73B5KUc9LKgzr7JNUqtqYv4YkXR72/5Gk30naRdIHScuHJl6Hfb4V1n1PUucG+ypWND/Fe2Gwxg45irVrGQ8iriN7Hhgo6TNJN0s6GDaOD3YjcIqZjQbuBBKjENwF/NjM9krlAKF080vgCDPbG5gEXJq0yoqQfgvw05D2K6KhJfYwsz2BlxrZ9VDgJjPbHSgHTk7K3/fNbCRQ10SejiYaaHBsOI/fm9kcYHW4Sx2iu9PvCkP5PAxcHNY9AqhqsMtfEA0Hsy9wKPCHMOKvcx5EXMdlZuuA0UQT7ZQCD0v6NjAMGAFMlDSFKAgMCO0RJWEuBoD7UjjMOKLJyt
4M+zoH2DFpeWIQy8nA4PD8CKKJzhL5XNXIfueZ2ZTkbUP+OpvZ2yH9H03k6QjgLjOrDPtPDFvyd+BcRROtnRq2HwYsNbP3w7prkoZFTzgSuCKc3ytAAdHQGc55m4jr2MysjuiL7xVJ04i+5CcDM8xsv+R1t9CoXcumP7oKEpsBE83s9Ca2S4zBVMfW/b8lj91UBxRuxbZNeZRoDKWXgMlmtlJSvxS2E3CymX3aCnlwHYyXRFyHJWmYpKFJSSOJBp77FOgVGt6RlCdp9zCzX3kYiBLgjKRt5wMjJeVIGkg04yXAO8ABkoaEfRVL2nULWZsIXJiUz5TmMQ/5W5vUw+y0ZvZ/rqSisP/uYfv1wHNEVWt3hXU/BfpK2ies21lfDoue8Bzw4zBQH5JGpZJflx08iLiOrBNwj6SZkqYSVTv9OkxzfApwnaSPgCnA/mGbc4GbQtWNkvb1JjCPaOjwG4DENMKlwLeBB8Mx3gZ220K+rgG6SZoejn/oVpzTecDtIX/FRLPzbcLMniUa3ntSWO+nSYsfIBqh+fmwbjVR1daNIS8T+bKUlfBbIA+YKmlGeO0c4KP4OtckRVMEP2VmIzKdlwRJnUJbD5KuAPqa2cVbsf1Pga5m9qt05dFlF28Tca59OVbSlUT/uwuISkEpkfQ4sAvR7I3OtQoviTjnnGsxbxNxzjnXYh5EnHPOtZgHEeeccy3mQcQ551yLeRBxzjnXYv8faynFE96tCwUAAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "bq_hist = histogram(base_quals)\n", + "plt.plot(range(len(bq_hist)), bq_hist, label = 'Base Quality Scores')\n", + "plt.xlabel('Sequencing cycle')\n", + "plt.ylabel('Phred base quality scores')\n", + "plt.title('Base quality scores per sequencing cycle')\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "19ea0fd4-155b-46c1-b706-a1155cb15b6e", + "metadata": {}, + "source": [ + "(Note: in this plot, the histogram function multiplies the base quality scores by 1000. That is why the quality scores are in tens of thousands rather than the conventional 2-to-60 scale.)\n", + "\n", + "For the above graph, the base quality scores are on the higher side for the first half of the reads, after which the scores consistently drop, indicating low confidence in the accuracy of the identified bases in the latter half of the reads. Moreover, a significant fall in base quality is observed somewhere between the cycles 60 and 70. 
\n", + "\n", + "To narrow down exactly which sequencing cycle resulted in the poorest base quality (below 5, as seen in the above graph), we can get the offset of the lowest base quality between cycles 60 and 70:" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "8e805f35-a92e-4a15-ad7c-778a4ec2f554", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "4526" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "min(bq_hist[60:70])" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "9df2ad5a-634b-454f-90f4-8c987259eadf", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "4.526" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# lowest base quality:\n", + "lowest_bq = min(bq_hist[60:70])/1000\n", + "lowest_bq" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "03facfb6-069b-4bbd-bc95-f778550d86a5", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "66" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# offset corresponding to the lowest base quality of 4526:\n", + "bq_hist.index(4526)" + ] + }, + { + "cell_type": "markdown", + "id": "b1631db1-f619-43eb-84c9-f5ca8536641a", + "metadata": {}, + "source": [ + "Therefore, the lowest base quality score of 4.526 occurring at offset 66 indicates that the 67th sequencing cycle is the poorest of the batch. 
To find out what bases exist at offset 66 of every sequencing read: " + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "3a845d8d-0cf4-4451-b078-9fe8554665c1", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N']" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# create a list of bases at offset 66 of every read\n", + "bases = []\n", + "for seq in seqs:\n", + " bases.append(seq[66])\n", + "\n", + "bases[:10]" + ] + }, + { + "cell_type": "markdown", + "id": "fb48c7ad-a790-4a53-b6fe-eff32f8a37a6", + "metadata": {}, + "source": [ + "The first 10 bases in this list are all N's. Let us calculate the proportion of N's in the entire list of bases from the 67th sequencing cycle of all reads:" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "3c7548b5-9618-42f0-a37b-35c33638ed55", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "'N' consists of 90 % of the bases at the 67th sequencing cycle of all reads\n" + ] + } + ], + "source": [ + "count = collections.Counter()\n", + "for base in bases:\n", + " count.update(base)\n", + " \n", + "print(\"'N' consists of %d %% of the bases at the 67th sequencing cycle of all reads\" % ((count['N']/len(bases))*100))" + ] + }, + { + "cell_type": "markdown", + "id": "265b841f-5cdf-4b9f-bf77-0a74c831950c", + "metadata": {}, + "source": [ + "As expected, 90% of the bases reported at all 67th sequencing cycles are low confidence reads. Earlier, we speculated that these unidentifiable bases are G's or C's due to their vastly disproportionate ratio in the sequencing sample. 
The identity of the base can be confirmed by examining the GC content of each read against its corresponding sequencing cycle:" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "e0bde418-3270-41c2-bc07-637da6dd9db8", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYIAAAEWCAYAAABrDZDcAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAABDzElEQVR4nO3deXwcdf348dd7N1fTXM3R+0jP9L4PSqFcVVtQ6pdbUW4RBG9RFEFB/YmoeAIicikooMghlLu0QKE3pXfSNkmbXmmO5mjSNMnu5/fHzG5nk91kk3abNvN+Ph55dHdmduYzO9t5z+cWYwxKKaXcy9PVCVBKKdW1NBAopZTLaSBQSimX00CglFIup4FAKaVcTgOBUkq5nAYCpRQicqaI5Hd1OqIhIk+IyM+7Oh3diQaCbkJErhCRFSJSJyIH7NdfExFxbDNTRBaJSJWIVIrIShG59gSn04jIiOO0r7NFZPfx2JfbGWPeN8bkdXU6VNfQQNANiMh3gT8Avwb6An2Am4A5QIK9zWxgMbAUGAFkATcDC7ogyackEfF2dRqUigljjP6dwn9AOlAHXNzOdh8AD3Rw318BtgC1wGZgqr18DLAEqAI2ARc6PvME8ADwqv25FcBwe917gLHTewi43F7+WWCdvb8PgYmO/RUD3wPWA9XAs0AS0BM4DPjtfR0C+oc5hyeAvwBv2elZCgxxrB9tr6sE8oHLWnz2IWCRneZ5YfZ/DVBo77sIuNKx7jr7+zsIvNHiuJ8Cttrn9Gc7XTfY634KPOXYNtf+3uIc1/xRYB+wB/g54HWk5wPgN/Zxi4AFjn1lAo8De+31L9rLzwZ2t/e9O9Z/3z7+XuAGO30jIvyOIh1zI/A5x3bxQDkwxX5/hv17qAJKgGsc1+Xnjs9F/P3oX5T/17s6Afp3jBcQ5gPNgZtEhG2SAR9wTgf2e6l9k5kBCFYuYoj9n3U78COs3Ma59k0wz/7cE0AFMBOIA54GnnHsN+SGAUwBDgCzAC9wtX0TSrTXFwMrgf72DWULcJO9LuTmFeE8nrDTNxdIxMo5fWCv62nfYK610zrFvhGNdXy2Gitn5XHeCB2fr3Gcez9gnP16of09jbH3/WPgQ3tdtp2mS+zv89v2NYw2ELwAPGwfv7f9/XzVXncN0IQVxL1Yub69gNjrX8W6qfeyj31WuO+yne99PrAfGIf123qq5XVt8T1FOub3gWcd2y0ENtivh9jf0Rfsz2QBkx3X5efR/H70L8r/712dAP07xgsIXwL2t1gWeIo6jHUDHGD/Rx3dgf2+AXwzzPIz7ZuAx7HsX8BP7ddPAH9zrDsf2Op43zIQPAT8rMUx8h03i2LgS4519wF/sV+H3LwinMcThAaiFKygOAi4HHi/xfYPAz9xfPbvbey7p/09Xwz0aLHuNeB6x3sPUG/f4K4CljvWCbCbKAIBVrHfEefx7Jvlu/bra4DtjnXJ9mf7YgUqP9ArzLmEfJftfO+PAb90rBvR8ro61rV1zP5YN/s0+/1/gO/br38IvNDGNQ0EgjZ/P/oX3Z/WEZz6KoBsEYkLLDDGnG6MybDXebCy436s/5TRGgTsCLO8P1BijPE7lu3ECjYB+x2v67FuvpEMAb5rV2BXiUiVfez+ndxfOCWBF8aYQ1jFQP3tY89qcewrsW6arT7bkjGmDiuY3ATsE5FXRWS047z+4NhvJd
YNf4B9bGeaTFvHaSGQK9vn2PfDWDmDgOD3ZYypt1+mYH2vlcaYg1EeK9L3HpL+dtIe8ZjGmL3AMuBiEcnAqq962vG5cL+/lqL5/ah2aCA49X2E9YS4MNIG9s3gI6wn12iVAMPDLN8LDBIR529nMFYxUmeUAL8wxmQ4/pKNMf+K4rMmymMMCrwQkRSsoo699rGXtjh2ijHm5miPYYx5wxjzKawguxV4xHFeX22x7x7GmA+xytadaRLne6z6iGTH+5aB6QiQ7dhvmjFmXBTfQwmQad90j8U+YKDj/aBIG0ZxzCexcrWXAh8ZY/Y4Phfu9xdu/539/SibBoJTnDGmCrgbeFBELhGRVBHxiMhkrKKLgO8D14jIbSKSBSAik0TkmQi7/hvwPRGZJpYRIjIEq/K3Hvi+iMSLyNnA54BI+2mpFBjmeP8IcJOIzLKP01NELhCR1Cj3lSUi6e1sd76InCEiCcDPsIplSoBXgFEi8mX7XOJFZIaIjInmRESkj4gsFJGeWDfnQ1g5L7AqqH8oIuPsbdNF5FJ73avAOBG5yM7JfYPQm/06YK6IDLbP7YeBFcaYfcCbwG9FJM2+1sNF5Kz20mt/9jWs30ov+3znRnOuLTwHXCsiY0QkGbjzGI75IjAV+Cbwd8fyp4F5InKZiMSJSJb9m27pWH4/yqaBoBswxtwHfAfrZl9q/z0M/ACrvgD7SfRc+69QRCqBv2K1iAm3z38DvwD+iVWO+yKQaYxpxLrxL8CqWH0QuMoYszXK5P4UeNLOxl9mjFmNVbH5Z6wirO1Y5dzRnPdWrPqJQnt/kYoD/gn8BKt4ZhrWEyjGmFrg08AVWDmE/cCvsCqVo+HB+t732vs+C6tyFmPMC/a+nhGRGqwWMgvsdeVYT8D3YhXfjcQqIgmc11tYlavrgTVYAcvpKqyK+s1Y39l/iL7Y78tYlclbsSpZvxXl54KMMa8BfwTexbpey+1VRzp6TGPMYeB5YCjwX8fyXVj1S9/F+m7XAZPCpKXTvx91VKAlgVLdkog8gVUJ+uOuTktbRGQJVgXx37o6LR1l56A2YrXUae7E5+8CRhljvnTcE6eiojkCpVSHicj/iUiiiPTCyvn8r5NBIBO4Hit3qrqIBgKlVGd8FauYZwdWc9yb2968NRH5ClZl72vGmPeOb/JUR2jRkFJKuZzmCJRSyuXi2t/k5JKdnW1yc3O7OhlKKXVKWbNmTbkxJifculMuEOTm5rJ69equToZSSp1SRGRnpHVaNKSUUi6ngUAppVxOA4FSSrmcBgKllHK5mAYCEZkvIvkisl1Ebo+wzWUisllENonIP2OZHqWUUq3FrNWQPb/rA1hT8u0GVonIy8aYzY5tRmKNrDjHGHNQRHqH35tSSqlYiWWOYCbWTEmF9oiVz9B6zPyvYM2jexDAGHMghulRSikVRiwDwQBCZy7aTegsVgCjsMaDXyYiy0VkfrgdiciNIrJaRFaXlZXFKLlKKXVy8vsNv3h1MzvKDsVk/11dWRyHNRb72Vjzrj4SbiYjY8xfjTHTjTHTc3LCdoxTSqlu6/m1u3nk/SLW7Ix2ltGOiWUg2EPoFHYDaT2d4W7gZWNMkzGmCCjACgxKKaWA6vom7n1tK1MHZ3DJ1IHtf6ATYhkIVgEjRWSoPUXgFcDLLbZ5ESs3gIhkYxUVFcYwTUopdUr57Vv5HKxv5J6F4/F4JCbHiFkgsCepuBV4A9gCPGeM2SQi94jIhfZmbwAVIrIZa9q724wxFbFKk1JKnQyq65t4fs1u2psGYOOeap5avpMvnzaE8QPam5q782I66JwxZhEt5sQ1xtzleG2w5nz9TizToZRSJ5PfvV3AEx8WMyQrmem5mWG38fsNd720kV7JCXzn03kxTU9XVxYrpdRJyxjDA+9uZ3nh8SuoqD7cxHOrrQaVr6zfF/G4P/3fJtbuquJH548hvUf8cTt+OBoIlFIqgr9/tJNfv5HPnxZv69TnfX5D9eGmkGXPrtpFfa
OPUX1SWLRhH35/6+Kh+98q4O8f7eSrc4dx0dSWre6PPw0ESikVxse7DvLzVzeTGOdhVdFB6hubO7yP+97YyuxfvsPGPdUANPv8PLGsmNOGZXLruSM5UHuE1S2ahD7yXiF/WrydL8wcxO0LRiMSmwpiJw0ESinVwsG6Rm55ei190pL47WWTaPT5O1w81OTz8+/Vu6lv9HHDk6sprWngtY372VvdwA1nDOO80b1JjPPw6vq9wc8s3lrKLxZt4YKJ/fj55yeckCAAGgiUUsfA5zc8uGQ7s3/5DkXldV2dnGOycU81z60u4XdvFXDN4yspP9TIg1dOZd6YPvSI97I0v2OjGizNL6OyrpHbPpNHTUMTX/n7ah55v5Ch2T05d3RveibGce7o3izauB+f31Db0MQdL2xkVJ8U7r9sEt4YNRUN55SbqlIpdXLYW3WYbz+7jhVFlQB8sL2codk9j2mfd720keyURL5xXnT9Sl/fuJ/HlhXx6NXTSU3qfIXqgdoGFj6wDJ/fIAK9UxO59+IJTByYAcDs4VksLehYIPjvx7vJ6pnAjXOHkdcnla/8YzXGwM8Wjgv2B7hgYj9e27iflUWVvLphL6U1DTz0pTkkxnk7fS6doTkCddLw+0277aoVvL5xHw1Nvi5NQ1F5HfN//x4b9lRz3yUTSe8Rz+a91ce0z5qGJv65YhdPfFiML0wFajhL8g+wsqiSu/+3uf2N27CisBKf3/D4tTPI/9kCVvxoHhc5evHOHZlNcUU9Oyuiy/VU1zfx9uYDXDi5P/FeD/PG9uHuC8cxZXAGF087ut9zR/emR7yX+97YylPLd3HtnKFMHpRxTOfSGRoI1Elj/h/e44/vbO/qZJzU8vfXctNTa/n36pL2N46hxVsPUNPQzAtfm8Nl0wcxrn8am/bWHNM+l+aX0ew3VNY1sq4kujF1dlXWIwL/WbObRRvCN8WMxvLCClIS4zhzRDYJca1vi2flWSPkvxdlruDVDfto9Pm52BFMrpqdywtfm0NywtGCmOSEOM4d05uPd1UxKLMH3/30qE6fw7HQQKBOCjUNTRSUHuKFj9vvbelm2w9Yo08ej8HHquubKD90pFOfzd9fQ3ZKAnl9UwEY1z+NrftrafL5O52ed7aUkt4jnjiP8PaW6Eak31VZz/nj+zFpYDo/emEDpTUN1Dc28+LHe/jV61ujzjmtKKpkem4v4rzhb4m5WckMzkyOunjov2t3M7J3CuP6p7W77cVTB+D1CPdeNDEkSJxIGgjUSWFXRT0AxRX17Cg7tSsdY6nQHoZ47a6qY97Xd55bx5cfXdmpz+bvrw0GAYBx/dNpbPZ3epjkZp+fd/PLOG9Mb2bkZvLOltJ2P9Pk87O36jDDcnpy/+WTaWjycelfPmL6z9/mW8+u46ElO/jjO+23/y8/dITtBw4xa2hWxG1EhLNG5fDhjgoam9sOdjsr6li98yAXTR0YVaufc0f3Ye2dn2LOiOx2t40VDQTqpFDsKHuN5ibgVoGWObsq6zv9NA/Q0ORj2Y5ytuyroaSyvkOf9fsNBaWHyOtz9Gl3rP3ku2lP54qH1uw8SPXhJuaN6cN5Y3pTUHqo3XTtrTqM38CgzGSG56Rwz8LxNDT5WDh5AM/eeBqXThvIw+8VBtvwR7Ki0KrsnjUs/FAPAXNH5VDf6GP1zso2t3vx472IwOen9G9zO6dY9xxujwYCFeJATUOXNAPcaecIhmX35J0oiwXcaEd5HalJVvHBx8eQK1i76yANTdaT7ZL8jn3fuyrrOdzkI69vSnDZsOyeJMZ5Ol1P8M7WA8R7hTNHZjNvTB8A3m7ngWCXHSgGZyYDcNn0Qay8Yx6/vGgCs4Zl8eMLxpLZM4EfPL+e5jaKrFYUVZCc4GVCO4O6zR6eRbxX+MlLm7j+iVVc/8Qqnl+zu9V2q4orGdc/jX7pPdrc38lEA4EKamjyccUjy7nhyVUn/NjF5XX0Tk3kgon9WL2zkoN1jSc8DSc7YwxFZY
eYP64vcR5h7a7O1xN8uL0Cr0fol57Eux1sH791fy0AeX2P5gjivB5G90tjUydbDr29pZTThmWRmhRPbnZPhue0/0AQeHgYkpUcdn16cjw/WziOTXtreOT9ooj7WVFYybQhvYiPUD8QkJIYx/VnDCMx3kNpbQNrdh3kgSWhjRuMMWzeV8O4frEbKTQWNBCooD8t3kZhWR07yuo4dKTj3emPxc6KeoZkJXPemD74DSwp6HyuYF/1YV78uOUcSKe+irpGahqaGdMvjXH90/j4GALBsh3lTByYzmfG9eXDHeUdao5aUFqLCIzqkxKyfFz/NDbvq+lwZX9ReR2FZXWcN7p3cNm8MX1YUVRBbUNTxM+VVNaT4PXQJzUp4jbzx/djwfi+/O7tgrA53cq6RvJLazltWOT6AafbF4zmla+fyStfP5NrTx9KUXno/5XSmiNU1jUGi8pOFRoIFACb9lbz8NLCYIegrfuOrSlgRxVX1DEkqycTB6STk5oYdauRcP72fhHfenYdb28+Oesa/r26hDte2BB2sLGA0poGbnl6LdX1R2+EgRvZ0JyeTBnci09Kqtss8gjYsLs6WBkPVgut9burOWNENueM7k1Dk5+POjB8Qv7+WgZnJrdq4TKufxq1Dc3sPng46n3B0Tqh8+wiocDrJp/hvYLyiJ/bVVnPwMwe7U7WcveF40j0erj7f5taBamVRdZ5n9ZO/UA4EwamYQxsctRBbN5nvdZAoFopP3SE0pqGrk5GRM0+Pz94fj0ZyQk8eOVUADYfYyCoqm/kJy9tpKq+/SKe+sZmDtQeITcrGY9HODevN+/llwVbZxSWHeJAbfTf3yclVQDc/cqmkCfdJp+frfujPy9jDD97ZTNf/cdqvvqP1dz0jzWsLm67orAtzT4/d/9vE7f9Zz1Pr9jVZhn4u1sP8OqGfbzrKL8vsltTDc9OYcrgDA43+YLFNJH4/IZrHl/JDX9fFeykFeg8dfrwbGYNzaRHvJclW6MPvFv315DXJ7XV8nH9reKQjhQPVdU38uK6PeT1SWVQ5tEinqmDM8hIjuetzfsjfnZXZX2wfqAtvdOS+PanRrEkv4w3WzwcLC+sJCnew4QBGVGnOSAwUcwGZyCw60hG9239/ZzMNBBE4Z0tpZz32yW8vrFzHVa+/s+PmXf/0k6XnwYs217OnHsX8+1n17G0oCyqp8FIXl2/jztf3MidL27kpqfWsHFPDfcsHMfovqn0So4P/qA76/63Cnjyo50hN7JIApV+Q7Ks3Mh5Y3pTe6SZJz4s4vonVnHub5fy3ec+ieq4zT4/G/dWM2lQBiWVh/nL0h2AVf/xlb+vZv7v32fD7uiuw6a9NTz6QRGb99Wws6KeJQUHePSDyGXNbalpaOK6J1fz+LJirjk9l0GZPXho6Y6IxSjb7P4CKx2BZ0f5IRK8Hgb06sHUwb0AgsVDxhieWr4z5MkfYP3uKirqGikoPcRL66zismXby0mK9zB1SAZJ8V7mjMhicf6BqIp0Gpp8FFfUh73Rje6bitcjIRXG5YeOhN2vz2/418pdnPObJWzeW8P1ZwwNWR/n9XDBhH4s2rCf3Qdbtx4yxrCrIrpAAHDV7CGM7pvKPf/bzOHGow8HK4qs+oFwncja0zs1ib5pSaGBYF8NgzOTj2m4i66ggaANDU0+7nppI9c/uZqi8jp+/OImatooswynpqGJlcWV1DY0c9WjK4PtwDtqR9khbn5qDX5jeHtLKVc/tpLT711MfjtPhOEYY8189O81Jby6YR9rd1Xx5dOGsGB8X0SEsXZZb2dtP1DL0yt2AbT7xApQXG79R8+1A8EZI63enf9v0VbW7jrIqD4pfFJSFdWNqqD0EA1Nfq6bk8vnJvXnwSU72Ly3hmseX8nSgjJErF6x0Qi0pnnha3N4/VtzuWBCf5YXVrRZpBPJXS9u5MPt5fzyogn89MJx3HjmMD7eVcXKovA5jGAgcKwvKqtjSFYyXo8wsFcPslMSgy2HHv
2giB+/uJF7X98Ssp93tx7AIzCidwq/e7uAxmY/H+4oZ0ZuZnA8m7PzelNSeThs/42PdlRww5OrOdJs3Ty3HziEz28YFSYQJMV7GZ7TMxgIXvh4NzN+8TbPrw2trzHGyqX88L8bGNk7lVe/cSaXzRjUan+3nDMCBP7wduu+AFX1TdQeaY46EMR5PdyzcDx7qg7zwLvbKSw7xP1vFbB1fw2ntdF/oD3jB6S3yhGM7XdqFQuBBoKI6hub+fwDy/j7Rzu5/oyh/Pum2VTUHeH3b3VsgooPt5fj8xt+dfEEAL70txXsqqjnSLOPI82+qG4qB+sauf6JVcR7PTz31dms/vE8/vKlaTQ0+fj1G/kdPrfiinoq6hr5yefGsfbOT7H2zk/xs8+PD3Z+GdM3jfz9tZ3Ocfxy0VaS470MyuwRVaAKjN8y2G79kZwQx88XjucnnxvLstvP5erTc6mJsuz5k91VAEwelMEd548h3iMsfOADVhUf5PeXT2biwAyWRlkRvXjrASYNTCc7JRGA04dncbC+Karg5rS36jCvrN/H1afn8oWZgwG4dPogsnom8JCdY2lpW2ktHrFuvBV2f4HC8rpgHY6IMHVwBmt3HWRVcSW/fG0rSfEe3t5yIKSC9d38MqYO7sUdF4yhpPIwf168jYLSQyGdl87OywHCNyNdVVzJ21tKg5XvgesZqehjXP90Nu2t5q3NpXzv3+sxxhpa2WlnRT3vbyvnlnOG8+xXT2NMhBtn/4wefPm0ITy/djfbD4R+5y2bjkZj5tBMLpoygAeWbOfc3y7lT4u3MWd4NpfPbB2EojVxYHqwwvjQkWZ2VtafcvUD4KJAsKfqcIeKdlYUVrJ1fy33XzaJOz87lmlDMvnizME8+VExWzrwtLy0oIzUpDgunjqQJ6+bSW1DM3N//S55P36dvB+/zoI/vN/mk25js5+bnlrD3uoG/nrVNAZlJpMY52X++L5cf8Yw3t5S2m6HmZYCwxNMG9Ir7Pqx/dM40uyPqj/Bog37mHT3m/zpnW1WJ6Xt5byz9QC3nDuCaYN7RRUIiivqyeyZENKp5rIZg7h2zlCSE+I6VPb8SUkVGcnxDM5Mpm96Et+fPxqvR3joyqksnDyAs0blsK6kqt26i8q6Rj4uqeLsvKMtWWYPt54cO1KxCvDkR8X4jeGa03ODy5LivVx3xlCW5Je1+j3VNjSxr7qBc+xjryo+iM9v2FlRx7Ccoy11pgzuRXFFPTc/tZaBvXrwly9No7HZz5ubrBvvgdoGNuyp5pzRvTl7VA4zczP542KrueMZjkAwsFcyo/qkhM0pNdsPKg8vLcTnN+SX1pIQ5wnm3loa1z+N0poj3PL0Wsb3T2PB+L4sL6wMeeBZtsOqAI6m5+3Xzh5Oj3gvv32zIGR5MBBEaDoayQ/PH8P5E/pxx/lj+Oj283jqhln0bqPVUXsmDEgPVhjn76/BGDRHcDJ7ed1ebnpqbatp4yL5uKQKj8BnxvUNLrvtM3mkJcVx10sboyqmMMawNL+MM0ZkE+f1MH5AOv+5+XRu+0wet30mj89N6k9+aW2bN9xX1u9lRVEl9140gWlDQls2XDMnl9SkuKi60Tut2XmQtKQ4RuSkhF0feKJpr3jocKOPn72yGb8x/PatAj7z+/e488WNDOzVg2tOzyWvbxr7qhtCWr6Es7OiLmJbcAhf9hzJupIqJg7MCN5grj49l09+8mk+bV/Hs0bl4DfWkMltea+gDGOs0SED+mf0YEhWMh/tiBwI6o40h5RB1x1p5l8rdjF/fN+QylCAL80aQs8Eb7AeIyAwntBFUweSGOdhZVEluw/W0+QzDHMM8zx1cAZgBY4Hr5zKWaNyGNirBy99Yk10ssTuH3B2Xg4iwm3zrQnQM5LjW92sJgzICPs7DOQKC8vreGvzfrbur2VETkrEMXkCv53c7GSeuHYm543pE2yiGbBsezl905JCziWSrJREbjhzGK9t3M96O7cHRwPBoF4dCwQ5qY
k88MWpfGXuMPqmdz4ABDgrjAP1apojOIkFekFuK40uW7+upIpRfVLpmXi0iVxGcgI/mD+aVcUHI0467bT9wCH2Vjcwd1SOIx2p3HLOCG45ZwTfmmeNub68MHJLlJVFlaT3iOfzk1vPW5reI57r5gzlzc2lHarcXbOzkqlDekVsdjc8J4UEr6fdfT76QSH7qht49OoZ/OP6mcR5hMLyOn4wfzRJ8d5g8UF+O9/5zop6hrSRxW9Z9hxJfWMzBaW1TB4Y2pnHObb7pIHppCXFtTuK5Lv5B8jqmdCqt+npw7NYUVQRdpjkQNn3Ob9ZEryZP792NzUNzVx/xrBW26cnx/PFWYN5Zf0+Kh0d6AL1A+P6pzFlcAariisptG/Sw3KO3jwnDcpg/IA07r14AuP6pyMiXDipP8u2l1N+6AhL8g/QJy0xeNOfkZvJ5dMHcfmMQa2ufUKcBJ/+nXx+Q0KchyFZyTy0ZAf5+2vabBEzIzeT78/P46nrZ9GrZ8LRXJQdPP1+w0c7KpgzIjvq2bduOHMovZLjQ3IFJZX1ZKckhPz/7Ao5qYn0S7cqjDfvqyEjOZ5+xyHAnGiuCQSj+kR3UwLrx/pJSRVT7Ccup8umD2JARo+oOiwFRip0BgKnYdk9yUlNZEVR5CfMVcWVTG/jpn3dnKGkJsZFPbl29WFrlM9pg8MXCwHEez2M6pvSZo7gQG0DDy7ZwfxxfZk5NJMzR+bw2jfn8r9bz+Bzk6wxVgKDkuW30WSzocnH3urDwRZDkQTKntuyaW8NfmPdICOJ83o4c2QOSwvKIubqfH7D0oIyzsrLafW9nzYsi9qG5rBpWbPzIKuKD1J+6AiXP/wRG/dU89gHRUwelBGxGG7++L74/IYVjuKmbaW1JMZ5GJSZzMzcTDbtrQ62dHJO/JIU7+WVr5/J/005OtTxwskD8PkNL63by/sF5ZyT1zvkhvurSybywwVjWn8vHk/YOqEmnyHB6+HGucP4ZHc1pTVHQgabayne6+FrZ4+gd5p1MxyQ0YPBmcnB4rTN+2o4WN/EnBHRV9CmJsVzw5nDWFpQFmz+u7MDLYZiLVBhvHlvDWP6pp2w6SWPJ9cEggEZPeiZ4KUgijLrooo6qg83hZ0gwuMR5o3pzQfby0OKAMJZWlDGyN4pDMgIP+aIiDBraCYrCivD3pQqDh1hR1kd03Mjd3ZJT47n2jm5vLZxf1R1BYHmhpFuTAFj+6WxeW/kXqL3v1lAk8/P7QtGB5clxHmY4Hga75eeRGpSXJuVq7sP1mOMVZTQlkDZc1sDrQX6DwRmlYrkrFE5lNYcifhQsK7kIFX1TSHFQgGzh4U+4To9+kER6T3ieenWOSTGebjowQ8prqhv1TTSaeLADJITvCH1DtsOHGJ4TgpejzBzaBZ+Y+Us0nvEk9kzoc1zy+ubyui+qfzxnW3UHmkOqeNoi9cTKUfgJ84rXDx1YLDSvK1AEM7pw7NYXmjlopbZRXIdHWnzylmD6RHv5TG7+W60fQhOhIkDrArjLftrT8liIXBRIBARRvVNjSpHsM5ukjclwlPzeWP6cMRuhhdJfWMzKworOStCbiBg1rAs9tc0BMdNcQpU6s7Ibfumfd0ZQ8lOSeRrT68NKWIIZ+3Og3g90uZTM8CYfmlU1DVyoLb1jTcwt+tVs3PJbaOcV0TI65NKQRvfeaDpaHs5guDolm0UD60rqWJARg9yUhPb3Fcgh+acg/ZAbUOwQvPdrWV4PcKZI1pfu95pSQzP6dmqwriksp43Nu3ni7MGM65/Os/dNJt+GUkMzkxmwfi+rfYTEO/1MCM3MySwbCs9xEh7+IYpgzPweoSdFfUMze4Z1dPm5yb1p/pwE/Fe4YyR0d1w471Cs691IGj2G+I8QlK8lxvnDiXeKx2+2c0ebuWiNu+tYdmOCkb0TqFPWseKTzKSE7h42gBe/Hgv+6oPs6/68EkTCMYPtCqMG5v9p2RFMbgoEADk9Uklf39tuxW960qqSEmMY3
iEytRZwzLpmeBtNQxCk88fzF6vKKyk0efnrLy2A8Fsu2t7uOKh1TsPkmBXMrclIzmBR66axv6aBm76x5pgm+9wVu88yJh+qe2WrQZ+0C3rCTbtreaqx1aSlZLI188d0eY+wHp63NrGdx4YfjpSK5SAwCBebRUPfbK7ikmD2h/sq296EqP7prK0oIw9VYe5+ak1zPzFO8z51WJ+uWgLr23cx7TBvUhPDt8p6PTh2awsqgyZhOXxZcV4RLh6di5gtcR541tzefnWORErVgNmD89i2wGr93TdkWb2VB1mZG/rt9czMS54/Z31A2250C6amzk0k5Qoy9C9Hk/Yeo9mnyHOY6X/K2cOY8lt53S4lU0gF/XetjJWFVUyZ3jn2u1fN2cojT4/v3pta3D46ZOBsx5JcwSngFF9UjlY30T5obafmq2WJ+l4I5TLJ8Z5mTsqh8VbS4M3uMZmP+f/4X0m3f0mX350BX9avI2keOtpry3Dc1LITkkIjonutKq4kokD00mKb38i6ymDe/GbSyexsriSH/03fKumZp+fdSVVbdYPBIwJ03Jozc6DfOGvy0mKs/ozZCS3XUwBVouf2oZm9lWHHyJiZ0U9qUlx9Ipw0w1IT45nYK8eEXMEFYeOUFJ5mEntFAsFzB2Vw8qiSs777RLezT/AV+cOY2y/NB79oIgdZXWcE6ZYKGD28CzqG32st8vtaxqaeHbVLj47sV9IS5SkeG9U31HgRrm8sDJYyTyi99Hil1lDrd9QNK1swLpB/mD+aG49J7oJ4MHKETT5W9cRNPtN8P+BiEQs5mxLIBf1+LIiDjf5OL2TE7AMy0nhvNG9eXGd1SrqZMkRZKck0j89iQSvJ+LD48kupoFAROaLSL6IbBeR28Osv0ZEykRknf13QyzTEyjbbKuooqHJx5Z9Ne1OIH3emD6U1hxhoz0Rx98/KmbbgUOcM7o3ZbVHWLurirNH9W73Ji4izByayfLCipCbd0OTj417qtusH2jpwkn9+da8kTy/djdP2T17nbbur6W+0cfUduoHANKS4hmU2YNPSqzer39evI0vP7qCzJ4JPHfT7JBKy7YEhiqO1J9gZ2U9uVnRFXmM658WsSVT4KbcXpFXwGfG9cVnDOeO7s073z2bH54/hkevmcHKO+bx5y9O4erTh0T8bGCkyt+/XcBjHxRx3+tbqWv0hW0ZFI1x/dNITYrjox0VwRZDIx0je860fwMducncfPbwYIudaHg9gjG0yhU0+/3Ee4+98nP28CzKDzXiEaIe6TMcZ31Le8WJJ9Ls4dlMHZLRqaEqTgYxa3slIl7gAeBTwG5glYi8bIzZ3GLTZ40xt8YqHU7BlkP7ayNWVm3cU02z37QbCM7Jy0HEGkd9YK8e/PGdbcwdlcOfv2gN2lbT0ERilD+K04Zl2WOqHA5mdz8pqaLJZ9qtH2jpm+eNZHXxQe57fSvzx/UNKS9fG2VFccDYfmm8sak0OFDXtCG9eOjKqcEWIdEIDE62dX9t2KfsnRV17U4IEjCufzpvbCrl0JHmVkUeH+4oxyNEva9pQ3qx4aefabWfzJ4JfHZi2zNLZfZM4MJJ/Xlrcynvb7PqiU4blhlSUd4RcV4Ps4Zm8tGOctJ6xJHg9YQ0pz07L4f/938TOHdMdBW/nREYi7/Z78frOfrw4swRHIvZw7J5avkuJgzMOKbZuGYPz2JMvzR2lB2idzt1QSfSLy+agP8Unms7lo1wZwLbjTGFACLyDLAQaBkITpjslAQyeya0mSNYZ7c8mRym6ahTVkoiUwf34p2tpVQfbuLQkWbuOP9os7y0Dgw6FZgrdXlhRTAQrG6n928kIsLdC8cx//fv8avXt/KbSycF163ZeZC+aUlRZ+9vOms4w3JSmDIog+m5me22WAkn3W5X7WxCerCukVc37OPFj/ews6KehZOim9IvMBH4ln01IUVuG3ZX8/iyYi6Y2L9D7cqjLT8P549fmIIxhoP1TeytOtzhjk0tnTYsi7
e3HOD9gnKG5fQMqVeI83r44qzBx7T/9sTZN/tmn8H5tTT7/O1O2BKN04ZlEucR5kZZeR2JiHDvRRPI31/b7vDTJ9KpmhMIiGUgGACUON7vBmaF2e5iEZkLFADfNsaUtNxARG4EbgQYPLjz/yFEhFF9UtpsOfTxLqvlSTQVYueN6c19r+ezdV8tl88Y3OFmdQEje6fQKzme5YWVXDrdGvdkVXElI3unRFXG3NLwnBRuOHMYDy3ZwRUzBjE9N5OlBWUs3nqAuaNyom7nPGVwr4gtpzoiUGEM8MG2cm78x2rqG32M7J3CbZ/J40unRS6GcQoONbGnOhgIDjf6+NazH5OdksjPFo475rR2hIiQ2TOhUwGypUAxzuZ9NXx2Yr9j3l9HBZ76WzYh9R2nHEFWSiIv3jIn6grvtkwalBF1EaCKTleHsf8BucaYicBbwJPhNjLG/NUYM90YMz0np+1WOO3J65NKQRutWNaVVLWbGwgIzK2aGOfhO58a1ek0eTzCrKFZLNtezp6qw/j8hjU7D3aofqClr587gv7pSdz50ibu+d9mrn5sJf3Sk/jep/M6vc/Oyuubyo6yQ7y2YR/XPbGKwZnJvPL1M3jz23O55ZwRURcV9ElLJKtnAp/srg5ev1++toUdZXX85tJJnQqaJ4sxfdPIsCvMR/bu3APFsQgWDbXoVNbkM+22eorW+AHprSazUSeHWF6VPYBzWL+B9rIgY4yzzeTfgPtimB4ARvVNpa7Rx56qwwx0ZOeNsW6+e6oOc+2c3Kj2NbJ3Cp8a24ez83LabbvenkumDeTtLaWcdd+7nJ2XQ21Dc4frB5ySE+K487NjufnptWzZV8M1p+dy+4LRUbVAOt7y+qTS5DPc/PRaJg3K4MlrZ3Tqpi1i9X944eM9vFdQxvgB6SwtKOO6OUOjbi9/svJ4hNOGZvH6pv0hFcUnSuCpv2Vlsc/uR6C6t1gGglXASBEZihUArgC+6NxARPoZYwKD9lwIhA6mHgOBysuC0loG9kqmocnHox8U8d+1u9lRVkePeG/UvTFFhEeumn5c0jVvbB+Wfv8cHnmvkGdW7UKEdpuetmf++L7ccf4YRvVNbbdjWywF2sHPHJrJY9fMOKay+fsumcjbm0tZWVzJquJKJg/K4PvzT3wuJxbOGJnN65v2d8nsVoGWQU0tAkGTz6+BwAViFgiMMc0icivwBuAFHjPGbBKRe4DVxpiXgW+IyIVAM1AJXBOr9ASMDLYcOsQ5eb350Qsb+O/aPczMzeS6M4ZywYR+XVbEMCCjBz+9cBxfP3cExRX1x9xhRkT4ytzONWk8nkb1SeXlW+cwqk/qMedIslMSuWLmYK6YGdvK065w+YxB5PVNDRlq+kTx2p3GfL7WOYLE+K4uQVaxFtMCO2PMImBRi2V3OV7/EPhhLNPQUnoPqxVLQWktz64q4b9r9/DN80by7WMo4z/eslISyUo5eZrGHQ/tjf+jjg430TXHDuQIWtQR+A09PRoIujtX1tyM6pPKsu3lvLphH2eOzOYb50XfA1Op7sjraD7q5PNr0ZAbuDLU5/VN5UDtETKTE/j95ZOPS/M4pU5lgfGEmlvkCJp9hrjj0LNYndxcGQimD+lFYpyHB66c0u2KYJTqjEDRUMscgTX6qCtvE67iyqKhT4/ryyc/+XSXNKVU6mQUqUNZs8+vOQIXcG2o1yCg1FGROpQdr7GG1MnNtYFAKXVUpA5lzT5DvBYNdXt6hZVSETuUNfsNXi0a6vY0ECiljnYoa9lqyO8nXouGuj0NBEqpYF+Bppb9CHwmGCRU96VXWCkVbBnUsvlo03GaoUyd3DQQKKUidig7XvMRqJObBgKlVNgOZcaY4zofgTp56RVWSoVtPhp4qWMNdX8aCJRSwQ5lztFHm+zOZdqzuPvTQKCUCpsjCLzWHEH3p4FAKRXsPexsPhqoL9BB57o/vcJKqWDvYWeHskALIi0a6v40ECilwnYoa/ZrjsAt9AorpYKBoD
lsINAcQXengUAp5agsdhQNaash19BAoJRCRIj3Ssjoo4EcgfYs7v40ECilAOuG72w+Gigmiteexd2eXmGlFGA1IW3ytW41pDmC7k8DgVIKsJqQhs8RaCDo7jQQKKUAq5louOajOh9B96dXWCkFWM1Ew7Ua0hnKuj8NBEopwGom6uxH4NNWQ66hgUApBVg5Amfz0cBrnY+g+4vpFRaR+SKSLyLbReT2Nra7WESMiEyPZXqUUpHFeT0hRUOB19qzuPuLWSAQES/wALAAGAt8QUTGhtkuFfgmsCJWaVFKtS/OIyGVxYHX2rO4+4tljmAmsN0YU2iMaQSeARaG2e5nwK+AhhimRSnVjrgWzUd9Ouica8TyCg8AShzvd9vLgkRkKjDIGPNqWzsSkRtFZLWIrC4rKzv+KVVK2c1HdYYyN2o3EIjIr6JZ1lEi4gHuB77b3rbGmL8aY6YbY6bn5OQc66GVUmHEeSLlCDQQdHfR5Ag+FWbZgig+twcY5Hg/0F4WkAqMB5aISDFwGvCyVhgr1TVaNh8NzlCmrYa6vbhIK0TkZuBrwDARWe9YlQosi2Lfq4CRIjIUKwBcAXwxsNIYUw1kO463BPieMWZ1R05AKXV8xHk81Dc3B9/rfATuETEQAP8EXgN+CTibftYaYyrb27ExpllEbgXeALzAY8aYTSJyD7DaGPPyMaRbKXWcxXklePMHx1SVGgi6vYiBwH5ir8Zq9ukF+tjbp4hIijFmV3s7N8YsAha1WHZXhG3P7kC6lVLHWauxhnTyetdoK0cAgP1U/1OgFAg0KTDAxNglSyl1orUaa0gnr3eNdgMB8C0gzxhTEeO0KKW6UKvKYh1ryDWiyfOVYBURKaW6sThPizoCnaHMNaLJERRiNfF8FTgSWGiMuT9mqVJKnXBxXk9w6Gk4miPQDEH3F00g2GX/Jdh/SqluqHWOwE+cRxDRSNDdtRsIjDF3A4hIsjGmPvZJUkp1hZbNR31+oxXFLhHNEBOzRWQzsNV+P0lEHox5ypRSJ1ScJ7RoqMlntOmoS0RzlX8PfAaoADDGfALMjWGalFJdoGXRkM/v1xyBS0QV7o0xJS0W+WKQFqVUF7Iqi0NnKNNexe4QTWVxiYicDhgRiceaRGZLbJOllDrRrByBY4YyLRpyjWiu8k3ALVhzCewBJmMNRqeU6kbivILfgN8uHmry+7UzmUtEkyPIM8Zc6VwgInOIbgRSpdQpIlAM1Ow3JNhzE8RrHYErRJMj+FOUy5RSp7DAvAOB4qFmn9EcgUu0NR/BbOB0IEdEvuNYlYY1rLRSqhtx5gisf/06vIRLtFU0lACk2NukOpbXAJfEMlFKqRMvGAjslkOaI3CPtuYjWAosFZEnjDE7T2CalFJdwNuyaMhvdJpKl4imsjhRRP4K5Dq3N8acG6tEKaVOvPiWOQK/X/sRuEQ0geDfwF+Av6EdyZTqtoKVxY6iIQ0E7hBNIGg2xjwU85QopbrU0crio0VDSfFaNOQG0Vzl/4nI10Skn4hkBv5injKl1AkVGFfoaKsh7VnsFtHkCK62/73NscwAw45/cpRSXaV1qyGtI3CLaOYjGHoiEqKU6lqBp/9A0ZDOR+Ae7QYCe6C5mzk69PQS4GFjTFMM06WUOsG8LYqGmnx+LRpyiWiKhh4C4oHAZDRftpfdEKtEKaVOvHhPaKshzRG4RzSBYIYxZpLj/WIR+SRWCVJKdQ1vi1ZDTdqz2DWiyff5RGR44I2IDEP7EyjV7QRGGnXmCOK1aMgVoskR3Aa8KyKFgABDgGtjmiql1AnXavRRvz9Yb6C6t2haDb0jIiOBPHtRvjHmSDQ7F5H5wB+wRiv9mzHm3hbrA5Pe+IBDwI3GmM0dSL9S6jhp1XzUb4LDTqjurd18n4jcAvQwxqw3xqwHkkWk3RnKRMQLPAAsAMYCXxCRsS02+6cxZoIxZjJwH3B/R09AKXV8tOpQ5jN4tWjIFaK5yl8xxlQF3hhjDgJfieJzM4Htxp
hCY0wj8Ayw0LmBMabG8bYnVkc1pVQXCD8fgeYI3CCaOgKviIgxxkDwST8his8NAEoc73cDs1puZOc4vmPvM+yIpiJyI3AjwODBg6M4tFKqo4Idynw6Q5nbRJMjeB14VkTOE5HzgH/Zy44LY8wDxpjhwA+AH0fY5q/GmOnGmOk5OTnH69BKKQevI0dgjNH5CFwkmhzBD7Cexm+237+FNSR1e/YAgxzvB9rLInkGq6OaUqoLxDuGofbZxUM61pA7RNNqyI81H8FfOrjvVcBIERmKFQCuAL7o3EBERhpjttlvLwC2oZTqEoEcgc/vD9YTaM9id4gmR9ApxphmEbkVeAOr+ehjxphNInIPsNoY8zJwq4jMA5qAgxwd6VQpdYIFKoabfOZoINAcgSvELBAAGGMWAYtaLLvL8fqbsTy+Uip6zg5lPl8gEGgdgRtEvMoikiQirWpmRSRHRJJimyyl1InmbD7aZPcu1qIhd2gr3P8RODPM8jOA38UmOUqpruLsWXy0slhzBG7Q1lWeZoz5b8uFxpgXODo3gVKqm3A2H22y+xJoHYE7tBUIkjv5OaXUKUhEiPMIzT7/0RyBFg25Qls39AMiMrPlQhGZAZTFLklKqa7i9Qg+v6HJrizWnsXu0FaroduA50TkCWCNvWw6cBVWnwClVDcT7/XQ5KgjiNeexa4Q8SobY1ZijQ0kwDX2nwCzjDErTkTilFInlpUj8AfrCDRH4A5t9iMwxpQCPzlBaVFKdbF4r9Dkd+YINBC4QVv9CBbaI4MG3q8QkUL779ITkzyl1IkU5/HQ7Ds6xITOR+AObV3l7wMvO94nAjOAs4GbYpgmpVQX8XqEZr8JDkWtM5S5Q1tFQwnGGOd8Ah8YYyqAChHpGeN0KaW6QLxXQjqUaR2BO7SVI+jlfGOMudXxVicFUKobCjYfDfYj0KIhN2jrKq8QkVZTUorIV4GVsUuSUqqrWM1H/fj82rPYTdoqGvo28KKIfBFYay+bhlVX8PkYp0sp1QVadijTnsXuEDEQGGMOAKeLyLnAOHvxq8aYxSckZUqpEy7O6wlpPqqDzrlDNDOULQb05q+UC8S16FCmOQJ30HCvlAqK80jIEBNaR+AOGgiUUkHxXrtDmU9bDbmJXmWlVFCgsljnLHYXDQRKqaB4r9iT12vzUTfRQKCUCgrmCHTyelfRq6yUCrKaj/qP5gi01ZAraCBQSgXFtagj0LGG3EEDgVIqyBqG+mjRkM5Q5g56lZVSQXEeodl/dD4CzRC4gwYCpVRQnD0MdbPPT7xXENFI4AYaCJRSQUdHHzVaP+AiGgiUUkHO0Ufjtemoa8T0SovIfBHJF5HtInJ7mPXfEZHNIrJeRN4RkSGxTI9Sqm1xwcnr/Xi16ahrxCwQiIgXeABYAIwFviAiY1ts9jEw3RgzEfgPcF+s0qOUal+cY4Yy7UzmHrG80jOB7caYQmNMI/AMsNC5gTHmXWNMvf12OTAwhulRSrUjzuOxexb7dXgJF4llIBgAlDje77aXRXI98Fq4FSJyo4isFpHVZWVlxzGJSimnwM2/ocmvvYpd5KTI+4nIl4DpwK/DrTfG/NUYM90YMz0nJ+fEJk4pFwkMO93Q5NMcgYu0O0PZMdgDDHK8H2gvCyEi84A7gLOMMUdimB6lVDuCOYJmv85F4CKxvNKrgJEiMlREEoArgJedG4jIFOBh4EJ7jmSlVBcKFAdpjsBdYhYIjDHNwK3AG8AW4DljzCYRuUdELrQ3+zWQAvxbRNaJyMsRdqeUOgECuYAjTT6tI3CRWBYNYYxZBCxqsewux+t5sTy+UqpjnJXFSQneLk6NOlG0EFApFXS0jsBHvBYNuYYGAqVUkLOOQMcacg8NBEqpoEBv4oYmv85F4CJ6pZVSQUfrCDRH4CYaCJRSQcFWQ83WfATKHTQQKKWCnH0HNEfgHhoIlFJBzr4D2rPYPfRKK6WCnENPa89i99BAoJQKCskR6HwErqFXWikV5MwFaI7APT
QQKKWCQoqGtNWQa2ggUEoFhRYNaSBwCw0ESqmgkKIhbTXkGnqllVJBzpu/5gjcQwOBUiooNEeggcAtNBAopYKcN3+vNh91Db3SSqkg7VDmThoIlFJBWjTkThoIlFJB2nzUnTQQKKWCQouG9PbgFnqllVJBoaOPao7ALTQQKKWCQsca0tuDW+iVVkoFiUhwQhqtI3APDQRKqRDBQKBFQ66hgUApFSLeDgQ6VaV7aCBQSoUIjDcUr4POuYZeaaVUiDjNEbhOTAOBiMwXkXwR2S4it4dZP1dE1opIs4hcEsu0KKWiE6gbiNc6AteIWSAQES/wALAAGAt8QUTGtthsF3AN8M9YpUMp1TGBZqM66Jx7xMVw3zOB7caYQgAReQZYCGwObGCMKbbX+WOYDqVUBwRzBFo05BqxDPkDgBLH+932sg4TkRtFZLWIrC4rKzsuiVNKhefVOgLXOSXyfsaYvxpjphtjpufk5HR1cpTq1uLtIiGdqtI9Ynml9wCDHO8H2suUUicx7VnsPrEMBKuAkSIyVEQSgCuAl2N4PKXUcRBoLaQ9i90jZoHAGNMM3Aq8AWwBnjPGbBKRe0TkQgARmSEiu4FLgYdFZFOs0qOUik6gSEgHnXOPWLYawhizCFjUYtldjtersIqMlFInCR1ryH005CulQgSLhrSOwDU0ECilQni11ZDr6JVWSoWI11ZDrqOBQCkVQpuPuo8GAqVUiHhtNeQ6eqWVUiG01ZD7aCBQSoUIBAAda8g9NBAopUIExhrSGcrcQ6+0UiqE184RaIbAPTQQKKVCxHuEeK8gopHALTQQKKVCxHk9Wj/gMjEda0gpder5vykDGJyZ3NXJUCeQBgKlVIjxA9IZPyC9q5OhTiAtGlJKKZfTQKCUUi6ngUAppVxOA4FSSrmcBgKllHI5DQRKKeVyGgiUUsrlNBAopZTLiTGmq9PQISJSBuzs5MezgfLjmJxThRvP243nDO48bzeeM3T8vIcYY3LCrTjlAsGxEJHVxpjpXZ2OE82N5+3GcwZ3nrcbzxmO73lr0ZBSSrmcBgKllHI5twWCv3Z1ArqIG8/bjecM7jxvN54zHMfzdlUdgVJKqdbcliNQSinVggYCpZRyOdcEAhGZLyL5IrJdRG7v6vTEgogMEpF3RWSziGwSkW/ayzNF5C0R2Wb/26ur03q8iYhXRD4WkVfs90NFZIV9vZ8VkYSuTuPxJiIZIvIfEdkqIltEZLZLrvW37d/3RhH5l4gkdbfrLSKPicgBEdnoWBb22orlj/a5rxeRqR09nisCgYh4gQeABcBY4AsiMrZrUxUTzcB3jTFjgdOAW+zzvB14xxgzEnjHft/dfBPY4nj/K+B3xpgRwEHg+i5JVWz9AXjdGDMamIR1/t36WovIAOAbwHRjzHjAC1xB97veTwDzWyyLdG0XACPtvxuBhzp6MFcEAmAmsN0YU2iMaQSeARZ2cZqOO2PMPmPMWvt1LdaNYQDWuT5pb/Yk8PkuSWCMiMhA4ALgb/Z7Ac4F/mNv0h3POR2YCzwKYIxpNMZU0c2vtS0O6CEicUAysI9udr2NMe8BlS0WR7q2C4G/G8tyIENE+nXkeG4JBAOAEsf73faybktEcoEpwAqgjzFmn71qP9Cnq9IVI78Hvg/47fdZQJUxptl+3x2v91CgDHjcLhL7m4j0pJtfa2PMHuA3wC6sAFANrKH7X2+IfG2P+f7mlkDgKiKSAjwPfMsYU+NcZ6z2wt2mzbCIfBY4YIxZ09VpOcHigKnAQ8aYKUAdLYqButu1BrDLxRdiBcL+QE9aF6F0e8f72rolEOwBBjneD7SXdTsiEo8VBJ42xvzXXlwayCra/x7oqvTFwBzgQhEpxiryOxer7DzDLjqA7nm9dwO7jTEr7Pf/wQoM3flaA8wDiowxZcaYJuC/WL+B7n69IfK1Peb7m1sCwSpgpN2yIAGrcunlLk7TcWeXjT8KbDHG3O9Y9TJwtf36auClE522WDHG/NAYM9
AYk4t1XRcbY64E3gUusTfrVucMYIzZD5SISJ696DxgM934Wtt2AaeJSLL9ew+cd7e+3rZI1/Zl4Cq79dBpQLWjCCk6xhhX/AHnAwXADuCOrk5PjM7xDKzs4npgnf13PlaZ+TvANuBtILOr0xqj8z8beMV+PQxYCWwH/g0kdnX6YnC+k4HV9vV+EejlhmsN3A1sBTYC/wASu9v1Bv6FVQfShJX7uz7StQUEq1XkDmADVouqDh1Ph5hQSimXc0vRkFJKqQg0ECillMtpIFBKKZfTQKCUUi6ngUAppVxOA4E66YnIHfZok+tFZJ2IzOrqNEUiIotEJKOr0wEgImcHRmNVqi1x7W+iVNcRkdnAZ4GpxpgjIpINnLRDDBtjzu/qNCjVUZojUCe7fkC5MeYIgDGm3BizF0BEponIUhFZIyJvOLrfTxORT+y/XwfGdBeRa0Tkz4Edi8grInK2/frTIvKRiKwVkX/b4zUhIsUicre9fIOIjLaXp4jI4/ay9SJysWP7bBHJtecIeMTOzbwpIj3sbWY4cjfB9LUkIj+w9/+JiNwrIsNFZK1j/cjAe3ufH9rbrhSR1Bb76inWGPcr7UHqut3ou6rzNBCok92bwCARKRCRB0XkLAiOqfQn4BJjzDTgMeAX9mceB75ujJkUzQHsXMaPgXnGmKlYvXW/49ik3F7+EPA9e9mdWF35JxhjJgKLw+x6JPCAMWYcUAVc7EjfV40xkwFfhDQtwBpcbZZ9HvcZY3YA1SIy2d7sWqzRRxOAZ4Fv2tvOAw632OUdWMNvzATOAX5tj1aqlAYCdXIzxhwCpmFNuFEGPCsi1wB5wHjgLRFZh3UjH2iXz2cYazx3sIYgaM9pWBMWLbP3dTUwxLE+MHjfGiDXfj0Pq1t/IJ0Hw+y3yBizzvlZO32pxpiP7OX/jJCmecDjxph6e/+Bsen/Blwr1mRLl9ufzwP2GWNW2dvWmKNDMgd8GrjdPr8lQBIwOMKxlctoHYE66RljfFg3ryUisgHrRr0G2GSMme3ctp2K2mZCH36SAh8D3jLGfCHC547Y//ro2P+ZI47XPqBHBz4byfPAT7ByIGuMMRUi0j+KzwlwsTEm/zikQXUzmiNQJzURyRORkY5Fk4GdQD6QY1cmIyLxIjLOWLN0VYnIGfb2Vzo+WwxMFhGPiAzCmrkOYDkwR0RG2PvqKSKj2knaW8AtjnRGNTewnb5aR8unK9rY/7UikmzvP9P+fAPwBlYx1eP2tvlAPxGZYW+bKkeHZA54A/i6PWInIjIlmvQqd9BAoE52KcCTIrJZRNZjFeH81FhTjl4C/EpEPsEaafV0+zPXAg/YxSDi2NcyoAhr2OI/AoFpPcuAa4B/2cf4CBjdTrp+DvQSawL1T7DK3aN1PfCInb6eWLNshTDGvI41vPBqe7vvOVY/jTUb25v2to1YxUR/stPyFkdzOwE/A+KB9SKyyX6vFICOPqq6N7Gm7HzFWBOdnxREJMWu+0BEbgf6GWO+2YHPfw9IN8bcGas0KnfROgKlTrwLROSHWP//dmLlRqIiIi8Aw7FmYlPquNAcgVJKuZzWESillMtpIFBKKZfTQKCUUi6ngUAppVxOA4FSSrnc/wdERI85QxVtcQAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# calculate GC content\n", + "gcs = findGCbyPos(seqs)\n", + "\n", + "# plot GC content against the sequencing cycle\n", + "plt.plot(range(len(gcs)) , gcs)\n", + "plt.xlabel('Sequencing cycle')\n", + "plt.ylabel('GC content')\n", + "plt.title('GC content per sequencing cycle')\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "ad8f7f45-4eda-4361-a39e-8fb6960c85c2", + "metadata": {}, + "source": [ + "The average GC content of the human genome is usually greater than 0.5, which can be observed in the graph where most values hover around or above 0.5 with some fluctuations due to noise. However, a significant drop in the GC percentage can be observed in the reads somewhere between the 60th and the 70th sequencing cycle, in line with the observation from the base qualities graph. To find the exact sequencing cycle with the lowest GC content:" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "37292194-ab0f-4d1d-a621-91c2784a91ef", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "66" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gcs.index(min(gcs[60:70]))" + ] + }, + { + "cell_type": "markdown", + "id": "88de3461-b52a-49ed-99ae-a2e4b72c4e20", + "metadata": {}, + "source": [ + "This is the same cycle as the one with the lowest base quality. " + ] + }, + { + "cell_type": "markdown", + "id": "1b2afa9e-05a0-4d49-8ae3-c3c4ff2783a1", + "metadata": {}, + "source": [ + "### Conclusion\n", + "The 67th sequencing cycle of all reads was found to have produced the lowest base quality scores. It was also determined that about 90% of all reads contained N's in the 67th sequencing cycle. Lastly, a disproportionate ratio of G:C in the sample along with a significant drop in GC content was observed in the 67th sequencing cycle. 
Therefore, it can be concluded that the 67th sequencing cycle produced the lowest quality read due to low confidence in the identification of the GC content present at these positions in the DNA sequence. " + ] + }, + { + "cell_type": "markdown", + "id": "fdbaed54-f3c2-4126-a2b1-2d5e7a1cf22b", + "metadata": {}, + "source": [ + "### References:\n", + "1. Ajay, S. S., Parker, S. C., Abaan, H. O., Fajardo, K. V. F., & Margulies, E. H. (2011). Accurate and comprehensive sequencing of personal genomes. _Genome research_, _21_(9), 1498-1505." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.2" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/seq_processing.py b/seq_processing.py index 67392ec..be6b8ba 100644 --- a/seq_processing.py +++ b/seq_processing.py @@ -3,9 +3,12 @@ and manipulaing and visualizing sequence datasets. """ +#contains functions from Johns Hopkins University's "Algorithms for DNA Sequencing" course on Coursera + def readFASTA(filename): # parse and read FASTA file genome = '' + with open(filename, 'r') as f: for line in f: if line[0] != '>': @@ -84,6 +87,7 @@ def findGCbyPos(reads): def countBase(string): + # return the number of each base present in the sequence counts = {'A' : 0, 'G' : 0, 'C' : 0 , 'T' : 0, 'N' : 0} for base in counts: