Skip to content

Commit

Permalink
feat: confidence level and central limit theory
Browse files Browse the repository at this point in the history
  • Loading branch information
brycemcwilliams authored Apr 14, 2020
1 parent c395e4d commit 6592796
Show file tree
Hide file tree
Showing 2 changed files with 999 additions and 0 deletions.
230 changes: 230 additions & 0 deletions bootstrapping_confidence_level.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,230 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from pandas import read_csv\n",
"from sklearn.utils import resample\n",
"from sklearn.tree import DecisionTreeClassifier\n",
"from sklearn.metrics import accuracy_score\n",
"from matplotlib import pyplot\n",
"import numpy as np\n",
"\n",
"# load dataset\n",
"data = read_csv('pima-indians-diabetes.data.csv')\n",
"values = data.values"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.6961206896551724\n",
"0.6924731182795699\n",
"0.673202614379085\n",
"0.7251082251082251\n",
"0.6827133479212254\n",
"0.70020964360587\n",
"0.6953781512605042\n",
"0.6730769230769231\n",
"0.6941964285714286\n",
"0.7154989384288747\n",
"0.6924731182795699\n",
"0.709051724137931\n",
"0.7245119305856833\n",
"0.6478260869565218\n",
"0.7172995780590717\n",
"0.6631130063965884\n",
"0.7188841201716738\n",
"0.6949891067538126\n",
"0.6995708154506438\n",
"0.684322033898305\n",
"0.665938864628821\n",
"0.632258064516129\n",
"0.6739130434782609\n",
"0.6666666666666666\n",
"0.6709129511677282\n",
"0.7083333333333334\n",
"0.6938325991189427\n",
"0.6825053995680346\n",
"0.6857749469214437\n",
"0.7270742358078602\n",
"0.7307692307692307\n",
"0.6723768736616702\n",
"0.7118279569892473\n",
"0.670995670995671\n",
"0.6717391304347826\n",
"0.707983193277311\n",
"0.6946236559139785\n",
"0.6622807017543859\n",
"0.6826722338204593\n",
"0.7217573221757322\n",
"0.6630434782608695\n",
"0.7062634989200864\n",
"0.7070484581497798\n",
"0.6991525423728814\n",
"0.693446088794926\n",
"0.6848739495798319\n",
"0.7323340471092077\n",
"0.6826086956521739\n",
"0.6702819956616052\n",
"0.7086956521739131\n",
"0.6274089935760171\n",
"0.6909871244635193\n",
"0.6762114537444934\n",
"0.7366167023554604\n",
"0.6898047722342733\n",
"0.7130434782608696\n",
"0.7037037037037037\n",
"0.7263157894736842\n",
"0.6863157894736842\n",
"0.6666666666666666\n",
"0.7239130434782609\n",
"0.7043478260869566\n",
"0.7015250544662309\n",
"0.69593147751606\n",
"0.6902654867256637\n",
"0.6861471861471862\n",
"0.7456521739130435\n",
"0.6876355748373102\n",
"0.6985138004246284\n",
"0.6751592356687898\n",
"0.7198275862068966\n",
"0.6652719665271967\n",
"0.7130242825607064\n",
"0.6903225806451613\n",
"0.6877729257641921\n",
"0.6978260869565217\n",
"0.6501079913606912\n",
"0.6914893617021277\n",
"0.6811279826464208\n",
"0.6810344827586207\n",
"0.6780383795309168\n",
"0.6872340425531915\n",
"0.6997885835095138\n",
"0.7029914529914529\n",
"0.6306695464362851\n",
"0.7006507592190889\n",
"0.6616379310344828\n",
"0.6984815618221258\n",
"0.6943231441048034\n",
"0.7200854700854701\n",
"0.7033898305084746\n",
"0.6808510638297872\n",
"0.6813417190775681\n",
"0.673773987206823\n",
"0.6736842105263158\n",
"0.6545842217484008\n",
"0.6997840172786177\n",
"0.6494623655913978\n",
"0.7066381156316917\n",
"0.6631130063965884\n"
]
}
],
"source": [
"# configure bootstrap\n",
"n_iterations = 100 # Number of bootstrap samples to create\n",
"n_size = int(len(data) * 0.50) # picking only 50 % of the given data in every bootstrap sample\n",
"\n",
"# run bootstrap\n",
"stats = list()\n",
"for i in range(n_iterations):\n",
" # prepare train and test sets\n",
" train = resample(values, n_samples=n_size) # Sampling with replacement \n",
" test = np.array([x for x in values if x.tolist() not in train.tolist()]) # picking rest of the data not considered in sample\n",
" # fit model\n",
" model = DecisionTreeClassifier()\n",
" model.fit(train[:,:-1], train[:,-1])\n",
" # evaluate model\n",
" predictions = model.predict(test[:,:-1])\n",
" score = accuracy_score(test[:,-1], predictions) # caution, overall accuracy score can mislead when classes are imbalanced\n",
" print(score)\n",
" stats.append(score)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXoAAAD4CAYAAADiry33AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjMsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+AADFEAAASOUlEQVR4nO3df6xkZ33f8fcn/pEqQMqavTjGP1iaGisG1YZcLVALZEJt1obgNELgVZsuLcnmh6mClEZ12iqu4B+3EUFNjGJt8BYnAkNLMHFkg711iAyRTXxtbGyzwBpnUy9reS8s2FhESpZ8+8ecVS/jmb1z58y9s/v4/ZJGc87zPOc8z6PRfu7ZM3POSVUhSWrXj8x7AJKk9WXQS1LjDHpJapxBL0mNM+glqXEnz3sAo2zevLm2bNky72FI0gnjvvvu+1ZVLYyqOy6DfsuWLSwtLc17GJJ0wkjyN+PqPHUjSY0z6CWpcQa9JDXOoJekxhn0ktQ4g16SGrdq0Cc5O8nnkuxN8kiSX+/KT0uyJ8m+7n3TmO13dG32Jdkx6wlIko5tkiP6I8BvVNVPAa8FrkpyPnA1cGdVnQvc2a3/kCSnAdcArwG2AteM+4MgSVofqwZ9VT1RVfd3y98D9gJnAlcAN3bNbgR+bsTmbwb2VNXhqvoOsAfYNouBS5Ims6YrY5NsAV4FfBE4vaqegMEfgyQvHrHJmcDjK9YPdGWj9r0T2AlwzjnnrGVY0obZcvWt8x7Chtt/7VvmPQT1NPGXsUmeD/wJ8N6qenrSzUaUjXykVVXtqqrFqlpcWBh5uwZJ0hQmCvokpzAI+Y9W1ae64ieTnNHVnwEcGrHpAeDsFetnAQenH64kaa0m+dVNgBuAvVX1uyuqbgGO/opmB/CnIza/Hbg0yabuS9hLuzJJ0gaZ5Ij+IuAXgJ9J8kD3uhy4FrgkyT7gkm6dJItJPgxQVYeB9wP3dq/3dWWSpA2y6pexVfUFRp9rB3jTiPZLwC+uWN8N7J52gJKkfrwyVpIaZ9BLUuMMeklqnEEvSY0z6CWpcQa9JDXOoJekxhn0ktQ4g16SGmfQS1LjDHpJapxBL0mNM+glqXEGvSQ1zqCXpMYZ9JLUuFUfPJJkN/BW4FBVvbIr+wRwXtfkhcB3q+rCEdvuB74H/AA4UlWLMxq3JGlCqwY98BHgOuCPjhZU1TuPLif5APDUMbZ/Y1V9a9oBSpL6meRRgncl2TKqrntw+DuAn5ntsCRJs9L3HP3rgSerat+Y+gLuSHJfkp09+5IkTWGSUzfHsh246Rj1F1XVwSQvBvYk+WpV3TWqYfeHYCfAOeec03NYkqSjpj6iT3Iy8PPAJ8a1qaqD3fsh4GZg6zHa7qqqxapaXFhYmHZYkqQhfU7d/Avgq1V1YFRlkuclecHRZeBS4OEe/UmSprBq0Ce5CbgbOC/JgSTv7qquZOi0TZKXJLmtWz0d+EKSB4G/Am6tqs/ObuiSpElM8qub7WPK3zWi7CBwebf8GHBBz/FJknrq+2WsnsO2XH3r3Pref+1b5ta3dKLxFgiS1DiDXpIaZ9BLUuMMeklqnEEvSY0z6CWpcQa9JDXOoJekxhn0ktQ4r4zVCWmeV+VKJxqP6CWpcQa9JDXOoJekxhn0ktQ4g16SGmfQS1LjJnmU4O4kh5I8vKLsvyb5ZpIHutflY7bdluRrSR5NcvUsBy5JmswkR/QfAbaNKP9gVV3YvW4brkxyEvAh4DLgfGB7kvP7DFaStHarBn1V3QUcnmLfW4FHq+qxqvo74OPAFVPsR5LUQ59z9O9J8uXu1M6mEfVnAo+vWD/QlY2UZGeSpSRLy8vLPYYlSVpp2qD/A+AngQuBJ4APjGiTEWU1bodVtauqFqtqcWFhYcphSZKGTRX0VfVkVf2gqv4B+EMGp2mGHQDOXrF+FnBwmv4kSdObKuiTnLFi9V8CD49odi9wbpKXJTkVuBK4ZZr+JEnTW/XulUluAi4GNic5AFwDXJzkQganYvYDv9y1fQnw4aq6vKqOJHkPcDtwErC7qh5Zl1lIksZaNeiravuI4hvGtD0IXL5i/TbgWT+9lCRtHK+MlaTGGfSS1DiDXpIaZ9BLUuMMeklqnEEvSY0z6CWpcQa9JDXOoJekxhn0ktQ4g16SGmfQS1LjDHpJapxBL0mNM+glqXEGvSQ1zqCXpMatGvRJdic5lOThFWW/k+SrSb6c5OYkLxyz7f4kDyV5IMnSLAcuSZrMJEf0HwG2DZXtAV5ZVf8M+DrwW8fY/o1VdWFVLU43RElSH6sGfVXdBRweKrujqo50q/cAZ63D2CRJMzCLc/T/DvjMmLoC7khyX5Kdx9pJkp1JlpIsLS8vz2BYkiToGfRJ/jNwBPjomCYXVdWrgcuAq5K8Ydy+qmpXVS1W1eLCwkKfYUmSVpg66JPsAN4K/KuqqlFtqupg934IuBnYOm1/kqTpTBX0SbYB/xF4W1V9f0yb5yV5wdFl4FLg4VFtJUnrZ5KfV94E3A2cl+RAkncD1wEvAPZ0P528vmv7kiS3dZueDnwhyYPAXwG3VtVn12UWkqSxTl6tQVVtH1F8w5i2B4HLu+XHgAt6jU6S1JtXxkpS4wx6SWqcQS9JjTPoJalxBr0kNc6gl6TGGfSS1DiDXpIaZ9BLUuMMeklqnEEvSY0z6CWpcQa9JDXOoJekxhn0ktQ4g16SGmfQS1LjJgr6JLuTHEry8Iqy05LsSbKve980ZtsdXZt93QPFJUkbaNIj+o8A24bKrgburKpzgTu79R+S5DTgGuA1wFbgmnF/ECRJ62OioK+qu4DDQ8VXADd2yzcCPzdi0zcDe6rqcFV9B9jDs/9gSJLWUZ9z9KdX1RMA3fuLR7Q5E3h8xfqBruxZkuxMspRkaXl5ucewJEkrrfeXsRlRVqMaVtWuqlqsqsWFhYV1HpYkPXf0Cfonk5wB0L0fGtHmAHD2ivWzgIM9+pQkrVGfoL8FOPormh3An45ocztwaZJN3Zewl3ZlkqQNMunPK28C7gbOS3IgybuBa4FLkuwDLunWSbKY5MMAVXUYeD9wb/d6X1cmSdogJ0/SqKq2j6l604i2S8AvrljfDeyeanSSpN68MlaSGmfQS1LjDHpJapxBL0mNM+glqXET/epG0nPXlqtvnUu/+699y1z6bZFH9JLUOINekhpn0EtS4wx6SWqcQS9JjTPoJalxBr0kNc6gl6TGGfSS1DiDXpIaZ9BLUuOmDvok5yV5YMXr6STvHWpzcZKnVrT57f5DliStxdQ3NauqrwEXAiQ5CfgmcPOIpp+vqrdO248kqZ9Znbp5E/CNqvqbGe1PkjQjswr6K4GbxtS9LsmDST6T5BXjdpBkZ5KlJEvLy8szGpYkqXfQJzkVeBvwv0dU3w+8tKouAH4f+PS4/VTVrqparKrFhYWFvsOSJHVmcUR/GXB/VT05XFFVT1fVM93ybcApSTbPoE9J0oRmEfTbGXPaJslPJEm3vLXr79sz6FOSNKFejxJM8mPAJcAvryj7FYCquh54O/CrSY4AfwtcWVXVp09J0tr0Cvqq+j7woqGy61csXwdc16cPSc9N83pWLbT3vFqvjJWkxhn0ktQ4g16SGmfQS1LjDHpJapxBL0mNM+glqXEGvSQ1zqCXpMYZ9JLUOINekhpn0EtS4wx6SWqcQS9JjTPoJalxBr0kNc6gl6TG9Q76JPuTPJTkgSRLI+qT5PeSPJrky0le3bdPSdLkej1KcIU3VtW3xtRdBpzbvV4D/EH3LknaABtx6uYK4I9q4B7ghUnO2IB+JUnMJugLuCPJfUl2jqg/E3h8xfqBruyHJNmZZCnJ0vLy8gyGJUmC2QT9RVX1aganaK5K8oah+ozYpp5VULWrqharanFhYWEGw5IkwQyCvqoOdu+HgJuBrUNNDgBnr1g/CzjYt19J0mR6BX2S5yV5wdFl4FLg4aFmtwD/pvv1zWuBp6rqiT79SpIm1/dXN6cDNyc5uq+PVdVnk/wKQFVdD9wGXA48Cnwf+Lc9+5QkrUGvoK+qx4ALRpRfv2K5gKv69CNJmp5XxkpS4wx6SWqcQS9JjTPoJalxBr0kNc6gl6TGGfSS1DiDXpIaZ9BLUuMMeklqnEEvSY0z6CWpcQa9JDXOoJekxhn0ktS4vg8eOe5sufrWeQ9Bko4rHtFLUuOmDvokZyf5XJK9SR5J8usj2lyc5KkkD3Sv3+43XEnSWvU5dXME+I2qur97QPh9SfZU1VeG2n2+qt7aox9JUg9TH9FX1RNVdX+3/D1gL3DmrAYmSZqNmZyjT7IFeBXwxRHVr0vyYJLPJHnFMfaxM8lSkqXl5eVZDEuSxAyCPsnzgT8B3ltVTw9V3w+8tKouAH4f+PS4/VTVrqparKrFhYWFvsOSJHV6BX2SUxiE/Eer6lPD9VX1dFU90y3fBpySZHOfPiVJa9PnVzcBbgD2VtXvjmnzE107kmzt+vv2tH1Kktauz69uLgJ+AXgoyQNd2X8CzgGoquuBtwO/muQI8LfAlVVVPfqUJK3R1EFfVV8Askqb64Drpu1DkuZhXlfY77/2LeuyX6+MlaTGGfSS1DiDXpIaZ9BLUuMMeklqnEEvSY0z6CWpcQa9JDXOoJekxhn0ktQ4g16SGmfQS1LjDHpJapxBL0mNM+glqXEGvSQ1zqCXpMb1fTj4tiRfS/JokqtH1P9okk909V9MsqVPf5KktevzcPCTgA8BlwHnA9uTnD/U7N3Ad6rqnwIfBP7btP1JkqbT54h+K/BoVT1WVX8HfBy4YqjNFcCN3fIngTclOeZzZiVJszX1w8GBM4HHV6wfAF4zrk1VHUnyFPAi4FvDO0uyE9jZrT6T5Gs9xjZPmxkxvxNYS/NpaS7Q1nxamgtMOZ/0O+fx0nEVfYJ+1JF5TdFmUFi1C9jVYzzHhSRLVbU473HMSkvzaWku0NZ8WpoLHH/z6XPq5gBw9or1s4CD49okORn4x8DhHn1KktaoT9DfC5yb5GVJTgWuBG4ZanMLsKNbfjvw51U18ohekrQ+pj51051zfw9wO3ASsLuqHknyPmCpqm4BbgD+OMmjDI7kr5zFoI9zJ/zppyEtzaeluUBb82lpLnCczSceYEtS27wyVpIaZ9BLUuMM+jVY7ZYPXZt3JPlKkkeSfGyo7seTfDPJdRsz4vH6zCXJOUnuSLK3q9+yUeMep+d8/ntXtjfJ7837or4Jbi3ywSQPdK+vJ/nuirodSfZ1rx3D287DtPNJcmGSu7vP5stJ3rnxo3/WWKf+bLr6+WRAVfma4MXgC+dvAP8EOBV4EDh/qM25wJeATd36i4fq/wfwMeC6E3kuwF8Al3TLzwd+7ESdD/DPgb/s9nEScDdw8fE8l6H2/57BDyEATgMe6943dcubjvfP5hjzeTlwbrf8EuAJ4IUn4lxWlM0lAzyin9wkt3z4JeBDVfUdgKo6dLQiyU8DpwN3bNB4j2XquXT3Mzq5qvZ05c9U1fc3bugj9flsCvhHDP7h/ihwCvDkhox6tEnmstJ24KZu+c3Anqo63M1zD7BtXUe7uqnnU1Vfr6p93fJB4BCwsM7jPZY+n81cM8Cgn9yoWz6cOdTm5cDLk/xlknuSbANI8iPAB4Df3JCRrm7quXTl303yqSRfSvI73Q3u5mnq+VTV3cDnGBwtPgHcXlV7N2DM40wyFwCSvBR4GfDna912A/WZz8q6rQz+GH9jHcY4qannMu8M6HMLhOeaSW7ncDKDUwQXM7hS+PNJXgn8a+C2qnr8OLmnW5+5nAy8HngV8H+BTwDvYnDNxLz0mc9m4Ke6MoA9Sd5QVXet01hXM/FtQxhcl/LJqvrBFNtulD7zGewgOQP4Y2BHVf3DjMe3Fn3m8mvMMQMM+slNesuHe6rq74G/7m7Mdi7wOuD1SX6NwTntU5M8U1UjvzTcAH3mcgD4UlU9BpDk08BrmW/Q95nPxV35MwBJPsNgPvMK+knmctSVwFVD2148tO1fzHBs0+gzH5L8OHAr8F+q6p51GeHk+sxlvhkwry82TrQXgz+KjzH479jRL2JeMdRmG3Bjt7yZwX/zXjTU5l3M/8vYqefC4AupB4GFru5/AledwPN5J/B/un2cAtwJ/OzxPJeu3XnAfrqLHruy04C/ZvBF7KZu+bTj/bM5xnxO7T6P985zDrOYy1D9hmeA5+gnVFVHgKO3fNgL/K/qbvmQ5G1ds9uBbyf5CoPzvr9ZVd+ez4jH6zOXGvxX9D8AdyZ5iMF/Z/9w42fx//X8bD7J4LzvQwz+4T5YVX+24ZPoTDgXGHzR9/HqkqPb9jDwfgb3oboXeF9XNjd95gO8A3gD8K4VP1m8cMMGP6TnXObKWyBIUuM8opekxhn0ktQ4g16SGmfQS1LjDHpJapxBL0mNM+glqXH/D7AfuYDfnAzJAAAAAElFTkSuQmCC\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"95.0 confidence interval 64.0% and 73.2%\n"
]
}
],
"source": [
"# plot scores\n",
"pyplot.hist(stats)\n",
"pyplot.show()\n",
"# confidence intervals\n",
"alpha = 0.95 # for 95% confidence \n",
"p = ((1.0-alpha)/2.0) * 100 # tail regions on right and left .25 on each side indicated by P value (border)\n",
"lower = max(0.0, np.percentile(stats, p)) \n",
"p = (alpha+((1.0-alpha)/2.0)) * 100\n",
"upper = min(1.0, np.percentile(stats, p))\n",
"print('%.1f confidence interval %.1f%% and %.1f%%' % (alpha*100, lower*100, upper*100))"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# Interesting here we can start to see the Central Limit Theorem"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Loading

0 comments on commit 6592796

Please sign in to comment.