Skip to content

Commit

Permalink
Created using Colaboratory
Browse files Browse the repository at this point in the history
  • Loading branch information
janchorowski committed Nov 5, 2019
1 parent bab0ddf commit ed6a561
Showing 1 changed file with 56 additions and 5 deletions.
61 changes: 56 additions & 5 deletions assignment2/Assignment2.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -798,7 +798,9 @@
"colab_type": "text"
},
"source": [
"TODO: copy the above cell 3 times and run simulations with different parameters. Add a 1-2 sentence note explaining how is the Kalman filter behaivng in each case."
"TODO: copy the above cell 3 times and run simulations with different parameters. Add a 1-2 sentence note explaining how the Kalman filter behaves in each case.\n",
"\n",
"Try to see what happens when the parameters of the simulator (such as the motion sigma) don't match the ones used by the filter."
]
},
{
Expand Down Expand Up @@ -909,13 +911,13 @@
},
"source": [
"def entropy(counts):\n",
" raise NotImplementedError\n",
" TODO\n",
"\n",
"def gini(counts):\n",
" raise NotImplementedError\n",
" TODO\n",
"\n",
"def mean_err_rate(counts):\n",
" raise NotImplementedError\n",
" TODO\n",
"\n",
"\n",
"# Make a plot of the purity functions\n"
Expand Down Expand Up @@ -1311,7 +1313,56 @@
"\n",
" def __call__(self, x):\n",
" TODO\n",
" return best_split, best_purity_gain"
" \n",
" def __str__(self):\n",
" return f\"NumericalSplit: {self.attr} <= {self.th}\"\n",
"\n",
" def iter_subtrees(self):\n",
" return self.subtrees\n",
" \n",
" def add_to_graphviz(self, dot, parent, print_info):\n",
" self.subtrees[0].add_to_graphviz(dot, print_info)\n",
" dot.edge(f'{id(parent)}', f'{id(self.subtrees[0])}',\n",
" label=f'<= {self.th:.2f}')\n",
" self.subtrees[1].add_to_graphviz(dot, print_info)\n",
" dot.edge(f'{id(parent)}', f'{id(self.subtrees[1])}',\n",
" label=f'> {self.th:.2f}')\n",
"\n",
"\n",
"def get_numrical_split_and_purity(df, parent_purity, purity_fun, attr,\n",
" normalize_by_split_entropy=False):\n",
"    \"\"\"Find best split threshold and compute the average purity after a split.\n",
" Args:\n",
" df: a dataframe\n",
" parent_purity: purity of the parent node\n",
" purity_fun: function to compute the purity\n",
"        attr: attribute over which to split the dataframe\n",
" normalize_by_split_entropy: if True, divide the purity gain by the split\n",
" entropy (to compute https://en.wikipedia.org/wiki/Information_gain_ratio)\n",
" \n",
" Returns:\n",
" pair of (split, purity_gain)\n",
" \"\"\"\n",
" attr_df = df[[attr, 'target']].sort_values(attr)\n",
" targets = attr_df['target']\n",
" values = attr_df[attr]\n",
" # Start with a split that puts all the samples into the right subtree\n",
" right_counts = targets.value_counts()\n",
" left_counts = right_counts * 0\n",
"\n",
" best_split = None\n",
" best_purity_gain = -1\n",
" N = len(attr_df)\n",
" for row_i in range(N - 1):\n",
" # Update the counts of targets in the left and right subtree and compute\n",
"        # the purity of the split for all possible thresholds!\n",
" # Return the best split found.\n",
"\n",
" # Remember that the attribute may have duplicate values and all samples\n",
" # with the same attribute value must end in the same subtree!\n",
" row_target = targets.iloc[row_i]\n",
"\n",
" return best_split, best_purity_gain"
],
"execution_count": 0,
"outputs": []
Expand Down

0 comments on commit ed6a561

Please sign in to comment.