{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Exercises for Lecture 4 (Performance analysis)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import datetime\n",
    "now = datetime.datetime.now()\n",
    "print(\"Last executed: \" + now.strftime(\"%Y-%m-%d %H:%M:%S\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Common imports (all in one cell so a fresh kernel has every dependency up front)\n",
    "import os\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import seaborn as sns\n",
    "from sklearn.datasets import fetch_openml\n",
    "from sklearn.linear_model import SGDClassifier\n",
    "from sklearn.metrics import confusion_matrix\n",
    "from sklearn.model_selection import cross_val_predict\n",
    "\n",
    "np.random.seed(42) # To make this notebook's output stable across runs\n",
    "\n",
    "# Train/test split sizes: the first N_TRAIN rows are used for training,\n",
    "# the last N_TEST rows for testing.\n",
    "N_TRAIN = 60000\n",
    "N_TEST = 10000"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Fetch MNIST dataset\n",
    "# as_frame=True guarantees pandas objects, which the .to_numpy() calls below rely on.\n",
    "# With scikit-learn >= 1.2 you can additionally pass parser=\"pandas\".\n",
    "mnist = fetch_openml('mnist_784', version=1, as_frame=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "y_train = mnist.target.iloc[:N_TRAIN].to_numpy(dtype=int)\n",
    "y_test = mnist.target.iloc[-N_TEST:].to_numpy(dtype=int)\n",
    "y_train.shape, y_test.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "X_train = mnist.data.iloc[:N_TRAIN].to_numpy()\n",
    "X_test = mnist.data.iloc[-N_TEST:].to_numpy()\n",
    "X_train.shape, X_test.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "subslide"
    }
   },
   "source": [
    "## Exercise 1: Compute the number of examples of each digit."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "subslide"
    }
   },
   "source": [
    "## Exercise 2: Construct the train and test target vectors for a binary classifier that detects the digit 8."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "subslide"
    }
   },
   "source": [
    "## Exercise 3: Use Scikit-Learn to perform 3-fold cross validation using [`cross_val_score`](https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.cross_val_score.html)."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Exercise 4: Compute the confusion matrix and store it in `conf_matrix`."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "subslide"
    }
   },
   "source": [
    "## Exercise 5: Compute the precision and recall for the confusion matrix `conf_matrix` computed above.\n",
    "\n",
    "Compute by hand and then using Scikit-Learn [precision_score](https://scikit-learn.org/stable/modules/generated/sklearn.metrics.precision_score.html) and [recall_score](https://scikit-learn.org/stable/modules/generated/sklearn.metrics.recall_score.html#sklearn.metrics.recall_score)."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "subslide"
    }
   },
   "source": [
    "## Exercise 6: Compute the $F_1$ score for the confusion matrix `conf_matrix` computed above.\n",
    "\n",
    "Compute by hand and then using Scikit-Learn [f1_score](https://scikit-learn.org/stable/modules/generated/sklearn.metrics.f1_score.html)."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "subslide"
    }
   },
   "source": [
    "## Exercise 7: Compute the false positive rate for the confusion matrix `conf_matrix` computed above."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "subslide"
    }
   },
   "source": [
    "## Exercise 8: Where is the ideal point in the ROC curve domain?"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "subslide"
    },
    "tags": [
     "exercise"
    ]
   },
   "source": [
    "## Exercise 9: What is the AUC for an ideal classifier and for a random classifier?"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "tags": [
     "exercise"
    ]
   },
   "source": [
    "Consider the confusion matrix for multiclass classification."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Reuse the classifier from the earlier exercises when it exists; otherwise create a\n",
    "# default one so this cell also runs on a fresh kernel instead of raising NameError.\n",
    "if \"sgd_clf\" not in globals():\n",
    "    sgd_clf = SGDClassifier(random_state=42)\n",
    "\n",
    "# Out-of-fold predictions for all ten digit classes (3-fold cross validation).\n",
    "y_train_pred = cross_val_predict(sgd_clf, X_train, y_train, cv=3)\n",
    "conf_mx = confusion_matrix(y_train, y_train_pred)\n",
    "conf_mx"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "subslide"
    },
    "tags": [
     "exercise"
    ]
   },
   "source": [
    "## Exercise 10: Convert the confusion matrix to probabilities and plot it."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Divide each row by its total so that every row sums to 1.\n",
    "row_sums = conf_mx.sum(axis=1, keepdims=True)\n",
    "norm_conf_mx = conf_mx / row_sums"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "fig, ax = plt.subplots(figsize=(6, 6))\n",
    "sns.heatmap(norm_conf_mx, square=True, annot=True, cbar=False, fmt='.2f', ax=ax)\n",
    "ax.set_xlabel('predicted value')\n",
    "ax.set_ylabel('true value')\n",
    "ax.set_title('Row-normalised confusion matrix');"
   ]
  }
 ],
 "metadata": {
  "celltoolbar": "Tags",
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.15"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}