week 1

2025-01-24 13:21:11 +00:00 · 2025-01-24 13:21:11 +00:00 · f2b4eaadb9
commit f2b4eaadb9
10 changed files with 18468 additions and 0 deletions
--- a/29
+++ b/29
@ -0,0 +1,29 @@
+[[source]]  # package index entry; this one is named "pypi"
+url = "https://pypi.org/simple"
+verify_ssl = true  # TLS certificate verification is enabled for this index
+name = "pypi"
+
+[packages]  # runtime dependencies; every entry is an exact "==" pin
+numpy = "==1.24.3"
+matplotlib = "==3.7.4"
+pandas = "==2.0.3"
+scikit-learn = "==1.3.2"
+seaborn = "==0.13.1"
+tensorflow = "==2.13.1"
+tensorflow-datasets = "==4.9.2"
+jupyterlab = "==4.0.10"
+jupyter-book = "==0.15.1"
+astroml = "==1.0.2.post1"
+nbdime = "==4.0.1"
+boto3 = "==1.34.15"
+pyarrow = "==14.0.2"
+pyspark = "==3.5.0"
+pyppeteer = "==1.0.2"
+dvc = "==3.38.1"
+jupyterlab-rise = "==0.42.0"  # pinned (was "*") so it matches the 0.42.0 pin in requirements.txt
+
+[dev-packages]  # no development-only dependencies are declared
+
+[requires]
+python_version = "3.11"
+python_full_version = "3.11.11"  # NOTE(review): both keys are set; presumably the full version is the effective (stricter) constraint — confirm against Pipenv's precedence rules
--- a/Pipfile.lock
+++ b/Pipfile.lock
--- a/requirements.txt
+++ b/requirements.txt
@ -0,0 +1,17 @@
+numpy==1.24.3
+matplotlib==3.7.4
+pandas==2.0.3
+scikit-learn==1.3.2
+seaborn==0.13.1
+tensorflow==2.13.1
+tensorflow_datasets==4.9.2
+jupyterlab==4.0.10
+jupyter-book==0.15.1
+jupyterlab_rise==0.42.0  # exact pin, formatted like every other entry (stray space after "==" removed)
+astroML==1.0.2.post1
+nbdime==4.0.1
+boto3==1.34.15
+pyarrow==14.0.2
+pyspark==3.5.0
+pyppeteer==1.0.2
+dvc==3.38.1
--- a/week1/slides/.ipynb_checkpoints/Lecture01_Intro-checkpoint.ipynb
+++ b/week1/slides/.ipynb_checkpoints/Lecture01_Intro-checkpoint.ipynb
--- a/week1/slides/.ipynb_checkpoints/Lecture03_Scikit-Learn-checkpoint.ipynb
+++ b/week1/slides/.ipynb_checkpoints/Lecture03_Scikit-Learn-checkpoint.ipynb
--- a/week1/slides/Lecture01_Intro.ipynb
+++ b/week1/slides/Lecture01_Intro.ipynb
--- a/week1/slides/Lecture02_Pandas.ipynb
+++ b/week1/slides/Lecture02_Pandas.ipynb
--- a/week1/slides/Lecture02_Pandas_Exercises_no_solutions.ipynb
+++ b/week1/slides/Lecture02_Pandas_Exercises_no_solutions.ipynb
@ -0,0 +1 @@
+{"cells": [{"cell_type": "markdown", "metadata": {}, "source": ["# Exercises for Lecture 2 (Data wrangling with Pandas)"]}, {"cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": ["import datetime\n", "now = datetime.datetime.now()\n", "print(\"Last executed: \" + now.strftime(\"%Y-%m-%d %H:%M:%S\"))"]}, {"cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": ["import pandas as pd\n", "import numpy as np"]}, {"cell_type": "markdown", "metadata": {"slideshow": {"slide_type": "subslide"}}, "source": ["## Exercise 1: Data selection\n"]}, {"cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": ["area = pd.Series({'California': 423967, 'Texas': 695662,\n", "                  'New York': 141297, 'Florida': 170312,\n", "                  'Illinois': 149995})\n", "pop = pd.Series({'California': 38332521, 'Texas': 26448193,\n", "                 'New York': 19651127, 'Florida': 19552860,\n", "                 'Illinois': 12882135})\n", "data = pd.DataFrame({'area':area, 'population':pop})\n", "data"]}, {"cell_type": "markdown", "metadata": {}, "source": ["Create a `DataFrame` containing only those states that have an area greater than 150,000 and a population greater than 20 million."]}, {"cell_type": "markdown", "metadata": {}, "source": ["## Exercise 2: Operating on data in Pandas\n", "Consider the following two series."]}, {"cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": ["area = pd.Series({'Alaska': 1723337, 'Texas': 695662,\n", "                  'California': 423967}, name='area')\n", "population = pd.Series({'California': 38332521, 'Texas': 26448193,\n", "                        'New York': 19651127}, name='population') "]}, {"cell_type": "markdown", "metadata": {"slideshow": {"slide_type": "-"}}, "source": ["Compute the population density for each state (where possible)."]}, {"cell_type": "markdown", "metadata": {"slideshow": 
{"slide_type": "subslide"}}, "source": ["## Exercise 3: Detecting null values"]}, {"cell_type": "markdown", "metadata": {}, "source": ["Consider the following series."]}, {"cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": ["data = pd.Series([1, np.nan, 'hello', np.nan])\n", "data"]}, {"cell_type": "markdown", "metadata": {}, "source": ["Compute a new Series of bools that specify whether each entry in the above Series is *not* NaN.  Using this Series, construct a new series from the original data that does not contain the NaN entries."]}, {"cell_type": "markdown", "metadata": {"slideshow": {"slide_type": "subslide"}}, "source": ["### Exercise 4: Remove null values directly\n", "\n", "Remove null values from the previous data `Series` directly."]}], "metadata": {"celltoolbar": "Tags", "kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 4}
--- a/week1/slides/Lecture03_Scikit-Learn.ipynb
+++ b/week1/slides/Lecture03_Scikit-Learn.ipynb
--- a/week1/slides/Lecture03_Scikit-Learn_Exercises_no_solutions.ipynb
+++ b/week1/slides/Lecture03_Scikit-Learn_Exercises_no_solutions.ipynb
@ -0,0 +1 @@
+{"cells": [{"cell_type": "markdown", "metadata": {}, "source": ["# Exercises for Lecture 3 (Introduction to Scikit-Learn)"]}, {"cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": ["import datetime\n", "now = datetime.datetime.now()\n", "print(\"Last executed: \" + now.strftime(\"%Y-%m-%d %H:%M:%S\"))"]}, {"cell_type": "markdown", "metadata": {}, "source": ["Load in example data for exercises."]}, {"cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": ["from sklearn import datasets\n", "import seaborn as sns\n", "import matplotlib.pyplot as plt"]}, {"cell_type": "markdown", "metadata": {"slideshow": {"slide_type": "slide"}}, "source": ["## Exercise 1: Classify hand-written digits "]}, {"cell_type": "markdown", "metadata": {}, "source": ["Consider the classification of hand-written digits."]}, {"cell_type": "markdown", "metadata": {"slideshow": {"slide_type": "subslide"}}, "source": ["First load example Scikit-Learn data."]}, {"cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": ["from sklearn.datasets import load_digits\n", "digits = load_digits()"]}, {"cell_type": "markdown", "metadata": {}, "source": ["- Explore the data-set and plot some example images.\n", "- Split the data-set into training and test sets.\n", "- Train a logistic regression classifier, using a Newton Conjugate Gradient solver (`newton-cg`) with an $\\ell_2$ penalty (see [`LogisticRegression`](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html) for further details).\n", "- Compute the accuracy of predictions on the test set."]}, {"cell_type": "markdown", "metadata": {"slideshow": {"slide_type": "subslide"}}, "source": ["### Plot example images"]}, {"cell_type": "markdown", "metadata": {"slideshow": {"slide_type": "subslide"}}, "source": ["### Set up feature and target data"]}, {"cell_type": "markdown", "metadata": {"slideshow": {"slide_type": 
"subslide"}}, "source": ["### Create training and test sets"]}, {"cell_type": "markdown", "metadata": {"slideshow": {"slide_type": "subslide"}}, "source": ["### Choose model, instantiate, fit and predict"]}, {"cell_type": "markdown", "metadata": {"slideshow": {"slide_type": "subslide"}}, "source": ["### Evaluate accuracy on test data"]}], "metadata": {"celltoolbar": "Tags", "kernelspec": {"display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.15"}}, "nbformat": 4, "nbformat_minor": 4}
				`@ -0,0 +1 @@`
				{"cells": [{"cell_type": "markdown", "metadata": {}, "source": ["# Exercises for Lecture 2 (Data wrangling with Pandas)"]}, {"cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": ["import datetime\n", "now = datetime.datetime.now()\n", "print(\"Last executed: \" + now.strftime(\"%Y-%m-%d %H:%M:%S\"))"]}, {"cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": ["import pandas as pd\n", "import numpy as np"]}, {"cell_type": "markdown", "metadata": {"slideshow": {"slide_type": "subslide"}}, "source": ["## Exercise 1: Data selection\n"]}, {"cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": ["area = pd.Series({'California': 423967, 'Texas': 695662,\n", " 'New York': 141297, 'Florida': 170312,\n", " 'Illinois': 149995})\n", "pop = pd.Series({'California': 38332521, 'Texas': 26448193,\n", " 'New York': 19651127, 'Florida': 19552860,\n", " 'Illinois': 12882135})\n", "data = pd.DataFrame({'area':area, 'population':pop})\n", "data"]}, {"cell_type": "markdown", "metadata": {}, "source": ["Create a `DataFrame` containing only those states that have an area greater than 150,000 and a population greater than 20 million."]}, {"cell_type": "markdown", "metadata": {}, "source": ["## Exercise 2: Operating on data in Pandas\n", "Consider the following two series."]}, {"cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": ["area = pd.Series({'Alaska': 1723337, 'Texas': 695662,\n", " 'California': 423967}, name='area')\n", "population = pd.Series({'California': 38332521, 'Texas': 26448193,\n", " 'New York': 19651127}, name='population') "]}, {"cell_type": "markdown", "metadata": {"slideshow": {"slide_type": "-"}}, "source": ["Compute the population density for each state (where possible)."]}, {"cell_type": "markdown", "metadata": {"slideshow": {"slide_type": "subslide"}}, "source": ["## Exercise 3: Detecting null values"]}, {"cell_type": "markdown", 
"metadata": {}, "source": ["Consider the following series."]}, {"cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": ["data = pd.Series([1, np.nan, 'hello', np.nan])\n", "data"]}, {"cell_type": "markdown", "metadata": {}, "source": ["Compute a new Series of bools that specify whether each entry in the above Series is not NaN. Using this Series, construct a new series from the original data that does not contain the NaN entries."]}, {"cell_type": "markdown", "metadata": {"slideshow": {"slide_type": "subslide"}}, "source": ["### Exercise 4: Remove null values directly\n", "\n", "Remove null values from the previous data `Series` directly."]}], "metadata": {"celltoolbar": "Tags", "kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 4}