diff --git a/ukol.ipynb b/ukol.ipynb index 43947ed..079773e 100644 --- a/ukol.ipynb +++ b/ukol.ipynb @@ -32,6 +32,7 @@ "import pandas as pd\n", "#np.set_printoptions(precision=3)\n", "#from sympy import *\n", + "from scipy import stats \n", "from scipy.stats import norm, uniform, expon\n", "#from scipy.optimize import minimize" ] @@ -46,7 +47,8 @@ "K = 28\n", "L = 8\n", "M = (((K + L) * 47) % 11) + 1\n", - "print(M)" + "print(\"M =\",M)\n", + "print(\"dataset ex0221\")" ] }, { @@ -207,10 +209,18 @@ "Zaneste příslušné hustoty s odhadnutými parametry do grafů histogramu. Diskutujte, které z rozdělení odpovídá pozorovaným datům nejlépe." ] }, + { + "cell_type": "markdown", + "id": "542a742d-2c96-4d06-bf05-49823752ed6d", + "metadata": {}, + "source": [ + "Odhady získáme pomocí momentové metody" + ] + }, { "cell_type": "code", "execution_count": null, - "id": "98f1d4c1-f86b-4a39-aae1-bafbb951c3ca", + "id": "b1995dc6-a79c-4859-a720-5dca78d63e44", "metadata": {}, "outputs": [], "source": [ @@ -230,10 +240,10 @@ " sp4.hist(data, bins=10, density=True, alpha=0.5)\n", " sp5.hist(data, bins=10, density=True, alpha=0.5)\n", " \n", - " mu, std = norm.fit(data) # Get mean and standard deviation\n", - " xmin, xmax = sp1.get_xlim() # Get x-axis limits from the histogram\n", + " mu, std = norm.fit(data)\n", + " xmin, xmax = sp1.get_xlim()\n", " x = np.linspace(xmin, xmax, 100)\n", - " p_norm = norm.pdf(x, mu, std) # Generate the PDF for the normal distribution\n", + " p_norm = norm.pdf(x, mu, std)\n", " sp2.plot(x, p_norm, 'red')\n", " sp2.fill_between(x, p_norm, alpha=0.2, color='red')\n", " sp2.set_title(f'Normal fit ($\\mu={mu:.2f}, \\sigma={std:.2f}$)')\n", @@ -342,6 +352,70 @@ "(1b) Pro každou skupinu zvlášť spočítejte oboustranný 95% konfidenční interval pro střední hodnotu." 
] }, + { + "cell_type": "markdown", + "id": "dec7a5cc-4c6b-4213-b3b8-8b58813f73cb", + "metadata": {}, + "source": [ + "Dle Studentova rozdělení:\n", + "$$\n", + "\\begin{aligned}\n", + "\\left( \\overline{X}_{n} - t_{\\frac{\\alpha}{2},n-1}\\frac{s}{\\sqrt{n}}, \\overline{X}_{n} + t_{\\frac{\\alpha}{2},n-1}\\frac{s}{\\sqrt{n}} \\right)\n", + "\\end{aligned}\n", + "$$" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "69ad2d59-bdc0-4cc7-b51c-7453f2126157", + "metadata": {}, + "outputs": [], + "source": [ + "def conf_interval(data, name):\n", + " conf = stats.t.interval(confidence = 0.95, df = len(data) - 1, loc = np.mean(data), scale = stats.sem(data))\n", + " print(f\"Oboustranný konfidenční interval 95% střední hodnoty skupiny \\\"{name}\\\":\", conf)\n", + " return conf" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "16ee6a87-8b6a-472c-9689-9d3877d31084", + "metadata": {}, + "outputs": [], + "source": [ + "def plot_conf_interval(title, list, interval):\n", + " fig, ax = plt.subplots(figsize = (6, 4))\n", + " ax.hist(list, bins = 10, color = \"blue\", alpha=0.5)\n", + " ax.set_title(f\"Oboustranný 95% interval skupiny \\\"{title}\\\"\")\n", + " ax.axvline(x = interval[0], color = \"red\")\n", + " ax.axvline(x = interval[1], color = \"red\")\n", + " plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b5fd12a4-8517-4c5e-9f18-eaf627c56afa", + "metadata": {}, + "outputs": [], + "source": [ + "conf_survived = conf_interval(survived, \"survived\")\n", + "conf_perished = conf_interval(perished, \"perished\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f17915e9-aef8-430d-8096-c13f078e25eb", + "metadata": {}, + "outputs": [], + "source": [ + "plot_conf_interval(\"survived\", survived, conf_survived)\n", + "plot_conf_interval(\"perished\", perished, conf_perished)" + ] + }, { "cell_type": "markdown", "id": "1c7cf77b",