{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "import matplotlib.pyplot as plt" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## pandas' describe() function" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "$$X \\sim \\mathcal N(1,2^2)$$" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "# Seed NumPy's global RNG so the sample below is identical on every Restart & Run All.\n", "np.random.seed(42)\n", "# 100,000,000 draws of X = 1 + 2 Z with Z standard normal, i.e. X ~ N(1, 2^2).\n", "x = 1.0 + 2.0 * np.random.randn(100000000)" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "dataset = pd.DataFrame(x)" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0
01.362232
1-0.087704
2-0.256481
33.171572
40.081185
\n", "
" ], "text/plain": [ " 0\n", "0 1.362232\n", "1 -0.087704\n", "2 -0.256481\n", "3 3.171572\n", "4 0.081185" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dataset.head()" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0
count1.000000e+08
mean1.000089e+00
std2.000164e+00
min-1.052901e+01
25%-3.490349e-01
50%1.000359e+00
75%2.349312e+00
max1.282455e+01
\n", "
" ], "text/plain": [ " 0\n", "count 1.000000e+08\n", "mean 1.000089e+00\n", "std 2.000164e+00\n", "min -1.052901e+01\n", "25% -3.490349e-01\n", "50% 1.000359e+00\n", "75% 2.349312e+00\n", "max 1.282455e+01" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dataset.describe()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "$$\\mathrm{count} \\leftrightarrow N$$\n", "$$\\mathrm{mean} \\leftrightarrow \\bar{X}, \\ \\mathrm{std} \\leftrightarrow s$$\n", "$$ 25\\% \\leftrightarrow Q_1, \\ 50\\% \\leftrightarrow M \\equiv Q_2,\\ 75\\% \\leftrightarrow Q_3$$" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Μέτρα Ασυμμετρίας" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Μέτρο ασυμμετρίας Pearson\n", "$$\\tilde{Sk}_p = \\frac{3(\\bar{X}-M)}{s}$$" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 1.000089\n", "dtype: float64" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dataset.mean()" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 1.000359\n", "dtype: float64" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dataset.median()" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 2.000164\n", "dtype: float64" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dataset.std()" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [], "source": [ "# Pearson skewness: three times the mean-median gap, measured in standard deviations.\n", "sample_mean = dataset.mean()\n", "sample_median = dataset.median()\n", "sample_std = dataset.std()\n", "pearson = 3*(sample_mean-sample_median)/sample_std" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 -0.000405\n", "dtype: float64" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pearson" ] }, {
"cell_type": "markdown", "metadata": {}, "source": [ "Μέτρο ασυμμετρίας Bowley\n", "$$Sk_b = \\frac{(Q_3 - M)-(M-Q_1)}{Q_3-Q_1}$$" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [], "source": [ "# Quartiles and median of the sample.\n", "Q_1 = dataset.quantile(0.25)\n", "M = dataset.median()\n", "Q_3 = dataset.quantile(0.75)\n", "\n", "# Bowley skewness: difference of the upper and lower quartile spreads over the interquartile range.\n", "upper_spread = Q_3 - M\n", "lower_spread = M - Q_1\n", "bowley = (upper_spread - lower_spread)/(Q_3-Q_1)" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 -0.000164\n", "dtype: float64" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "bowley" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Καμπύλη Lorenz - Συντελεστής Gini" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [], "source": [ "# The leading 0 entry yields the row with x = 0 that the cumulative columns start from.\n", "wage = np.array([0, 5000, 10000, 15000, 20000, 50000])" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [], "source": [ "df = pd.DataFrame({'x': wage})" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
x
00
15000
210000
315000
420000
550000
\n", "
" ], "text/plain": [ " x\n", "0 0\n", "1 5000\n", "2 10000\n", "3 15000\n", "4 20000\n", "5 50000" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "$$\\Phi_n = \\frac{\\sum_{j=1}^{n} x_j}{\\sum_{j=1}^{N} x_j}$$" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "100000" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df['x'].sum()" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 0\n", "1 5000\n", "2 15000\n", "3 30000\n", "4 50000\n", "5 100000\n", "Name: x, dtype: int64" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df['x'].cumsum()" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [], "source": [ "# Cumulative share of the total: running sum of x divided by the grand total of x.\n", "running_total = df['x'].cumsum()\n", "grand_total = df['x'].sum()\n", "df['Phi'] = running_total/grand_total" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
xPhi
000.00
150000.05
2100000.15
3150000.30
4200000.50
5500001.00
\n", "
" ], "text/plain": [ " x Phi\n", "0 0 0.00\n", "1 5000 0.05\n", "2 10000 0.15\n", "3 15000 0.30\n", "4 20000 0.50\n", "5 50000 1.00" ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df" ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([0, 1, 2, 3, 4, 5])" ] }, "execution_count": 33, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.arange(len(wage))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "$$RF_n = \\frac{n}{N}$$" ] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [], "source": [ "# wage carries an extra leading 0 entry, so the population size N is len(wage) - 1.\n", "df['RF'] = np.arange(len(wage))/(len(wage)-1)" ] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
xPhiRF
000.000.0
150000.050.2
2100000.150.4
3150000.300.6
4200000.500.8
5500001.001.0
\n", "
" ], "text/plain": [ " x Phi RF\n", "0 0 0.00 0.0\n", "1 5000 0.05 0.2\n", "2 10000 0.15 0.4\n", "3 15000 0.30 0.6\n", "4 20000 0.50 0.8\n", "5 50000 1.00 1.0" ] }, "execution_count": 35, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "$$\\{(RF_n,\\Phi_n)\\}_{n=1}^N$$" ] }, { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[]" ] }, "execution_count": 37, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "df.plot(x='RF',y='Phi')\n", "plt.plot((0,1),(0,1))" ] }, { "cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([0. , 0.05, 0.15, 0.3 , 0.5 ])" ] }, "execution_count": 38, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df['Phi'][:-1].values" ] }, { "cell_type": "code", "execution_count": 39, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([0.05, 0.15, 0.3 , 0.5 , 1. ])" ] }, "execution_count": 39, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df['Phi'][1:].values" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "$$\\Sigma \\Phi_n = \\Phi_n + \\Phi_{n-1}$$" ] }, { "cell_type": "code", "execution_count": 40, "metadata": {}, "outputs": [], "source": [ "# Pairwise sums Phi_n + Phi_{n-1} for n = 1..N (one entry shorter than df).\n", "SPhi_pairs = df['Phi'][1:].values + df['Phi'][:-1].values" ] }, { "cell_type": "code", "execution_count": 41, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([0.05, 0.2 , 0.45, 0.8 , 1.5 ])" ] }, "execution_count": 41, "metadata": {}, "output_type": "execute_result" } ], "source": [ "SPhi_pairs" ] }, { "cell_type": "code", "execution_count": 42, "metadata": {}, "outputs": [], "source": [ "# Pad row 0 with 0 so the array lines up with df's index. Reading from SPhi_pairs\n", "# (instead of overwriting SPhi with itself) keeps this cell safe to re-run.\n", "SPhi = np.insert(SPhi_pairs, 0, 0.0)" ] }, { "cell_type": "code", "execution_count": 43, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([0. , 0.05, 0.2 , 0.45, 0.8 , 1.5 ])" ] }, "execution_count": 43, "metadata": {}, "output_type": "execute_result" } ], "source": [ "SPhi" ] }, { "cell_type": "code", "execution_count": 44, "metadata": {}, "outputs": [], "source": [ "df['SPhi'] = SPhi" ] }, { "cell_type": "code", "execution_count": 45, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
xPhiRFSPhi
000.000.00.00
150000.050.20.05
2100000.150.40.20
3150000.300.60.45
4200000.500.80.80
5500001.001.01.50
\n", "
" ], "text/plain": [ " x Phi RF SPhi\n", "0 0 0.00 0.0 0.00\n", "1 5000 0.05 0.2 0.05\n", "2 10000 0.15 0.4 0.20\n", "3 15000 0.30 0.6 0.45\n", "4 20000 0.50 0.8 0.80\n", "5 50000 1.00 1.0 1.50" ] }, "execution_count": 45, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "$$\\Delta RF_n = RF_n - RF_{n-1}$$" ] }, { "cell_type": "code", "execution_count": 46, "metadata": {}, "outputs": [], "source": [ "# Step widths RF_n - RF_{n-1} for n = 1..N (one entry shorter than df).\n", "DRF_steps = df['RF'][1:].values - df['RF'][0:-1].values" ] }, { "cell_type": "code", "execution_count": 47, "metadata": {}, "outputs": [], "source": [ "# Pad row 0 with 0 to match df's length. Reading from DRF_steps\n", "# (instead of overwriting DRF with itself) keeps this cell safe to re-run.\n", "DRF = np.insert(DRF_steps, 0, 0.0)" ] }, { "cell_type": "code", "execution_count": 48, "metadata": {}, "outputs": [], "source": [ "df['DRF'] = DRF" ] }, { "cell_type": "code", "execution_count": 49, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
xPhiRFSPhiDRF
000.000.00.000.0
150000.050.20.050.2
2100000.150.40.200.2
3150000.300.60.450.2
4200000.500.80.800.2
5500001.001.01.500.2
\n", "
" ], "text/plain": [ " x Phi RF SPhi DRF\n", "0 0 0.00 0.0 0.00 0.0\n", "1 5000 0.05 0.2 0.05 0.2\n", "2 10000 0.15 0.4 0.20 0.2\n", "3 15000 0.30 0.6 0.45 0.2\n", "4 20000 0.50 0.8 0.80 0.2\n", "5 50000 1.00 1.0 1.50 0.2" ] }, "execution_count": 49, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df" ] }, { "cell_type": "code", "execution_count": 50, "metadata": {}, "outputs": [], "source": [ "# Row-wise product of the pair sums and the step widths.\n", "pair_sums = df['SPhi']\n", "step_widths = df['DRF']\n", "df['SPhi_DRF'] = pair_sums * step_widths" ] }, { "cell_type": "code", "execution_count": 51, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
xPhiRFSPhiDRFSPhi_DRF
000.000.00.000.00.00
150000.050.20.050.20.01
2100000.150.40.200.20.04
3150000.300.60.450.20.09
4200000.500.80.800.20.16
5500001.001.01.500.20.30
\n", "
" ], "text/plain": [ " x Phi RF SPhi DRF SPhi_DRF\n", "0 0 0.00 0.0 0.00 0.0 0.00\n", "1 5000 0.05 0.2 0.05 0.2 0.01\n", "2 10000 0.15 0.4 0.20 0.2 0.04\n", "3 15000 0.30 0.6 0.45 0.2 0.09\n", "4 20000 0.50 0.8 0.80 0.2 0.16\n", "5 50000 1.00 1.0 1.50 0.2 0.30" ] }, "execution_count": 51, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "$$G = 1 - \\sum_{n=1}^{N} (\\Phi_n + \\Phi_{n-1})\\,\\Delta RF_n$$" ] }, { "cell_type": "code", "execution_count": 52, "metadata": {}, "outputs": [], "source": [ "# One minus the column total of the (Phi_n + Phi_{n-1}) * Delta RF_n products; row 0 contributes 0.\n", "Gini = 1 - df['SPhi_DRF'].sum()" ] }, { "cell_type": "code", "execution_count": 53, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.4" ] }, "execution_count": 53, "metadata": {}, "output_type": "execute_result" } ], "source": [ "Gini" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.16" } }, "nbformat": 4, "nbformat_minor": 4 }