Central Limit Theorem
Central Limit Theorem
We would like to investigate the sampling distribution of the sample mean \(\bar{X}\) when the population is (1) normally distributed and (2) not normally distributed.
import warnings
warnings.filterwarnings("ignore")
#import required libraries
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats
import ipywidgets as widgets
from IPython.display import display
Let’s start with an example where the true population distribution is standard normal.
#np.random.seed(1773)
##an example for clt when the true pop is standard normal
def clt_norm_true(Sim_no=500, n=25):
    """Plot the sampling distribution of the mean for a standard normal population.

    A finite "true population" of 10,000 values is generated from a normal
    distribution with mean 0 and standard deviation 1. ``Sim_no`` samples of
    size ``n`` are drawn from that population, and the histogram of their
    means is shown next to the histogram of the population itself.

    Parameters
    ----------
    Sim_no : int
        Number of simulations (samples drawn). Must be at least 1.
    n : int
        Number of data points in each sample. Must be at least 1. Points are
        drawn from the population with replacement, which is the default
        behaviour of ``np.random.choice``.

    Raises
    ------
    ValueError
        If ``Sim_no`` or ``n`` is smaller than 1; there would be nothing to
        average or plot.
    """
    if Sim_no < 1 or n < 1:
        raise ValueError(
            "Sim_no and n must both be at least 1 (got Sim_no=%r, n=%r)"
            % (Sim_no, n)
        )

    ## Fixed values
    N = 10000   # true population size
    mu = 0      # true population mean
    sigma = 1   # true population sd

    # Generate the true population under the normal distribution.
    data = stats.norm.rvs(loc=mu, scale=sigma, size=N)

    # Draw every sample in one call: row i holds the i-th sample of size n.
    samples = np.random.choice(data, size=(Sim_no, n))
    # Row means (axis=1) give one sample mean per simulation.
    sample_means = samples.mean(axis=1)

    # Create both panels up front so no unused axes is left behind the plots.
    fig, (ax_pop, ax_means) = plt.subplots(1, 2, figsize=(7, 7))

    # Left panel: histogram of the true population.
    ax_pop.hist(data, bins=25, density=True, alpha=0.5, color='b')
    ax_pop.set_title(
        r'True population with $\mu=%.1f,\sigma^2=%.1f$' % (mu, sigma * sigma)
    )
    ax_pop.set_xlabel(r'$x$')

    # Right panel: histogram of the sample means.
    ax_means.hist(sample_means, bins=25, density=True, alpha=0.5, color='b')
    ax_means.set_title(r"Sampling distribution of $\bar{X}$")
    ax_means.set_xlabel(r'$\bar{x}$')

    # Anchor the summary in axes coordinates so it stays inside the panel
    # whatever the data range turns out to be.
    ax_means.text(
        0.05, 0.95,
        "Mean = %.4f \n Standard error = %.4f"
        % (np.mean(sample_means), np.std(sample_means)),
        transform=ax_means.transAxes,
        ha='left', va='top',
    )

    # side-by-side
    fig.tight_layout()
    plt.show()
# Sliders start at the smallest usable value: with 0 samples or a sample size
# of 0 there is nothing to average, so the summary statistics become NaN.
# 'description_width': 'initial' keeps the long labels from being truncated.
nsample_wid = widgets.IntSlider(
    min=10, max=1000, step=10, value=500,
    description="Number of samples drawn",
    style={'description_width': 'initial'},
)
sample_size_wid = widgets.IntSlider(
    min=1, max=100, step=1, value=10,
    description="Sample size",
    style={'description_width': 'initial'},
)
# Re-run the simulation and redraw the figure whenever a slider moves.
widgets.interact(clt_norm_true, Sim_no=nsample_wid, n=sample_size_wid);
Next, let’s look at an example where the true population follows an exponential distribution, which is not normal.
#np.random.seed(1773)
##an example for clt when the true pop is exponential
def clt_exp_true(Sim_no=500, n=25):
    """Plot the sampling distribution of the mean for an exponential population.

    A finite "true population" of 10,000 values is generated from an
    exponential distribution with rate ``lam = 1``. ``Sim_no`` samples of size
    ``n`` are drawn from that population, and the histogram of their means is
    shown next to the histogram of the population itself.

    Parameters
    ----------
    Sim_no : int
        Number of simulations (samples drawn). Must be at least 1.
    n : int
        Number of data points in each sample. Must be at least 1. Points are
        drawn from the population with replacement, which is the default
        behaviour of ``np.random.choice``.

    Raises
    ------
    ValueError
        If ``Sim_no`` or ``n`` is smaller than 1; there would be nothing to
        average or plot.
    """
    if Sim_no < 1 or n < 1:
        raise ValueError(
            "Sim_no and n must both be at least 1 (got Sim_no=%r, n=%r)"
            % (Sim_no, n)
        )

    ## Fixed values
    N = 10000   # true population size
    lam = 1     # true population rate (lambda), default

    # Generate the true population under the exponential distribution.
    # scipy parameterises expon by scale = 1 / lambda, so convert the rate
    # that the plot title reports; for lam = 1 the two coincide.
    data = stats.expon.rvs(scale=1.0 / lam, size=N)

    # Draw every sample in one call: row i holds the i-th sample of size n.
    samples = np.random.choice(data, size=(Sim_no, n))
    # Row means (axis=1) give one sample mean per simulation.
    sample_means = samples.mean(axis=1)

    # Create both panels up front so no unused axes is left behind the plots.
    fig, (ax_pop, ax_means) = plt.subplots(1, 2, figsize=(7, 7))

    # Left panel: histogram of the true population.
    ax_pop.hist(data, bins=25, density=True, alpha=0.5, color='b')
    ax_pop.set_title(r'True population with $\lambda=%.1f$' % (lam))
    ax_pop.set_xlabel(r'$x$')

    # Right panel: histogram of the sample means.
    ax_means.hist(sample_means, bins=25, density=True, alpha=0.5, color='b')
    ax_means.set_title(r"Sampling distribution of $\bar{X}$")
    ax_means.set_xlabel(r'$\bar{x}$')

    # Anchor the summary in axes coordinates so it stays inside the panel
    # whatever the data range turns out to be.
    ax_means.text(
        0.05, 0.95,
        "Mean = %.4f \n Standard error = %.4f"
        % (np.mean(sample_means), np.std(sample_means)),
        transform=ax_means.transAxes,
        ha='left', va='top',
    )

    # side-by-side
    fig.tight_layout()
    plt.show()
# Sliders start at the smallest usable value: with 0 samples or a sample size
# of 0 there is nothing to average, so the summary statistics become NaN.
# 'description_width': 'initial' keeps the long labels from being truncated.
nsample_wid = widgets.IntSlider(
    min=10, max=1000, step=10, value=500,
    description="Number of samples drawn",
    style={'description_width': 'initial'},
)
sample_size_wid = widgets.IntSlider(
    min=1, max=100, step=1, value=10,
    description="Sample size",
    style={'description_width': 'initial'},
)
# Re-run the simulation and redraw the figure whenever a slider moves.
widgets.interact(clt_exp_true, Sim_no=nsample_wid, n=sample_size_wid);
## Turning these widgets into a dashboard is left as future work.