Calculating Probabilities Under Normal Distribution
Calculating Probabilities Under Normal Distribution
We can use the widget at the bottom to calculate probabilities under a normal distribution.
# import libraries
import ipywidgets as widgets
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats
from IPython.display import display
def normal_dist_widget(mu = 0, sd = 1, tail = "Lower", bound = 0, interval = (0, 1)):
    """Plot a normal pdf, shade a tail or interval, and annotate its probability.

    Parameters
    ----------
    mu : number
        Mean of the normal distribution.
    sd : number
        Standard deviation of the normal distribution; must be positive.
    tail : str
        "Lower" shades and reports Pr(X <= bound).
        "Upper" shades and reports Pr(X >= bound).
        Any other value (the dropdown uses "Middle") shades and reports
        Pr(interval[0] <= X <= interval[1]).
    bound : number
        Cut-off used by the "Lower" and "Upper" modes.
    interval : sequence of two numbers
        (lower, upper) limits used by the "Middle" mode.

    Returns
    -------
    None
        The figure is drawn and handed to ``plt.show()``.

    Raises
    ------
    ValueError
        If ``sd`` is not positive.
    """
    # Checked first so a bad value fails clearly instead of producing NaN
    # densities and an axis-limit error later on.
    if sd <= 0:
        raise ValueError("sd must be positive, got %r" % (sd,))

    lower_bound, upper_bound = interval[0], interval[1]

    # Evaluate the pdf on a fixed grid of +/- 4 standard deviations around the
    # mean. A deterministic grid keeps the axis limits stable between redraws,
    # which a random sample does not.
    grid = np.linspace(mu - 4 * sd, mu + 4 * sd, 1001)
    density = stats.norm.pdf(grid, loc = mu, scale = sd)
    # The pdf attains its maximum at the mean.
    peak = stats.norm.pdf(mu, loc = mu, scale = sd)

    # Integer axis bounds that always enclose the grid (floor/ceil rather than
    # int(), which truncates toward zero and would be asymmetric).
    x_min = int(np.floor(grid[0]))
    x_max = int(np.ceil(grid[-1]))

    fig, ax = plt.subplots(figsize = (8, 8))
    ax.plot(grid, density, label = r'$\mu=%.1f,\sigma^2=%.1f$' % (mu, sd * sd))
    # Leave a margin of 3 units on each side of the curve.
    ax.set_xlim(x_min - 3, x_max + 3)
    # One tick per integer instead of matplotlib's default tick marks.
    ax.set_xticks(np.arange(x_min, x_max + 1, 1))
    # The y-axis starts at 0 and leaves room above the peak for the text.
    ax.set_ylim(0, peak + 0.1)
    ax.set_xlabel('$x$')
    # Raw strings keep the LaTeX backslashes without invalid-escape warnings.
    ax.set_ylabel(r'f(x|$\mu=%.1f,\sigma^2=%.1f$)' % (mu, sd * sd))
    ax.set_title('Probabilities under Normal Distribution')

    # Each branch picks the probability, the x-values to shade (np.arange gives
    # an increasing series, empty when the range is empty) and the caption.
    if tail == "Lower":
        prob = stats.norm.cdf(bound, loc = mu, scale = sd)
        shade_x = np.arange(x_min - 1, bound, 0.01)
        caption = r"$Pr(X \leq %.3f)=%.3f$" % (bound, prob)
    elif tail == "Upper":
        # sf is the survival function, i.e. 1 - cdf.
        prob = stats.norm.sf(bound, loc = mu, scale = sd)
        shade_x = np.arange(bound, x_max + 1, 0.01)
        caption = r"$Pr(X \geq %.3f)=%.3f$" % (bound, prob)
    else:
        prob = (stats.norm.cdf(upper_bound, loc = mu, scale = sd)
                - stats.norm.cdf(lower_bound, loc = mu, scale = sd))
        shade_x = np.arange(lower_bound, upper_bound, 0.01)
        caption = r"$Pr(%.3f \leq$ X $\leq %.3f)=%.3f$" % (lower_bound, upper_bound, prob)

    # Shade the area under the curve that corresponds to the probability.
    ax.fill_between(shade_x, stats.norm.pdf(shade_x, loc = mu, scale = sd), color = 'blue')
    # Put the caption near the top-left corner, just above the peak.
    ax.text(x_min - 2, peak + 0.05, caption, fontsize = 14)
    plt.show()
# Integer slider for the mean of the distribution.
mu_wid = widgets.IntSlider(
    value=0,
    min=-50,
    max=50,
    step=1,
    description="Mean",
)

# Float slider for the standard deviation; the minimum of 0.1 keeps it positive.
sd_wid = widgets.FloatSlider(
    value=1,
    min=0.1,
    max=30,
    step=0.1,
    description="Sd",
    readout_format='.1f',
)

# Dropdown that selects which area under the curve is computed.
tail_wid = widgets.Dropdown(
    options=['Lower', 'Upper', 'Middle'],
    value='Lower',
    description='Find Area:',
)

# Single cut-off value "a".
bound_wid = widgets.FloatSlider(
    value=0,
    min=-50,
    max=50,
    step=0.1,
    description='a:',
    readout_format='.2f',
)

# Pair of limits "a" and "b".
interval_wid = widgets.FloatRangeSlider(
    value=[-1.96, 1.96],
    min=-50,
    max=50.0,
    step=0.1,
    description='a$<$X$<b$:',
    readout_format='.2f',
)
Here are the instructions for using the widget:
For calculating Pr(X < a), select “Lower” and pick a value “a”.
For calculating Pr(X > a), select “Upper” and pick a value “a”.
For calculating Pr(a < X < b), select “Middle” and pick the values “a” and “b”.
# Connect every argument of normal_dist_widget to its control.
# The trailing semicolon is kept from the original notebook cell.
widgets.interact(
    normal_dist_widget,
    mu=mu_wid,
    sd=sd_wid,
    tail=tail_wid,
    bound=bound_wid,
    interval=interval_wid,
);