Calculating Probabilities Under Normal Distribution

Calculating Probabilities Under Normal Distribution

We can use the widget at the bottom to calculate probabilities under a normal distribution.

# import libraries
import ipywidgets as widgets
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats
from IPython.display import display
def normal_dist_widget(mu=0, sd=1, tail="Lower", bound=0, interval=(0, 1)):
    """Plot a normal density and shade a tail or interval probability.

    Draws the pdf of a normal distribution with mean ``mu`` and standard
    deviation ``sd``, shades the requested region under the curve, and
    writes the corresponding probability on the figure. The function
    returns nothing; its only effect is the figure shown by ``plt.show()``.

    Parameters
    ----------
    mu : float
        Mean of the distribution.
    sd : float
        Standard deviation of the distribution.
    tail : str
        ``"Lower"`` shades Pr(X <= bound); ``"Upper"`` shades
        Pr(X >= bound); any other value shades
        Pr(interval[0] <= X <= interval[1]).
    bound : float
        Cut-off used by the ``"Lower"`` and ``"Upper"`` modes.
    interval : sequence of two floats
        ``(lower, upper)`` limits used by the interval mode.
    """
    lower_bound, upper_bound = interval[0], interval[1]

    _, ax = plt.subplots(figsize=(8, 8))

    # Evaluate the pdf on a fixed grid covering mu +/- 4 sd. A deterministic
    # grid keeps the axis limits and ticks stable between redraws, whereas a
    # random sample would make them jump each time a slider moves.
    x1 = np.linspace(mu - 4 * sd, mu + 4 * sd, 1001)
    density = stats.norm.pdf(x1, loc=mu, scale=sd)
    x_min, x_max = int(x1[0]), int(x1[-1])
    peak = np.max(density)

    ax.plot(x1, density,
            label=r'$\mu=%.1f,\sigma^2=%.1f$' % (mu, sd * sd))

    # Leave room on both sides of the curve and above its peak so the
    # probability annotation does not overlap the density.
    ax.set_xlim(x_min - 3, x_max + 3)
    ax.set_xticks(np.arange(x_min, x_max + 1, 1))
    ax.set_ylim(0, peak + 0.1)
    ax.set_xlabel('$x$')
    # Raw strings keep the LaTeX backslashes literal; %-formatting is used
    # instead of f-strings so the LaTeX braces need no escaping.
    ax.set_ylabel(r'f(x|$\mu=%.1f,\sigma^2=%.1f$)' % (mu, sd * sd))
    ax.set_title('Probabilities under Normal Distribution')

    # Each branch selects the x-range to shade (np.arange yields an already
    # sorted, increasing series with step 0.01) and the annotation text.
    if tail == "Lower":
        cum_prob = stats.norm.cdf(bound, loc=mu, scale=sd)
        x2 = np.arange(x_min - 1, bound, 0.01)
        annotation = r"$Pr(X \leq %.3f)=%.3f$" % (bound, cum_prob)
    elif tail == "Upper":
        # The survival function is 1 - cdf, computed without the loss of
        # precision that the explicit subtraction suffers far in the tail.
        complementary_prob = stats.norm.sf(bound, loc=mu, scale=sd)
        x2 = np.arange(bound, x_max + 1, 0.01)
        annotation = r"$Pr(X \geq %.3f)=%.3f$" % (bound, complementary_prob)
    else:
        interval_prob = (stats.norm.cdf(upper_bound, loc=mu, scale=sd)
                         - stats.norm.cdf(lower_bound, loc=mu, scale=sd))
        x2 = np.arange(lower_bound, upper_bound, 0.01)
        annotation = (r"$Pr(%.3f \leq$ X $\leq %.3f)=%.3f$"
                      % (lower_bound, upper_bound, interval_prob))

    ax.fill_between(x2, stats.norm.pdf(x2, loc=mu, scale=sd), color='blue')

    # Place the text slightly left of the first tick and just above the peak.
    ax.text(x_min - 2, peak + 0.05, annotation, fontsize=14)

    plt.show()
# Controls for the distribution parameters.
mu_wid = widgets.IntSlider(
    value=0,
    min=-50,
    max=50,
    step=1,
    description="Mean",
)
sd_wid = widgets.FloatSlider(
    value=1,
    min=0.1,
    max=30,
    step=0.1,
    description="Sd",
    readout_format=".1f",
)

# Which region under the curve to shade.
tail_wid = widgets.Dropdown(
    options=["Lower", "Upper", "Middle"],
    value="Lower",
    description="Find Area:",
)

# Single cut-off for the "Lower"/"Upper" modes.
bound_wid = widgets.FloatSlider(
    value=0,
    min=-50,
    max=50,
    step=0.1,
    description="a:",
    readout_format=".2f",
)

# Pair of limits for the "Middle" mode.
interval_wid = widgets.FloatRangeSlider(
    value=[-1.96, 1.96],
    min=-50,
    max=50.0,
    step=0.1,
    description="a$<$X$<b$:",
    readout_format=".2f",
)

Here are the instructions for using the widget:

  1. For calculating Pr(X < a), select “Lower” and pick a value “a”,

  2. For calculating Pr(X>a), select “Upper” and pick a value “a”,

  3. For calculating Pr(a<X<b), select “Middle” and pick “a” and “b” values.

# Wire each control to the matching argument of normal_dist_widget so the
# plot is redrawn whenever a control changes. The trailing semicolon is
# kept so a notebook cell does not echo the call's return value.
widgets.interact(
    normal_dist_widget,
    mu=mu_wid,
    sd=sd_wid,
    tail=tail_wid,
    bound=bound_wid,
    interval=interval_wid,
);