In [1]:
from datascience import *
import numpy as np

%matplotlib inline
import matplotlib.pyplot as plots
plots.style.use('fivethirtyeight')

Swain vs. Alabama

In [2]:
# Proportions of the two categories in the population being sampled from.
# The first entry (0.26) is the one whose simulated counts are plotted below
# as 'Number of Black Men on Panel of 100'.
population_proportions = make_array(0.26, 0.74)
population_proportions
Out[2]:
array([ 0.26,  0.74])
In [3]:
# Draw a single simulated sample of size 100 from population_proportions.
# The displayed result is an array with one sample proportion per category.
sample_proportions(100, population_proportions)
Out[3]:
array([ 0.24,  0.76])
In [4]:
def panel_proportion():
    """Simulate one panel of 100 and return the first category's proportion.

    Draws a fresh random sample from ``population_proportions`` on every
    call, so repeated calls give different values.

    Returns:
        The first entry of the simulated proportions, as a plain Python
        float (a proportion, not a count).
    """
    simulated_panel = sample_proportions(100, population_proportions)
    return simulated_panel.item(0)
In [5]:
# Run the simulation once to see a single simulated proportion.
panel_proportion()
Out[5]:
0.27
In [6]:
# Simulate 10,000 panels and record, for each one, the simulated proportion
# scaled by 100 (i.e. the count out of a panel of 100).
#
# The values are gathered in a list and converted to an array once at the
# end: np.append returns a fresh copy on every call, so growing the array
# inside the loop re-copies all earlier results on each repetition.
panels = np.array(
    [panel_proportion() * 100 for _ in range(10000)],
    dtype=float,  # same float array the make_array()/np.append version built
)
In [7]:
# Empirical distribution of the simulated counts. Bin edges sit at the
# half-integers 5.5, 6.5, ..., 39.5 so that each bar covers one whole number.
count_bins = np.arange(5.5, 40.)
panel_table = Table().with_column('Number of Black Men on Panel of 100', panels)
panel_table.hist(bins=count_bins)

# Cosmetic adjustments: extend the y-axis slightly below zero and place a red
# dot on the horizontal axis at 8 (presumably the observed count -- confirm).
plots.ylim(-0.002, 0.09)
plots.scatter(8, 0, color='red', s=30);

Mendel and Pea Flowers

In [8]:
# Mendel's data: 929 plants in total, 709 of which had purple flowers.
purple_plants = 709
total_plants = 929

# Observed proportion of purple-flowering plants.
observed_purples = purple_plants / total_plants
observed_purples
Out[8]:
0.7631862217438106
In [9]:
# Proportions predicted by the model: 0.75 for the first category (the one
# purple_flowers() reads) and 0.25 for the second.
predicted_proportions = make_array(0.75, 0.25)

# One simulated sample of 929 plants drawn under the model.
sample_proportions(929, predicted_proportions)
Out[9]:
array([ 0.76103337,  0.23896663])
In [10]:
def purple_flowers():
    """Simulate one sample of 929 plants and return the percent in the first category.

    Draws a fresh random sample from ``predicted_proportions`` on every
    call, so repeated calls give different values.

    Returns:
        The first simulated proportion multiplied by 100, i.e. a percentage
        rather than a proportion.
    """
    simulated_sample = sample_proportions(929, predicted_proportions)
    purple_proportion = simulated_sample.item(0)
    return purple_proportion * 100
In [11]:
# Run the simulation once; the value is a percentage (proportion * 100).
purple_flowers()
Out[11]:
75.1345532831001
In [12]:
# Simulate 10,000 samples under the model and record the simulated percent
# returned by purple_flowers() for each one.
#
# The values are gathered in a list and converted to an array once at the
# end: np.append returns a fresh copy on every call, so growing the array
# inside the loop re-copies all earlier results on each repetition.
purples = np.array(
    [purple_flowers() for _ in range(10000)],
    dtype=float,  # same float array the make_array()/np.append version built
)
In [13]:
# Empirical distribution of the simulated percentages.
purple_table = Table().with_column('Percent of purple flowers in sample of 929', purples)
purple_table.hist()

# Cosmetic adjustments: extend the y-axis slightly below zero and mark the
# observed percentage with a red dot on the horizontal axis.
plots.ylim(-0.02, 0.25)
observed_percent = observed_purples * 100
plots.scatter(observed_percent, 0, color='red', s=30);
In [14]:
# Distance of each simulated percentage from 75, the percent used as the
# model's prediction in this cell.
model_percent = 75
simulated_discrepancies = abs(purples - model_percent)
Table().with_column(
    'Discrepancy in sample of 929 if the model is true', simulated_discrepancies
).hist()

# Cosmetic adjustments: extend the y-axis slightly below zero and mark the
# observed discrepancy with a red dot on the horizontal axis.
plots.ylim(-0.02, 0.6)
observed_discrepancy = abs(observed_purples * 100 - model_percent)
plots.scatter(observed_discrepancy, 0, color='red', s=30);
In [15]:
# Observed discrepancy: distance between the observed percent of purple
# flowers and 75, in percentage points.
abs(observed_purples * 100 - 75)
Out[15]:
1.318622174381062