from datascience import *
import numpy as np

%matplotlib inline
import matplotlib.pyplot as plots
plots.style.use('fivethirtyeight')


# Proportions of the two categories in the eligible population.
# The first entry (0.26) is the category the panel simulation reads via
# .item(0); presumably Black men vs. everyone else, going by the histogram
# label used later in this notebook -- confirm.
population_proportions = make_array(.26, .74)
population_proportions

array([ 0.26,  0.74])


# Draw one random panel of 100 from the population proportions and show the
# resulting sample proportions, one entry per category.
sample_proportions(100, population_proportions)

array([ 0.24,  0.76])


def panel_proportion(panel_size=100):
    """Simulate one random panel and return the first category's proportion.

    Parameters
    ----------
    panel_size : int, optional
        Number of people drawn for the simulated panel. Defaults to 100,
        the size used throughout this analysis.

    Returns
    -------
    float
        Proportion of the simulated panel that falls in the first category
        of ``population_proportions``. The value changes from call to call
        because the panel is drawn at random.
    """
    simulated = sample_proportions(panel_size, population_proportions)
    # .item(0) pulls the first category out as a plain Python float.
    return simulated.item(0)


# Try the simulation once; each call draws a fresh random panel, so the
# displayed proportion varies from run to run.
panel_proportion()

0.27


# Simulate 10,000 panels and record, for each one, the count (out of 100)
# in the first category: the simulated proportion times 100.
#
# The counts are gathered in a list and converted to an array once at the
# end. Growing an array with np.append inside the loop would copy every
# previously stored value on each iteration, which is needlessly quadratic.
# The number and order of random draws is the same as in an explicit loop.
panels = np.array([panel_proportion() * 100 for _ in np.arange(10000)])


# Histogram of the simulated counts. Bin edges sit on the half-integers
# 5.5, 6.5, ..., 39.5 so that each whole-number count gets its own bar.
count_bins = np.arange(5.5, 40.)
panel_counts = Table().with_column('Number of Black Men on Panel of 100', panels)
panel_counts.hist(bins=count_bins)

# Plot cosmetics: fix the vertical range (starting slightly below zero so a
# marker on the axis is fully visible) and put a red dot at x = 8 on the
# horizontal axis -- presumably the count observed on the actual panel;
# confirm against the case data.
plots.ylim(-0.002, 0.09)
plots.scatter(8, 0, color='red', s=30);


# Mendel had 929 plants, of which 709 had purple flowers.
# observed_purples is the observed proportion of purple-flowering plants
# (a fraction between 0 and 1, not a percent).
observed_purples = 709 / 929
observed_purples

0.7631862217438106


# Category proportions under the model being tested. The first entry (0.75)
# is the one purple_flowers() reads via .item(0), i.e. the purple-flowering
# category.
predicted_proportions = make_array(.75, .25)
# One random sample of 929 plants drawn under the model, shown as proportions.
sample_proportions(929, predicted_proportions)

array([ 0.76103337,  0.23896663])


def purple_flowers(sample_size=929):
    """Simulate one sample under the model; return the first category's percent.

    Parameters
    ----------
    sample_size : int, optional
        Number of plants in the simulated sample. Defaults to 929, the
        number of plants in Mendel's data.

    Returns
    -------
    float
        Percent (0-100, not a 0-1 proportion) of the simulated sample that
        falls in the first category of ``predicted_proportions``. The value
        changes from call to call because the sample is drawn at random.
    """
    simulated = sample_proportions(sample_size, predicted_proportions)
    # .item(0) pulls the first category out as a plain Python float; the
    # factor of 100 converts the proportion to a percent.
    return simulated.item(0) * 100


# Try the simulation once; the result is a percent and varies from run to
# run because each call draws a fresh random sample.
purple_flowers()

75.1345532831001


# Simulate 10,000 samples under the model and record the percent of
# purple-flowering plants in each.
#
# The percents are gathered in a list and converted to an array once at the
# end. Growing an array with np.append inside the loop would copy every
# previously stored value on each iteration, which is needlessly quadratic.
# The number and order of random draws is the same as in an explicit loop.
purples = np.array([purple_flowers() for _ in np.arange(10000)])


# Histogram of the simulated percents of purple-flowering plants.
purple_percents = Table().with_column('Percent of purple flowers in sample of 929', purples)
purple_percents.hist()

# Plot cosmetics: fix the vertical range and mark the observed percent
# (observed proportion times 100) with a red dot on the horizontal axis.
plots.ylim(-0.02, 0.25)
observed_percent = observed_purples * 100
plots.scatter(observed_percent, 0, color='red', s=30);


# Distance of each simulated percent from the model's 75%, regardless of
# direction, and its histogram.
simulated_discrepancies = abs(purples - 75)
Table().with_column(
    'Discrepancy in sample of 929 if the model is true', simulated_discrepancies
).hist()

# Plot cosmetics: fix the vertical range and mark the observed discrepancy
# with a red dot on the horizontal axis.
plots.ylim(-0.02, 0.6)
observed_discrepancy = abs(observed_purples * 100 - 75)
plots.scatter(observed_discrepancy, 0, color='red', s=30);


# Observed discrepancy: how far the observed percent of purple-flowering
# plants is from the model's 75%, in percentage points.
abs(observed_purples * 100 - 75)

1.318622174381062

Swain vs. Alabama

Mendel and Pea Flowers