from datascience import *
%matplotlib inline
import matplotlib.pyplot as plots
plots.style.use('fivethirtyeight')
import numpy as np
Suppose there are 2000 students in Data 8, and each student flips a coin 100 times and then runs a hypothesis test of whether their coin is fair, using a p-value cutoff of 0.05.
We know that we gave all of them fair coins. How often will they incorrectly reject the null hypothesis?
Could you write code to simulate the process of one student running this hypothesis test?
def simulate_one_hypothesis_test():
    num_coin_flips = 100

    # Student collects data by actually flipping the coin (we simulate here).
    # It's a fair coin, but we didn't tell the student.
    obs_flips = np.random.choice(["H", "T"], num_coin_flips)

    # Define the test statistic
    def test_statistic(flips_dataset):
        return ...

    # Compute the observed value of the statistic on our actual data
    obs_statistic = ...

    # Define a function to simulate the statistic under the null hypothesis
    def simulate_one_statistic():
        sim_flips = ...
        sim_statistic = ...
        return sim_statistic

    # Hypothesis test: simulate from the null hypothesis
    simulated_statistics = make_array()
    for i in np.arange(1000): # Simulate 1000 trials
        ...

    # Compute the p-value
    p_value = ...

    return ... # The p-value that the simulated student got
Solution below...
In the following, we will use simulation to model each student running their own simulation-based hypothesis test. Very meta!
def simulate_one_hypothesis_test():
    num_coin_flips = 100
    num_simulations = 1000

    # Student collects data by actually flipping the coin (we simulate here).
    # It's a fair coin, but we didn't tell the student.
    obs_flips = np.random.choice(["H", "T"], num_coin_flips)

    # Define the test statistic
    def test_statistic(flips_dataset):
        num_heads = sum(flips_dataset == "H")
        return np.abs(num_heads - 50)

    # Compute the observed value of the statistic on our actual data
    obs_statistic = test_statistic(obs_flips)

    # Define a function to simulate the statistic under the null hypothesis
    def simulate_one_statistic():
        sim_flips = np.random.choice(["H", "T"], num_coin_flips)
        sim_statistic = test_statistic(sim_flips)
        return sim_statistic

    # Hypothesis test: simulate from the null hypothesis
    simulated_statistics = make_array()
    for i in np.arange(num_simulations): # Simulate 1000 trials
        simulated_statistics = np.append(simulated_statistics, simulate_one_statistic())

    # Compute the p-value
    p_value = sum(simulated_statistics >= obs_statistic) / len(simulated_statistics)

    return p_value
simulate_one_hypothesis_test()
0.76700000000000002
Simulate the entire class running the experiment.
all_data8_students = make_array()
for i in np.arange(2000):
    all_data8_students = np.append(all_data8_students, simulate_one_hypothesis_test())
You would seldom do this in practice, but here we can visualize the distribution of p-values that all the students in the class get. Some will conclude that they have an unfair coin.
tbl = Table().with_column("P Values", all_data8_students)
tbl.hist("P Values", bins=np.arange(0,1,0.01), right_end=0.05)
print(sum(all_data8_students <= 0.05), " would falsely reject the null hypothesis.")
70 would falsely reject the null hypothesis.
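For reference, with a 0.05 cutoff we would expect at most about 5% of the class, roughly 0.05 × 2000 = 100 students, to falsely reject. The rate often comes out somewhat below 5% because the test statistic only takes whole-number values, which makes the test slightly conservative, and the count will vary from run to run (the 70 above is from one particular run). A quick check of the rate from the run above:

# Fraction of the 2000 simulated students whose p-value fell at or below the 0.05 cutoff
np.count_nonzero(all_data8_students <= 0.05) / 2000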
Manufacturers of Super Soda run a taste test and 91 out of 200 tasters prefer Super Soda over its rival. The boss is upset! He asks:
Do fewer people prefer Super Soda, or is this just chance?
You run a hypothesis test. Under the null hypothesis, tasters have no preference: each one is equally likely to pick Super Soda or its rival, and 91 out of 200 is just chance variation. The alternative is that fewer than half of tasters prefer Super Soda.
You pick a p-value cutoff of 0.05.
obs_statistic = 91
Simulating the test statistic under the null hypothesis:
def simulate_one_count(sample_size):
    simulated_data = np.random.choice(['Super', 'Rival'], sample_size)
    simulated_statistic = np.count_nonzero(simulated_data == "Super")
    return simulated_statistic
simulate_one_count(200)
104
Running many simulations of the test statistic
num_simulations = 10_000
counts = make_array()
for i in np.arange(num_simulations):
    counts = np.append(counts, simulate_one_count(200))
Plotting the distribution of the test statistic under the null hypothesis:
trials = Table().with_column('Number of Heads', counts)
trials.hist(right_end=91)
plots.ylim(-0.001, 0.055)
plots.scatter(91, 0, color='red', s=40, zorder=3)
plots.title('Prediction Under the Null');
p_value = np.count_nonzero(counts <= 91)/len(counts)
print("The p-value is", p_value)
The p-value is 0.117
Conclusion: the p-value (about 0.117) is above the 0.05 cutoff, so we do not reject the null hypothesis. A result of 91 out of 200 is consistent with tasters having no preference.
# Keeping the data fixed, we can re-run the test with a new simulation under the null
def simulate_null(num_simulations, sample_size):
    counts = make_array()
    for i in np.arange(num_simulations):
        counts = np.append(counts, simulate_one_count(sample_size))
    return counts
# Run the whole test: simulate under the null and compute the p-value for the observed statistic
def run_test(num_simulations, sample_size, obs_statistic):
    counts = simulate_null(num_simulations, sample_size)
    # Compute the p-value
    p_value = np.count_nonzero(counts <= obs_statistic) / len(counts)
    return p_value
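As a quick sanity check, a single call with the observed count of 91 out of 200 should give a p-value close to the one computed above (the exact value will vary a little from run to run):

run_test(10_000, 200, 91)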
We can again run multiple simulations of our simulation.
# Let's repeat that 100 times for each number of simulations
tests = Table(['simulations', 'p-value for 91'])
for k in np.arange(100): # run the full set of simulations 100 times
    for num_sims in [100, 1000, 10000]:
        p_value = run_test(num_sims, 200, 91)
        tests = tests.with_row([num_sims, p_value])
tests.show(3)
simulations | p-value for 91
100         | 0.13
1000        | 0.092
10000       | 0.1199
... (297 rows omitted)
We can then visualize the distribution of p-values. Notice how as we increase the number of simulations the estimate for the p-value concentrates around a single number.
# For larger numbers of simulations, p-values are more consistent
tests.hist("p-value for 91", group='simulations', bins=20)
The reason the p-values concentrate towards the true p-value is that the empirical distribution under the null becomes a better approximation of the true null distribution as we increase the number of simulations. More is better!
t1 = Table().with_columns("Number of Heads", simulate_null(100, 200),
                          "Simulation Size", 100)
t2 = Table().with_columns("Number of Heads", simulate_null(100_000, 200),
                          "Simulation Size", 100_000)
t1.append(t2).hist(group='Simulation Size', bins=np.arange(70.5, 131, 1))
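For reference, the quantity all of these simulations estimate has an exact value: under the null, the number preferring Super Soda is a Binomial(200, 1/2) count, so the p-value is the chance that such a count is at most 91. As an aside (scipy is not part of the datascience toolkit used in this class, though it is widely available), we could compute it directly:

from scipy import stats
# Exact chance of 91 or fewer "Super" choices out of 200 under the 50/50 null
stats.binom.cdf(91, 200, 0.5)

This comes out to roughly 0.11–0.12, which is where the simulated p-values above concentrate as the number of simulations grows.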
Larger samples give us more information about the population and also allow us to test more subtle differences.
Suppose that the true proportion of people who prefer Super Soda is 45%.
true_proportion = 0.45
true_distribution = make_array(true_proportion, 1 - true_proportion)
true_distribution
array([ 0.45, 0.55])
Taste tests with 200 people will give various numbers of people who prefer Super Soda.
sample_size = 200
sample_proportions(sample_size, true_distribution) * sample_size
array([ 89., 111.])
# If you run a taste test for 200 people, what might you conclude?
def run_experiment(num_simulations, sample_size, true_proportion):
    # Collect data
    true_distribution = make_array(true_proportion, 1 - true_proportion)
    taste_test_results = sample_proportions(sample_size, true_distribution) * sample_size
    observed_stat_from_this_sample = taste_test_results.item(0)

    # Conduct hypothesis test
    p_value = run_test(num_simulations, sample_size, observed_stat_from_this_sample)

    return p_value
run_experiment(10000, 200, 0.45)
0.3146
Try using different values for the true proportion and sample size. What happens as the true proportion gets closer to 0.5? What happens if we increase the sample size?
# Let's imagine running our taste test over and over again to see how often we reject the null
true_proportion = 0.45
sample_size = 100

p_values = make_array()
for k in np.arange(100):
    p_value = run_experiment(1000, sample_size, true_proportion)
    p_values = np.append(p_values, p_value)

Table().with_column('P-value', p_values).hist(0, right_end=0.05, bins=np.arange(0,1,0.1))
print("Percent that correctly reject the null", 100*np.mean(p_values <= 0.05))
Percent that correctly reject the null 27.0
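To see the effect of sample size directly, here is a rough sketch that reuses run_experiment for a few sample sizes, keeping the true proportion at 0.45 (the percentages are noisy estimates, will vary from run to run, and the loop takes a little while):

# Sketch: estimated chance of correctly rejecting the null (the power) for several sample sizes
for sample_size in [100, 200, 400]:
    p_values = make_array()
    for k in np.arange(50):
        p_values = np.append(p_values, run_experiment(1000, sample_size, 0.45))
    print(sample_size, "tasters:", 100 * np.mean(p_values <= 0.05), "% correctly reject the null")

As the sample size grows, the rejection rate should climb toward 100%, which is the sense in which larger samples let us detect more subtle differences from 50%.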