from datascience import *
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plots
plots.style.use('fivethirtyeight')


3 > 1

True


type(3 > 1)

bool


type(True)

bool

# NOTE(review): `=` is assignment, not comparison, so this line is a
# SyntaxError as written (a literal cannot be assigned to). Presumably kept
# as a deliberate demo of that mistake -- confirm. The equality test is
# written with `==`, as in `3 == 3.0`.
3 = 3.0


3 == 3.0

True


10 != 2

True


x = 14
y = 3


x > 15

False


12 < x

True


x < 20

True


12 < x < 20

True


12 < x and x < 20

True


x > 13 and y < 3.14159

True


10 < x-y < 13

True


pets = make_array('cat', 'cat', 'dog', 'cat', 'dog', 'rabbit')
pets

array(['cat', 'cat', 'dog', 'cat', 'dog', 'rabbit'],
      dtype='<U6')


pets == 'cat'

array([ True,  True, False,  True, False, False], dtype=bool)


sum(pets == 'cat')

3


is_cat = pets == "cat"
is_cat

array([ True,  True, False,  True, False, False], dtype=bool)


is_cat * 2 - 1

array([ 1,  1, -1,  1, -1, -1])


np.mean(is_cat)

0.5


survey = Table.read_table('welcome_survey_sp23.csv')
survey.show(3)


r = survey.row(0)
r

Row(Year='Second Year', Extraversion=2, Number of textees=5, Hours of sleep=9.0, Handedness='Right-handed', Pant leg='Right leg in first', Sleep position='On your right side', Pets='Cat, Dog, Fish, Snake, Lizard', Piercings=-3)


type(r)

datascience.tables.Row


r.item('Year')

'Second Year'


r2 = survey.select("Extraversion", "Number of textees", "Hours of sleep").row(2)
r2

Row(Extraversion=5, Number of textees=5, Hours of sleep=8.0)


sum(r2)

18.0


(
    survey
    .select("Extraversion", "Number of textees", "Hours of sleep")
    .apply(sum)
)

array([ 16.,  13.,  18., ...,  20.,  29.,  14.])


p = survey.pivot("Sleep position", "Hours of sleep")
p.show()


p.with_column("Total", p.drop("Hours of sleep").apply(np.sum)).show()

p.with_column("Total", p.drop("Hours of sleep").apply(np.sum)).show()


p.join("Hours of sleep", survey.group("Hours of sleep")).show()

p.join("Hours of sleep", survey.group("Hours of sleep")).show()


x = 20


if x >= 18:
    print('You can legally vote.')

You can legally vote.

if boolean expression here :
    # body of the if statement goes here and must be indented


print("Can you drink?")

if x >= 21:
    print('You can legally drink.')
    print("This line of code is never run...")
    x = 0

print("This is run")
print("The value of x is", x)

Can you drink?
This is run
The value of x is 20


if x >= 21:
    print('You can legally vote and drink.')
elif x >= 18:
    print('You can legally vote.')
else:
    print('You can legally drink milk.')

You can legally vote.


def age(x):
    """Return the message describing what someone aged `x` may legally do.

    Thresholds are tried from the highest down, and the first one that `x`
    reaches decides the message. Anyone below every threshold gets the
    fallback message.
    """
    thresholds = (
        (21, 'You can legally vote and drink.'),
        (18, 'You can legally vote.'),
    )
    for minimum_age, message in thresholds:
        if x >= minimum_age:
            return message
    return 'You can legally drink milk.'


age(3)

'You can legally drink milk.'


age(20)

'You can legally vote.'


age(23)

'You can legally vote and drink.'


# Load the trip records from 'trip.csv' and preview the first three rows.
# `read_table` is called on the `Table` class itself, matching how the
# survey file is loaded, instead of on a throwaway empty `Table()` instance.
trip = Table.read_table('trip.csv')
trip.show(3)


def trip_kind(start, end):
    """Classify a trip by whether it finishes where it began.

    Returns 'round trip' when `start` and `end` compare equal, and
    'one way' otherwise.
    """
    return 'round trip' if start == end else 'one way'


kinds = trip.with_column('Trip Kind', 
                         trip.apply(trip_kind, 'Start Station', 'End Station'))
kinds.show(3)


kinds_pivot = (
    kinds
    .where('Duration', are.below(600))
    .pivot('Trip Kind', 'Start Station')
    .sort("round trip", descending=True)
    .take(np.arange(10))
)
kinds_pivot


mornings = make_array('wake up', 'sleep in')


np.random.choice(mornings)

'sleep in'


np.random.choice(mornings)

'sleep in'


np.random.choice(mornings)

'wake up'


np.random.choice(mornings, 7)

array(['wake up', 'sleep in', 'wake up', 'wake up', 'sleep in', 'sleep in',
       'sleep in'],
      dtype='<U8')


np.random.choice(mornings, 7)

array(['sleep in', 'wake up', 'sleep in', 'sleep in', 'sleep in',
       'sleep in', 'wake up'],
      dtype='<U8')


morning_week = np.random.choice(mornings, 7)
morning_week

array(['sleep in', 'wake up', 'wake up', 'wake up', 'wake up', 'sleep in',
       'wake up'],
      dtype='<U8')


sum(morning_week == 'wake up')

5


sum(morning_week == 'sleep in')

2


np.mean(morning_week == 'sleep in')

0.2857142857142857


np.random.seed(42)
np.sum(np.random.choice(mornings, 7) == "sleep in")

2


die_faces = np.arange(1, 7)
die_faces

array([1, 2, 3, 4, 5, 6])


np.random.choice(die_faces)

5


def simulate_one_round():
    """Play one round of the dice game and return its outcome for me.

    Two values are drawn at random from `die_faces`, mine first and then
    yours. The result is 1 if my roll is higher, -1 if yours is higher,
    and 0 otherwise (a tie).
    """
    mine = np.random.choice(die_faces)
    yours = np.random.choice(die_faces)

    if mine > yours:
        return 1
    if mine < yours:
        return -1
    return 0

def simulate_one_round():
    """Play one round of the dice game and return its outcome for me.

    Two values are drawn at random from `die_faces`, mine first and then
    yours. The result is 1 if my roll is higher, -1 if yours is higher,
    and 0 otherwise (a tie).
    """
    mine = np.random.choice(die_faces)
    yours = np.random.choice(die_faces)

    if mine > yours:
        return 1
    if mine < yours:
        return -1
    return 0


simulate_one_round()

-1


for pet in make_array('cat', 'dog', 'rabbit'):
    print('I love my ' + pet)

I love my cat
I love my dog
I love my rabbit


x = 0
for i in np.arange(1, 4):
    x = x + i
    print(x)

print("The final value of x is:", x)

1
3
6
The final value of x is: 6


N = 10_000
winnings = 0

for i in np.arange(N):
    winnings = winnings + simulate_one_round()
    
print("I win", winnings, "dollars.")

I win 18 dollars.

N = 10_000
winnings = 0

for i in np.arange(N):
    winnings = winnings + simulate_one_round()

print("I win", winnings, "dollars.")


N = 10_000
rolls = Table().with_columns(
    "my roll", np.random.choice(die_faces, N),
    "your roll", np.random.choice(die_faces, N)
)

my_roll = rolls.column("my roll")
your_roll = rolls.column("your roll")
outcome = 1*(my_roll > your_roll) + -1*(my_roll < your_roll)

rolls = rolls.with_column("outcome", outcome)
rolls

N = 10_000
rolls = Table().with_columns(
    "my roll", np.random.choice(die_faces, N),
    "your roll", np.random.choice(die_faces, N)
)

my_roll = rolls.column("my roll")
your_roll = rolls.column("your roll")
outcome = 1*(my_roll > your_roll) + -1*(my_roll < your_roll)

rolls = rolls.with_column("outcome", outcome)
rolls


print("My total winnings:", rolls.column("outcome").sum())

My total winnings: -126


first = np.arange(4)
second = np.arange(10, 17)


np.append(first, 6)

array([0, 1, 2, 3, 6])


first

array([0, 1, 2, 3])


np.append(first, second)

array([ 0,  1,  2,  3, 10, 11, 12, 13, 14, 15, 16])


first

array([0, 1, 2, 3])


second

array([10, 11, 12, 13, 14, 15, 16])


N = 10_000

game_outcomes = make_array()

for i in np.arange(N):
    game_outcomes = np.append(game_outcomes, simulate_one_round())
    
game_outcomes

array([-1.,  0.,  1., ..., -1., -1.,  0.])

N = 10_000

game_outcomes = make_array()

for i in np.arange(N):
    game_outcomes = np.append(game_outcomes, simulate_one_round())

game_outcomes


coin = make_array('heads', 'tails')


sum(np.random.choice(coin, 100) == 'heads')

50


# Simulate one outcome

def num_heads():
    """Simulate 100 random draws from `coin` and count the 'heads'."""
    tosses = np.random.choice(coin, 100)
    # Comparing the array to 'heads' gives one boolean per toss; summing
    # the booleans counts the True entries.
    came_up_heads = tosses == 'heads'
    return sum(came_up_heads)


# Decide how many times you want to repeat the experiment

repetitions = 10000


# Simulate that many outcomes

outcomes = make_array()

for i in np.arange(repetitions):
    outcomes = np.append(outcomes, num_heads())


heads = Table().with_column('Heads', outcomes)
heads.hist(bins = np.arange(29.5, 70.6))


ages = make_array(16, 22, 18, 15, 19, 39, 27, 21)
patients = Table().with_columns("Patient Id", np.arange(len(ages))+1000, 'Age', ages,)
patients


older = patients.column("Age") > 21
older

array([False,  True, False, False, False,  True,  True, False], dtype=bool)


patients.where(older)


patients.column("Patient Id") % 2

array([0, 1, 0, 1, 0, 1, 0, 1])


even_ids = patients.column("Patient Id") % 2 == 0
even_ids

array([ True, False,  True, False,  True, False,  True, False], dtype=bool)


patients.where(even_ids)


patients.where(even_ids | older)

Hours of sleep	On your back	On your left side	On your right side	On your stomach
3	1	2	0	1
4	2	4	2	1
4.5	0	0	1	0
5	19	11	25	7
5.5	0	0	1	0
6	71	95	102	25
6.5	8	11	4	0
7	123	165	210	68
7.5	7	10	20	7
8	107	116	137	53
8.5	2	2	0	1
9	17	17	17	9
9.5	0	1	0	0
10	5	5	5	2
12	1	0	0	0
15	0	0	0	1

Hours of sleep	On your back	On your left side	On your right side	On your stomach	Total
3	1	2	0	1	4
4	2	4	2	1	9
4.5	0	0	1	0	1
5	19	11	25	7	62
5.5	0	0	1	0	1
6	71	95	102	25	293
6.5	8	11	4	0	23
7	123	165	210	68	566
7.5	7	10	20	7	44
8	107	116	137	53	413
8.5	2	2	0	1	5
9	17	17	17	9	60
9.5	0	1	0	0	1
10	5	5	5	2	17
12	1	0	0	0	1
15	0	0	0	1	1

Hours of sleep	On your back	On your left side	On your right side	On your stomach	count
3	1	2	0	1	4
4	2	4	2	1	9
4.5	0	0	1	0	1
5	19	11	25	7	62
5.5	0	0	1	0	1
6	71	95	102	25	293
6.5	8	11	4	0	23
7	123	165	210	68	566
7.5	7	10	20	7	44
8	107	116	137	53	413
8.5	2	2	0	1	5
9	17	17	17	9	60
9.5	0	1	0	0	1
10	5	5	5	2	17
12	1	0	0	0	1
15	0	0	0	1	1

Trip ID	Duration	Start Date	Start Station	Start Terminal	End Date	End Station	End Terminal	Bike #	Subscriber Type	Zip Code
913460	765	8/31/2015 23:26	Harry Bridges Plaza (Ferry Building)	50	8/31/2015 23:39	San Francisco Caltrain (Townsend at 4th)	70	288	Subscriber	2139
913459	1036	8/31/2015 23:11	San Antonio Shopping Center	31	8/31/2015 23:28	Mountain View City Hall	27	35	Subscriber	95032
913455	307	8/31/2015 23:13	Post at Kearny	47	8/31/2015 23:18	2nd at South Park	64	468	Subscriber	94107

Trip ID	Duration	Start Date	Start Station	Start Terminal	End Date	End Station	End Terminal	Bike #	Subscriber Type	Zip Code	Trip Kind
913460	765	8/31/2015 23:26	Harry Bridges Plaza (Ferry Building)	50	8/31/2015 23:39	San Francisco Caltrain (Townsend at 4th)	70	288	Subscriber	2139	one way
913459	1036	8/31/2015 23:11	San Antonio Shopping Center	31	8/31/2015 23:28	Mountain View City Hall	27	35	Subscriber	95032	one way
913455	307	8/31/2015 23:13	Post at Kearny	47	8/31/2015 23:18	2nd at South Park	64	468	Subscriber	94107	one way

Lecture 13 Notebook¶

Boolean expressions¶

Boolean Expressions with Arrays¶

Rows & Apply¶

Math On Rows¶

Conditional Statements¶

Putting the pieces together¶

Simulation¶

Playing a Game of Chance¶

Simulating the roll of a die¶

`For` Statements¶

Appending Arrays¶

Another example: simulating heads in 100 coin tosses¶

Optional: Advanced `where`¶

Year	Extraversion	Number of textees	Hours of sleep	Handedness	Pant leg	Sleep position	Pets	Piercings
Second Year	2	5	9	Right-handed	Right leg in first	On your right side	Cat, Dog, Fish, Snake, Lizard	-3
First Year	2	3	8	Right-handed	I don't know	On your back	None	-1
First Year	5	5	8	Right-handed	Right leg in first	On your left side	Bearded dragon	0

Hours of sleep	On your back	On your left side	On your right side	On your stomach
3	1	2	0	1
4	2	4	2	1
4.5	0	0	1	0
5	19	11	25	7
5.5	0	0	1	0
6	71	95	102	25
6.5	8	11	4	0
7	123	165	210	68
7.5	7	10	20	7
8	107	116	137	53
8.5	2	2	0	1
9	17	17	17	9
9.5	0	1	0	0
10	5	5	5	2
12	1	0	0	0
15	0	0	0	1

Hours of sleep	On your back	On your left side	On your right side	On your stomach	Total
3	1	2	0	1	4
4	2	4	2	1	9
4.5	0	0	1	0	1
5	19	11	25	7	62
5.5	0	0	1	0	1
6	71	95	102	25	293
6.5	8	11	4	0	23
7	123	165	210	68	566
7.5	7	10	20	7	44
8	107	116	137	53	413
8.5	2	2	0	1	5
9	17	17	17	9	60
9.5	0	1	0	0	1
10	5	5	5	2	17
12	1	0	0	0	1
15	0	0	0	1	1

Hours of sleep	On your back	On your left side	On your right side	On your stomach	count
3	1	2	0	1	4
4	2	4	2	1	9
4.5	0	0	1	0	1
5	19	11	25	7	62
5.5	0	0	1	0	1
6	71	95	102	25	293
6.5	8	11	4	0	23
7	123	165	210	68	566
7.5	7	10	20	7	44
8	107	116	137	53	413
8.5	2	2	0	1	5
9	17	17	17	9	60
9.5	0	1	0	0	1
10	5	5	5	2	17
12	1	0	0	0	1
15	0	0	0	1	1

Start Station	one way	round trip
Embarcadero at Sansome	6938	120
Harry Bridges Plaza (Ferry Building)	8643	105
San Francisco Caltrain 2 (330 Townsend)	12021	104
2nd at South Park	6484	98
San Francisco Caltrain (Townsend at 4th)	11181	95
2nd at Townsend	9513	83
Powell Street BART	7156	81
Market at 10th	6599	80
Civic Center BART (7th at Market)	5179	73
Townsend at 7th	8073	68

Patient Id	Age
1000	16
1001	22
1002	18
1003	15
1004	19
1005	39
1006	27
1007	21

Patient Id	Age
1001	22
1005	39
1006	27

Patient Id	Age
1000	16
1002	18
1004	19
1006	27

Patient Id	Age
1000	16
1001	22
1002	18
1004	19
1005	39
1006	27

Hours of sleep	On your back	On your left side	On your right side	On your stomach
3	1	2	0	1
4	2	4	2	1
4.5	0	0	1	0
5	19	11	25	7
5.5	0	0	1	0
6	71	95	102	25
6.5	8	11	4	0
7	123	165	210	68
7.5	7	10	20	7
8	107	116	137	53
8.5	2	2	0	1
9	17	17	17	9
9.5	0	1	0	0
10	5	5	5	2
12	1	0	0	0
15	0	0	0	1

Hours of sleep	On your back	On your left side	On your right side	On your stomach	Total
3	1	2	0	1	4
4	2	4	2	1	9
4.5	0	0	1	0	1
5	19	11	25	7	62
5.5	0	0	1	0	1
6	71	95	102	25	293
6.5	8	11	4	0	23
7	123	165	210	68	566
7.5	7	10	20	7	44
8	107	116	137	53	413
8.5	2	2	0	1	5
9	17	17	17	9	60
9.5	0	1	0	0	1
10	5	5	5	2	17
12	1	0	0	0	1
15	0	0	0	1	1

Hours of sleep	On your back	On your left side	On your right side	On your stomach	count
3	1	2	0	1	4
4	2	4	2	1	9
4.5	0	0	1	0	1
5	19	11	25	7	62
5.5	0	0	1	0	1
6	71	95	102	25	293
6.5	8	11	4	0	23
7	123	165	210	68	566
7.5	7	10	20	7	44
8	107	116	137	53	413
8.5	2	2	0	1	5
9	17	17	17	9	60
9.5	0	1	0	0	1
10	5	5	5	2	17
12	1	0	0	0	1
15	0	0	0	1	1

Lecture 13 Notebook¶

Boolean expressions¶

Boolean Expressions with Arrays¶

Rows & Apply¶

Math On Rows¶

Conditional Statements¶

Putting the pieces together¶

Simulation¶

Playing a Game of Chance¶

Simulating the roll of a die¶

For Statements¶

Appending Arrays¶

Another example: simulating heads in 100 coin tosses¶

Optional: Advanced where¶

`For` Statements¶

Optional: Advanced `where`¶

Hours of sleep	On your back	On your left side	On your right side	On your stomach
3	1	2	0	1
4	2	4	2	1
4.5	0	0	1	0
5	19	11	25	7
5.5	0	0	1	0
6	71	95	102	25
6.5	8	11	4	0
7	123	165	210	68
7.5	7	10	20	7
8	107	116	137	53
8.5	2	2	0	1
9	17	17	17	9
9.5	0	1	0	0
10	5	5	5	2
12	1	0	0	0
15	0	0	0	1

Hours of sleep	On your back	On your left side	On your right side	On your stomach	Total
3	1	2	0	1	4
4	2	4	2	1	9
4.5	0	0	1	0	1
5	19	11	25	7	62
5.5	0	0	1	0	1
6	71	95	102	25	293
6.5	8	11	4	0	23
7	123	165	210	68	566
7.5	7	10	20	7	44
8	107	116	137	53	413
8.5	2	2	0	1	5
9	17	17	17	9	60
9.5	0	1	0	0	1
10	5	5	5	2	17
12	1	0	0	0	1
15	0	0	0	1	1

Hours of sleep	On your back	On your left side	On your right side	On your stomach	count
3	1	2	0	1	4
4	2	4	2	1	9
4.5	0	0	1	0	1
5	19	11	25	7	62
5.5	0	0	1	0	1
6	71	95	102	25	293
6.5	8	11	4	0	23
7	123	165	210	68	566
7.5	7	10	20	7	44
8	107	116	137	53	413
8.5	2	2	0	1	5
9	17	17	17	9	60
9.5	0	1	0	0	1
10	5	5	5	2	17
12	1	0	0	0	1
15	0	0	0	1	1