In [1]:
from datascience import *
import numpy as np

%matplotlib inline
import matplotlib.pyplot as plots
# Apply the 'fivethirtyeight' style sheet to matplotlib plots drawn in this notebook.
plots.style.use('fivethirtyeight')

Words of Caution¶

  • Remember to run the cell above. It sets up the environment so that you have access to everything needed for this lecture. For now, don't worry about what it means: we'll learn more about what's inside it in the next few lectures.
  • Data science is not just about code, so please don't read this notebook on its own. Keep the relevant textbook sections or lecture video at hand so that you can follow the discussion along with the code. Thank you!

Python¶

In [2]:
# Addition: the value of the last expression in a cell is displayed as its output.
2 + 9
Out[2]:
11
In [3]:
# Multiplication is written with *.
2 * 9
Out[3]:
18
In [4]:
# Division with / produces a decimal (float) result, even though both operands are integers.
2 / 9
Out[4]:
0.2222222222222222
In [5]:
# Multiplication is applied before addition, so this is evaluated as 2 + (3 * 9).
2 + 3 * 9
Out[5]:
29
In [6]:
# Parentheses override the default order: the addition happens first, then the multiplication.
(2 + 3) * 9
Out[6]:
45
In [7]:
# Exponentiation: ** raises the left operand to the power of the right one.
# Two to the power of four is 2 * 2 * 2 * 2.
2 ** 4
Out[7]:
16
In [8]:
# A string literal: text inside quotes is a value in its own right and evaluates to itself.
'hello'
Out[8]:
'hello'

Names¶

In [9]:
# Assignment statement: bind the name a to the value 4. An assignment displays no output.
a = 4
In [10]:
# A name on its own evaluates to the value currently bound to it.
a
Out[10]:
4
In [11]:
# Bind a second name, b, to the value 9.
b = 9
In [12]:
# Look up the value bound to b.
b
Out[12]:
9
In [13]:
# A name can be used anywhere a value can; this computes with a without rebinding it.
a * 3
Out[13]:
12
In [14]:
# The right-hand side is evaluated now (4 + 9) and the resulting value is bound to total.
total = a + b
In [15]:
# Display the value bound to total.
total
Out[15]:
13
In [16]:
# Rebind a to a new value; names already computed from the old a are not updated.
a = 10
In [17]:
# total is still 13: it holds the value computed earlier, not the formula a + b.
total
Out[17]:
13
In [18]:
# Run the assignment again so that total is recomputed with the current value of a.
total = a + b
In [19]:
# total now reflects a = 10, i.e. 10 + 9.
total
Out[19]:
19
In [20]:
# In quotes this is the string 'total', not the name total, so no value is looked up.
'total'
Out[20]:
'total'

Why Names?¶

In [21]:
# Option 1: compute with bare numbers.
# 40 and 14.00 correspond to the hours-per-week and hourly-wage values named in Option 2.
40 * 14.00
Out[21]:
560.0
In [22]:
# Still Option 1: the yearly figure, with nothing in the expression saying what 40, 52 or 14.00 mean.
40 * 52 * 14.00
Out[22]:
29120.0
In [23]:
# Option 2: give each quantity a descriptive name so later expressions explain themselves.
ca_hourly_minimum_wage = 14.00
hours_per_week = 40
weeks_per_year = 52
In [24]:
# Derive a new named quantity from the two named above.
hours_per_year = hours_per_week * weeks_per_year
In [25]:
# Display the derived value (40 * 52).
hours_per_year
Out[25]:
2080
In [26]:
# Same calculation as the first Option 1 cell, now written with names.
weekly_wages = hours_per_week * ca_hourly_minimum_wage
# An assignment shows nothing, so the name is repeated on the last line to display it.
weekly_wages
Out[26]:
560.0
In [27]:
# Same calculation as the second Option 1 cell, reusing hours_per_year.
yearly_wages = hours_per_year * ca_hourly_minimum_wage
# Repeat the name on the last line so its value is displayed.
yearly_wages
Out[27]:
29120.0

Functions and Call Expressions¶

In [28]:
# Call expression: the built-in abs returns the absolute value of its argument.
abs(-5)
Out[28]:
5
In [29]:
# The argument expression (1 - 3) is evaluated first; its value is then passed to abs.
abs(1 - 3)
Out[29]:
2
In [30]:
# Two named temperature readings.
day_temp = 52
night_temp = 47
# abs gives the size of the gap regardless of which reading is subtracted from which.
abs(night_temp - day_temp)
Out[30]:
5
In [31]:
# A function can take more than one argument: min returns the smallest of them.
min(14, 15)
Out[31]:
14
In [32]:
# With a single argument, round returns the nearest integer.
round(123.456)
Out[32]:
123
In [33]:
# An optional second argument sets how many digits to keep after the decimal point.
round(123.456, 1)
Out[33]:
123.5
In [34]:
# Same call as above, with the second argument passed by name (ndigits) for readability.
round(123.456, ndigits=1)
Out[34]:
123.5

Tables¶

In [35]:
# Read the CSV file into a Table. The path is relative, so cones.csv must be reachable
# from the notebook's working directory.
cones = Table.read_table('cones.csv')
# Display the table.
cones
Out[35]:
Flavor Color Price Rating
strawberry pink 3.55 1
chocolate light brown 4.75 4
chocolate dark brown 5.25 3
strawberry pink 5.25 2
chocolate dark brown 5.25 5
bubblegum pink 4.75 1
In [36]:
# Display only the first 3 rows; the remaining rows are reported as omitted.
cones.show(3)
Flavor Color Price Rating
strawberry pink 3.55 1
chocolate light brown 4.75 4
chocolate dark brown 5.25 3

... (3 rows omitted)

In [37]:
# With no argument, show displays every row of the table.
cones.show()
Flavor Color Price Rating
strawberry pink 3.55 1
chocolate light brown 4.75 4
chocolate dark brown 5.25 3
strawberry pink 5.25 2
chocolate dark brown 5.25 5
bubblegum pink 4.75 1
In [38]:
# Keep only the 'Flavor' column. The column label is passed as a string.
cones.select('Flavor')
Out[38]:
Flavor
strawberry
chocolate
chocolate
strawberry
chocolate
bubblegum
In [39]:
# select accepts several labels; the result has just these columns, in the order given.
cones.select('Flavor', 'Price')
Out[39]:
Flavor Price
strawberry 3.55
chocolate 4.75
chocolate 5.25
strawberry 5.25
chocolate 5.25
bubblegum 4.75
In [40]:
# Intentional error: without quotes, Flavor is read as a name rather than the string 'Flavor'.
# No such name is defined, so this cell raises NameError (see the traceback below).
cones.select(Flavor, 'Price')
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In [40], line 1
----> 1 cones.select(Flavor, 'Price')

NameError: name 'Flavor' is not defined
In [ ]:
# drop is the complement of select: the result omits the named column.
# The result is only displayed here, not assigned to a name.
cones.drop('Price')
In [ ]:
# Display cones again to check whether the drop in the previous cell changed it.
cones
In [ ]:
# Assign the result of drop to a new name so that it can be reused later.
cones_without_price = cones.drop('Price')
# Display the new table.
cones_without_price
In [ ]:
# where filters rows: keep those whose 'Flavor' value is 'chocolate'.
cones.where('Flavor', 'chocolate')
In [ ]:
# Order the rows by the values in the 'Price' column.
cones.sort('Price')
In [ ]:
# Same sort with the named argument descending=True: largest price first.
cones.sort('Price', descending=True)
In [ ]:
# Sorting also works on a column of strings; descending=True reverses the order.
cones.sort('Flavor', descending=True)

A more interesting table¶

In [ ]:
# Read the NBA salary data and give the column at index 3 the label 'SALARY'.
# NOTE(review): the index is presumably zero-based (i.e. the fourth column) — confirm against the CSV.
nba = Table.read_table('nba_salaries.csv').relabeled(3, 'SALARY')
# Display the table.
nba
In [ ]:
# Keep only the rows whose 'POSITION' value is 'PG' and name the result.
point_guards = nba.where('POSITION', 'PG')
In [ ]:
# Display the filtered table.
point_guards
In [ ]:
# Display the table without the 'POSITION' column. The result is not assigned to any name.
point_guards.drop('POSITION')
In [ ]:
# Display point_guards again to check whether the unassigned drop above changed it.
point_guards
In [ ]:
# Rebind point_guards to the version without 'POSITION' so that later cells use it.
# NOTE(review): not idempotent — running this cell a second time presumably fails,
# because the column has already been removed. Re-run the where cell first.
point_guards = point_guards.drop('POSITION')
In [ ]:
# Method chaining: sort by 'SALARY' with the largest first, then show every row of the result.
point_guards.sort('SALARY', descending=True).show()
In [ ]:
# Same sort, but display only the first 15 rows of the sorted table.
point_guards.sort('SALARY', descending=True).show(15)
In [ ]:
# Order matters in a chain: here 'POSITION' is dropped before where tries to filter on it.
# NOTE(review): this presumably raises an error because the column no longer exists.
# It looks like a deliberate demonstration — confirm. The working order is where(...) then drop(...).
nba.drop('POSITION').where('POSITION', 'PG')