from datascience import *
import numpy as np

%matplotlib inline
import matplotlib.pyplot as plots
plots.style.use('fivethirtyeight')
import warnings
# Silence NumPy's VisibleDeprecationWarning for the rest of the session.
# NumPy 2.0 removed the top-level ``np.VisibleDeprecationWarning`` alias; the
# class now lives in ``np.exceptions``. NumPy releases that have no
# ``exceptions`` submodule still expose the class at the top level, so fall
# back to ``np`` itself when the submodule is missing.
warnings.simplefilter(
    action='ignore',
    category=getattr(np, 'exceptions', np).VisibleDeprecationWarning,
)


# A single list literal can mix value types: ints, a string, and a float.
[1, 5, 'hello', 5.0]

# Same literal again; presumably the echoed value of the cell above.
[1, 5, 'hello', 5.0]


# A list can also hold an array as one of its elements.
another_list = [1, 5, 'hello', 5.0, make_array(1,2,3)]
another_list

# NOTE(review): looks like the displayed value of `another_list` pasted from
# the notebook output rather than a separate code cell -- confirm.
[1, 5, 'hello', 5.0, array([1, 2, 3])]


# Lists are indexed with square brackets, counting from 0.
another_list[2]

# Displayed result of the indexing above.
'hello'


# Arrays support the same square-bracket indexing.
my_array = make_array("Apple", "Banana", "Corn")
my_array[1]

# Displayed result of the indexing above.
'Banana'


# .item(i) is another way to read the element at index i of the array.
my_array.item(1)

# Displayed result of the .item call above.
'Banana'


# A plain Python list can supply a column's values.
Table().with_columns('Numbers', [1, 2, 3])


# Start a table from its column labels only.
drinks = Table(['Drink', 'Cafe', 'Price'])
drinks


# Add rows as a list of lists; each inner list gives one row's values in the
# order Drink, Cafe, Price. The result of with_rows is assigned back to
# `drinks`.
drinks = drinks.with_rows([
    ['Milk Tea', 'Asha', 5.5],
    ['Espresso', 'Strada',  1.75],
    ['Latte',    'Strada',  3.25],
    ['Espresso', "FSM",   2]
])
drinks


# Load the welcome survey from a CSV file and display its first 3 rows.
survey = Table.read_table("welcome_survey_sp23.csv")
survey.show(3)


# Grouping by one column with no collector gives one row per distinct Year
# together with a count column.
survey.group("Year")


# Average hours of sleep per Year, drawn as a horizontal bar chart.
(
    survey
    # Keep only the grouping column and the column being averaged.
    .select("Year", "Hours of sleep")
    .group("Year", np.average)
    # Reorder the grouped rows by index.
    # NOTE(review): presumably this puts the years in class-standing order
    # (First, Second, Third, Fourth, Fifth+, Graduate), given the alphabetical
    # order of the grouped table -- confirm against the data.
    .take(1,4,5,2,0,3)
    .barh("Year")
)
# Axis limits left disabled here; the next cell draws the same chart with them.
# plots.xlim([6,7.5])

# Same chart as above, this time with the x-axis limited to the range 6 to 7.5.
(
    survey
    .select("Year", "Hours of sleep")
    .group("Year", np.average)
    .take(1,4,5,2,0,3)
    .barh("Year")
)
plots.xlim([6,7.5])


# Histogram of the Extraversion column.
survey.hist('Extraversion')


# For each Extraversion value, take the mean of the other three selected
# columns. The plots below refer to the results as "<column label> mean".
by_extra = (
    survey
    .select("Extraversion", "Number of textees", "Hours of sleep", "Piercings")
    .group("Extraversion", np.mean)
)
by_extra

# Same computation repeated (only the quoting of 'Extraversion' differs).
by_extra = (
    survey
    .select("Extraversion", "Number of textees", "Hours of sleep", "Piercings")
    .group('Extraversion', np.mean)
)
by_extra


# Line plot of mean number of textees against Extraversion, with "*" markers.
by_extra.plot("Extraversion", "Number of textees mean", marker="*")


# Line plot of mean hours of sleep against Extraversion, with "*" markers.
by_extra.plot("Extraversion", "Hours of sleep mean", marker="*")


# Passing a list of labels to group() groups by each distinct
# (Handedness, Sleep position) pair; with no collector the result has a count
# per pair. show() displays the resulting rows.
(
    survey
    .group(["Handedness", "Sleep position"])
    .show()
)

# Same cell repeated.
(
    survey
    .group(["Handedness", "Sleep position"])
    .show()
)


# Average hours of sleep for each (Handedness, Sleep position) pair.
(
    survey
    # Keep only the two grouping columns and the column being averaged.
    .select("Handedness", "Sleep position", "Hours of sleep")
    .group(["Handedness", "Sleep position"], np.average)
    .show()
)

# Same computation repeated (only the quoting of the labels differs).
(
    survey
    .select("Handedness", "Sleep position", "Hours of sleep")
    .group(['Handedness', 'Sleep position'], np.average)
    .show()
)


# Pivot with positional arguments; the next cell spells out the same four
# arguments by keyword.
(
    survey
    .pivot("Handedness", "Sleep position", "Hours of sleep", np.average)
)


# Same pivot written with keyword arguments to show each argument's role.
(
    survey
    .pivot(
        columns = "Handedness", # The column whose unique values become columns
        rows    = "Sleep position", # The column whose unique values become rows
        values  = "Hours of sleep", # The column that will be used to compute the cells
        collect = np.average # How to aggregate the values 
    )
)


# Pivot with only the column and row labels: no values column or collector.
# NOTE(review): presumably each cell is then the number of matching rows --
# confirm against the datascience Table.pivot documentation.
survey.pivot("Sleep position", "Handedness")


# Horizontal bar chart of that pivot, one group of bars per Handedness value.
survey.pivot("Sleep position", "Handedness").barh("Handedness")


# Redisplay the drinks table before joining it.
drinks


# A second table of coupons; 'Location' holds cafe names, and 'Asha' appears
# twice with different discounts.
discounts = Table().with_columns(
    'Coupon % off', make_array(10, 25, 5),
    'Location', make_array('Asha', 'Strada', 'Asha')
)
discounts


# Join drinks to discounts by matching drinks' 'Cafe' against discounts'
# 'Location'.
combined = drinks.join('Cafe', discounts, 'Location')
combined

# Same join repeated.
combined = drinks.join('Cafe', discounts, 'Location')
combined


# Convert the percent-off values into the fraction of the price still paid.
discount_frac = 1-combined.column("Coupon % off")/100.0

# Lowest discounted price for each drink.
(
    combined
    .with_column("Discounted Price", combined.column("Price") * discount_frac)
    .select("Drink", "Discounted Price")
    # Python's built-in min collapses each drink's prices to the smallest one.
    .group("Drink", min)
)

# The same steps repeated as one self-contained cell.
combined = drinks.join('Cafe', discounts, 'Location') # from previous question

discount_frac = 1-combined.column("Coupon % off")/100.0

(
    combined
    .with_column("Discounted Price", combined.column("Price") * discount_frac)
    .select("Drink", "Discounted Price")
    .group("Drink", min)
)


# Join drinks with itself on the 'Cafe' column.
drinks.join('Cafe', drinks, 'Cafe')


# From the CORGIS Dataset Project
# By Austin Cory Bart acbart@vt.edu
# Version 2.0.0, created 3/22/2016
# https://corgis-edu.github.io/corgis/csv/skyscrapers/

# Reference year used to turn each building's completion year into an age.
this_year = 2023
sky = Table.read_table("skyscrapers.csv")
# Replace the "completed" column with an "age" column (this_year - completed).
sky = (
    sky
    .with_column("age", this_year - sky.column("completed"))
    .drop("completed")
)
sky


# One row per city, one column per material; each cell is the max "height"
# for that city/material pair (collector is Python's built-in max).
tall_pivot = sky.pivot("material", "city", "height", max)
tall_pivot

# Same pivot repeated.
tall_pivot = sky.pivot("material", "city", "height", max)
tall_pivot


# Horizontal bar chart of the pivot, one group of bars per city.
tall_pivot.barh("city")


# Number of buildings per city; the chain below filters on the resulting
# "count" column.
num_buildings = sky.group("city")
(
    sky
    # Attach each city's building count to every building row.
    .join("city", num_buildings)
    # Keep only buildings in cities with at least 50 buildings.
    .where("count", are.above_or_equal_to(50))
    # Mean height per city and material, then plot per city.
    .pivot("material", "city", "height", np.mean)
    .barh("city")
)

# Same cell repeated.
num_buildings = sky.group("city")
(
    sky
    .join("city", num_buildings)
    .where("count", are.above_or_equal_to(50))
    .pivot("material", "city", "height", np.mean)
    .barh("city")
)


def first(x):
    """Return the element stored at index 0 of ``x``.

    ``x`` may be anything that supports integer indexing (list, tuple,
    string, array). Indexing an empty sequence raises ``IndexError``.
    """
    leading = x[0]
    return leading
    
    
# For each (city, material) pair, keep the first name and age after sorting
# by age in descending order.
# NOTE(review): this yields the oldest building per pair only if group()
# preserves the sorted row order within each group -- confirm.
(
    sky
    .sort("age", descending=True) 
    .select("city", "material", "name", "age")
    # `first` is the collector: it keeps element 0 of each group's values.
    .group(["city", "material"], first)
    #.where("city", "San Francisco") #<- for fun
)

def first(x):
    """Return the element stored at index 0 of ``x``.

    ``x`` may be anything that supports integer indexing (list, tuple,
    string, array). Indexing an empty sequence raises ``IndexError``.
    """
    leading = x[0]
    return leading


# For each (city, material) pair, keep the first name and age after sorting
# by age in descending order.
# NOTE(review): this yields the oldest building per pair only if group()
# preserves the sorted row order within each group -- confirm.
(
    sky
    .sort("age", descending=True) 
    .select("city", "material", "name", "age")
    # `first` is the collector: it keeps element 0 of each group's values.
    .group(["city", "material"], first)
    #.where("city", "San Francisco") #<- for fun
)

Year	Extraversion	Number of textees	Hours of sleep	Handedness	Pant leg	Sleep position	Pets	Piercings
Second Year	2	5	9	Right-handed	Right leg in first	On your right side	Cat, Dog, Fish, Snake, Lizard	-3
First Year	2	3	8	Right-handed	I don't know	On your back	None	-1
First Year	5	5	8	Right-handed	Right leg in first	On your left side	Bearded dragon	0

Year	count
Fifth Year or above	9
First Year	606
Fourth Year	109
Graduate Student	7
Second Year	468
Third Year	302

Extraversion	Number of textees mean	Hours of sleep mean	Piercings mean
1	3.44828	6.60345	1.72414
2	4.08197	6.98361	1.13115
3	4.75263	7.23158	1.41053
4	5.14953	7.11449	1.49533
5	6.14286	7.07576	1.64502
6	6.74257	6.98267	1.87129
7	10.989	7.09158	3.66301e+06
8	9.7027	7.22297	2.14414
9	11.9608	7.29412	2.41176
10	9.25	6.78571	1.89286

Handedness	Sleep position	count
Ambidextrous	On your back	2
Ambidextrous	On your left side	4
Ambidextrous	On your right side	3
Ambidextrous	On your stomach	3
Left-handed	On your back	27
Left-handed	On your left side	40
Left-handed	On your right side	46
Left-handed	On your stomach	9
Right-handed	On your back	334
Right-handed	On your left side	395
Right-handed	On your right side	475
Right-handed	On your stomach	163

Handedness	Sleep position	Hours of sleep average
Ambidextrous	On your back	7
Ambidextrous	On your left side	7.375
Ambidextrous	On your right side	7.33333
Ambidextrous	On your stomach	9
Left-handed	On your back	7
Left-handed	On your left side	7.2125
Left-handed	On your right side	7.30435
Left-handed	On your stomach	6.72222
Right-handed	On your back	7.13323
Right-handed	On your left side	7.05823
Right-handed	On your right side	7.03579
Right-handed	On your stomach	7.24847

Lecture 11

Lists

Rows from lists

Review Grouping Using the Welcome Survey

Understanding Extraversion

Grouping by Two Columns

Pivot Tables

Joins

Discussion Questions

Drink	Cafe	Price
Milk Tea	Asha	5.5
Espresso	Strada	1.75
Latte	Strada	3.25
Espresso	FSM	2

name	material	city	height	age
One World Trade Center	mixed/composite	New York City	541.3	9
Willis Tower	steel	Chicago	442.14	49
432 Park Avenue	concrete	New York City	425.5	8
Trump International Hotel & Tower	concrete	Chicago	423.22	14
Empire State Building	steel	New York City	381	92
Bank of America Tower	mixed/composite	New York City	365.8	14
Stratosphere Tower	concrete	Las Vegas	350.22	27
Aon Center	steel	Chicago	346.26	50
John Hancock Center	steel	Chicago	343.69	54
Chrysler Building	steel	New York City	318.9	93

city	concrete	mixed/composite	steel
Atlanta	264.25	311.8	169.47
Austin	208.15	0	93.6
Baltimore	161.24	0	155.15
Boston	121.92	139	240.79
Charlotte	265.48	239.7	179.23
Chicago	423.22	306.94	442.14
Cincinnati	125	202.69	175
Cleveland	125	288.65	215.8
Columbus	79.25	0	169.3
Dallas	176.48	280.72	270.06

city	material	name first	age first
Atlanta	concrete	Westin Peachtree Plaza	47
Atlanta	mixed/composite	One Atlantic Center	36
Atlanta	steel	FlatironCity	126
Austin	concrete	One American Center	39
Austin	steel	University of Texas Tower	86
Baltimore	concrete	Charles Towers North Apartments	56
Baltimore	steel	Emerson Tower	112
Boston	concrete	Harbor Towers I	52
Boston	mixed/composite	Ellison Building	31
Boston	steel	Marriott's Custom House	108

Numbers
1
2
3

Coupon % off	Location
10	Asha
25	Strada
5	Asha