from datascience import *
import numpy as np


# This command enables plots to appear directly in your notebook.
%matplotlib inline
# This includes the powerful matplotlib plotting library
import matplotlib.pyplot as plots
# This sets the style to mirror that of the popular fivethirtyeight blog ...
plots.style.use('fivethirtyeight')


# Load the Census population-estimates CSV into a Table. Later cells read its
# SEX, AGE, POPESTIMATE2014 and POPESTIMATE2019 columns.
full = Table.read_table('nc-est2019-agesex-res.csv')
# Bare expression: a notebook renders the table when this line ends a cell.
full


# First construction of `data`: rename the estimate columns, THEN pick the
# columns to keep (so the selection uses the new, shorter labels).
data = full.relabeled('POPESTIMATE2014', '2014')
data = data.relabeled('POPESTIMATE2019', '2019')
data = data.select('SEX', 'AGE', '2014', '2019')
# Rows with AGE == 999 are aggregates rather than a single age; drop them.
data = data.where('AGE', are.not_equal_to(999))
data

# Second construction of `data`, producing the same table in a different
# order: pick the columns under their original labels, THEN rename them.
data = full.select('SEX', 'AGE', 'POPESTIMATE2014', 'POPESTIMATE2019')
data = data.relabeled('POPESTIMATE2014', '2014')
data = data.relabeled('POPESTIMATE2019', '2019')
# Rows with AGE == 999 are aggregates rather than a single age; drop them.
data = data.where('AGE', are.not_equal_to(999))
data


# 2019 population by age for the SEX == 0 rows (both sexes combined),
# first with a marker on every data point ...
(
    data
    .where("SEX", are.equal_to(0))
    .plot("AGE", "2019", marker="o")
)

# ... and then as a plain line without markers.
(
    data
    .where("SEX", are.equal_to(0))
    .plot("AGE", "2019")
)


# Line plot of 2019 population against age using EVERY row of `data`, i.e.
# without filtering on SEX first, so all SEX codes share one line.
# NOTE(review): presumably shown to motivate filtering by SEX — confirm.
data.plot("AGE", "2019", marker="o")


# The same two columns, again unfiltered, drawn as a scatter plot.
data.scatter("AGE", "2019")


# Two separate line plots for the SEX == 0 rows: one per estimate year.
(
    data
    .where("SEX", are.equal_to(0))
    .plot("AGE", "2014")
)
(
    data
    .where("SEX", are.equal_to(0))
    .plot("AGE", "2019")
)


# Both years overlaid on a single set of axes, with point markers.
data.where("SEX", are.equal_to(0)).plot(
    "AGE",
    make_array('2014', '2019'),
    marker="o",
)


# Reshape the 2019 estimates into one row per age with a column per sex.
# Ages come from the SEX == 0 rows, males from SEX == 1, females from SEX == 2;
# the three filtered columns are assumed to line up row-for-row by age.
pop_2019 = Table().with_columns(
    "Age",
    data.where("SEX", are.equal_to(0)).column("AGE"),
    "Males",
    data.where("SEX", are.equal_to(1)).column("2019"),
    "Females",
    data.where("SEX", are.equal_to(2)).column("2019")
)
pop_2019


# Plot every remaining column (Males and Females) against Age.
pop_2019.plot("Age", marker="o")


# Total 2019 population at each age (males + females).
top_pop_2019 = pop_2019.column("Males") + pop_2019.column("Females")

# Female share of each age group. The values are scaled by 100, so the
# "Prop. Female" column actually holds percentages.
pop_2019 = pop_2019.with_column(
    "Prop. Female",
    pop_2019.column("Females") / top_pop_2019 * 100,
)
pop_2019.plot("Age", "Prop. Female", marker="o")
#plots.ylim(0, 100);  # Optional for Data 8 --- Should we even do this?...


# Actors and their highest grossing movies
actors = Table.read_table('actors.csv')
actors


# Scatter plot with one point per row (actor): number of movies on the x-axis,
# average gross per movie on the y-axis.
actors.scatter('Number of Movies', 'Average per Movie')


# The same two columns drawn as a line plot.
# NOTE(review): presumably included to contrast with the scatter plot — confirm.
actors.plot('Number of Movies', 'Average per Movie')


# Keep only the rows whose 'Average per Movie' is above 400.
actors.where('Average per Movie', are.above(400))


# Load the top-movies CSV into a Table. Later cells use its Title, Studio,
# Gross (Adjusted) and Year columns.
top_movies = Table.read_table('top_movies_2017.csv')
# Bare expression: a notebook renders the table when this line ends a cell.
top_movies


# Horizontal bar chart of total adjusted gross per studio, largest first.
(
    top_movies
    # Only the grouping key and the value being totalled are needed.
    .select("Studio", "Gross (Adjusted)")
    # One row per studio; `sum` collapses each studio's values, and the
    # resulting column is labelled "Gross (Adjusted) sum".
    .group("Studio", sum)
    .relabeled("Gross (Adjusted) sum", "Total Gross (Adjusted)")
    .sort("Total Gross (Adjusted)", descending=True)
    .barh("Studio")
)


# Adjusted gross of each Buena Vista title, as horizontal bars.
(
    top_movies
    .where("Studio", are.equal_to("Buena Vista"))
    .barh("Title", "Gross (Adjusted)")
)


# The same Buena Vista rows: adjusted gross against release year.
(
    top_movies
    .where("Studio", are.equal_to("Buena Vista"))
    .scatter("Year", "Gross (Adjusted)")
)


import plotly.express as px # Import the powerful plotly viz tool


# Plotly scatter of every movie: Year on the x-axis, adjusted gross on the
# y-axis, points colored by Studio, and the Title used as the hover label.
# The columns are passed as arrays pulled from the Table.
px.scatter(x = top_movies.column("Year"), 
           y = top_movies.column("Gross (Adjusted)"), 
           color = top_movies.column("Studio"),
           hover_name = top_movies.column("Title"))

AGE	CENSUS2010POP	ESTIMATESBASE2010	POPESTIMATE2010	POPESTIMATE2011	POPESTIMATE2012	POPESTIMATE2013	POPESTIMATE2014	POPESTIMATE2015	POPESTIMATE2016	POPESTIMATE2017	POPESTIMATE2018	POPESTIMATE2019
0	3944153	3944160	3951430	3963092	3926570	3931258	3954787	3983981	3954773	3893990	3815343	3783052
1	3978070	3978090	3957730	3966225	3977549	3942698	3948891	3973133	4002903	3972711	3908830	3829599
2	4096929	4096939	4090621	3970654	3978925	3991740	3958711	3966321	3991349	4020045	3987032	3922044
3	4119040	4119051	4111688	4101644	3981531	3991017	4005928	3974351	3982984	4006946	4033038	3998665
4	4063170	4063186	4077346	4121488	4111490	3992502	4004032	4020292	3989750	3997280	4018719	4043323
5	4056858	4056872	4064521	4087054	4131049	4121876	4004576	4017589	4035033	4003452	4008443	4028281
6	4066381	4066412	4072904	4074531	4096631	4141126	4133372	4017388	4031568	4048018	4014057	4017227
7	4030579	4030594	4042990	4082821	4084175	4106756	4152666	4145872	4030888	4044139	4058370	4022319
8	4046486	4046497	4025501	4052773	4092559	4094513	4118349	4165033	4158848	4042924	4054236	4066194
9	4148353	4148369	4125312	4035319	4062726	4103052	4106068	4130887	4177895	4170813	4053179	4061874

AGE	2014	2019
0	3954787	3783052
1	3948891	3829599
2	3958711	3922044
3	4005928	3998665
4	4004032	4043323
5	4004576	4028281
6	4133372	4017227
7	4152666	4022319
8	4118349	4066194
9	4106068	4061874

Age	Males	Females
0	1935117	1847935
1	1958585	1871014
2	2005544	1916500
3	2043010	1955655
4	2066951	1976372
5	2061200	1967081
6	2052956	1964271
7	2055735	1966584
8	2079723	1986471
9	2073148	1988726

Actor	Total Gross	Number of Movies	Average per Movie	#1 Movie	Gross
Harrison Ford	4871.7	41	118.8	Star Wars: The Force Awakens	936.7
Samuel L. Jackson	4772.8	69	69.2	The Avengers	623.4
Morgan Freeman	4468.3	61	73.3	The Dark Knight	534.9
Tom Hanks	4340.8	44	98.7	Toy Story 3	415
Robert Downey, Jr.	3947.3	53	74.5	The Avengers	623.4
Eddie Murphy	3810.4	38	100.3	Shrek 2	441.2
Tom Cruise	3587.2	36	99.6	War of the Worlds	234.3
Johnny Depp	3368.6	45	74.9	Dead Man's Chest	423.3
Michael Caine	3351.5	58	57.8	The Dark Knight	534.9
Scarlett Johansson	3341.2	37	90.3	The Avengers	623.4

Title	Studio	Gross	Gross (Adjusted)	Year
Gone with the Wind	MGM	198676459	1796176700	1939
Star Wars	Fox	460998007	1583483200	1977
The Sound of Music	Fox	158671368	1266072700	1965
E.T.: The Extra-Terrestrial	Universal	435110554	1261085000	1982
Titanic	Paramount	658672302	1204368000	1997
The Ten Commandments	Paramount	65500000	1164590000	1956
Jaws	Universal	260000000	1138620700	1975
Doctor Zhivago	MGM	111721910	1103564200	1965
The Exorcist	Warner Brothers	232906145	983226600	1973
Snow White and the Seven Dwarves	Disney	184925486	969010000	1937

Lecture 7: Data Visualization (Part 1)

Preparing the Census Data

Line Plots

Males vs Females (Optional, Skipped in Lecture)

Scatter Plots

Bar Charts

Bonus