Feb 19th, 2018
import bq_helper as bq
us_traffic = bq.BigQueryHelper(active_project="bigquery-public-data",
dataset_name="nhtsa_traffic_fatalities")
query = """SELECT state_name AS State,
COUNT(consecutive_number) AS Accidents
FROM `bigquery-public-data.nhtsa_traffic_fatalities.accident_2016`
GROUP BY state_name
ORDER BY accidents DESC
"""
state_accidents = us_traffic.query_to_pandas(query)
state_accidents.head()
import matplotlib.pyplot as plt
import plotly.plotly as py
import plotly.graph_objs as go
from plotly import tools
from plotly.offline import iplot, init_notebook_mode
plt.style.use('ggplot')
states_dict = {
'Alaska':'AK',
'Alabama':'AL',
'Arkansas':'AR',
'Arizona':'AZ',
'California':'CA',
'Colorado':'CO',
'Connecticut':'CT',
'District of Columbia':'DC',
'Delaware':'DE',
'Florida':'FL',
'Georgia':'GA',
'Hawaii':'HI',
'Iowa':'IA',
'Idaho':'ID',
'Illinois':'IL',
'Indiana':'IN',
'Kansas':'KS',
'Kentucky':'KY',
'Louisiana':'LA',
'Massachusetts':'MA',
'Maryland':'MD',
'Maine':'ME',
'Michigan':'MI',
'Minnesota':'MN',
'Missouri':'MO',
'Mississippi':'MS',
'Montana':'MT',
'North Carolina':'NC',
'North Dakota':'ND',
'Nebraska':'NE',
'New Hampshire':'NH',
'New Jersey':'NJ',
'New Mexico':'NM',
'Nevada':'NV',
'New York':'NY',
'Ohio':'OH',
'Oklahoma':'OK',
'Oregon':'OR',
'Pennsylvania':'PA',
'Puerto Rico':'PR',
'Rhode Island':'RI',
'South Carolina':'SC',
'South Dakota':'SD',
'Tennessee':'TN',
'Texas':'TX',
'Utah':'UT',
'Virginia':'VA',
'Vermont':'VT',
'Washington':'WA',
'Wisconsin':'WI',
'West Virginia':'WV',
'Wyoming':'WY'
};
state_accidents['State'].replace(states_dict,inplace = True);
init_notebook_mode()
# plotly code for choropleth map
scale = [[0, 'rgb(220, 255, 178)'],[1, 'rgb(81, 127, 25)']]
data = [ dict(
type = 'choropleth',
colorscale = scale,
autocolorscale = False,
showscale = False,
locations = state_accidents['State'],
z = state_accidents['Accidents'],
locationmode = 'USA-states',
marker = dict(
line = dict (
color = 'rgb(255, 255, 255)',
width = 2
) ),
) ]
layout = dict(
title = 'Fatal Traffic Accidents in United States (2016)',
geo = dict(
scope = 'usa',
projection = dict( type='albers usa' ),
showlakes = True,
lakecolor = 'rgb(255, 255, 255)',
countrycolor = 'rgb(255, 255, 255)')
)
figure = dict(data=data, layout=layout)
iplot(figure)
query2 = """SELECT EXTRACT(DAYOFWEEK FROM timestamp_of_crash) AS Day_of_week,
CASE
WHEN EXTRACT(DAYOFWEEK FROM timestamp_of_crash) = 1 THEN 'Sunday'
WHEN EXTRACT(DAYOFWEEK FROM timestamp_of_crash) = 2 THEN 'Monday'
WHEN EXTRACT(DAYOFWEEK FROM timestamp_of_crash) = 3 THEN 'Tuesday'
WHEN EXTRACT(DAYOFWEEK FROM timestamp_of_crash) = 4 THEN 'Wednesday'
WHEN EXTRACT(DAYOFWEEK FROM timestamp_of_crash) = 5 THEN 'Thursday'
WHEN EXTRACT(DAYOFWEEK FROM timestamp_of_crash) = 6 THEN 'Friday'
WHEN EXTRACT(DAYOFWEEK FROM timestamp_of_crash) = 7 THEN 'Saturday'
END Weekday,
COUNT(consecutive_number) AS Accidents
FROM `bigquery-public-data.nhtsa_traffic_fatalities.accident_2016`
GROUP BY Day_of_week,Weekday
ORDER BY Accidents DESC
"""
day_of_week_accidents = us_traffic.query_to_pandas(query2)
day_of_week_accidents
day_of_week_accidents.sort_values('Day_of_week').plot.bar('Weekday','Accidents')
query3 = """SELECT COUNT(consecutive_number) AS Accidents,
EXTRACT(HOUR FROM timestamp_of_crash) AS Hour
FROM `bigquery-public-data.nhtsa_traffic_fatalities.accident_2016`
GROUP BY Hour
ORDER BY Accidents DESC
"""
hour_accidents = us_traffic.query_to_pandas(query3)
hour_accidents
hour_accidents.sort_values('Hour').plot.bar('Hour','Accidents')