import datetime
from jdcal import jd2gcal
from matplotlib import gridspec
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import sqlite3
import xarray as xr
import geopandas as gpd
from matplotlib import pyplot as plt
import cdsapi
import sklearn_pandas as skp
from shapely.geometry import Point
import altair as alt
from vega_datasets import data
import plotly.express as px
import panel as pn
# Load Panel's Plotly extension before any Plotly panes are created.
pn.extension('plotly')
# Shared hex-colour palette for the figures in this notebook.
custom_colors = ['#68A33E', '#A10702', '#FB9E60', '#FFFF82', '#0F0326']
US wildfires
Import Data and Libraries
Load in Raw Data
# Path of the SQLite database holding the raw wildfire records.
input_filename = 'FPA_FOD_20170508.sqlite'
# NOTE(review): the connection is left open, as in the original; close it
# once no later cell needs `conn`.
conn = sqlite3.connect(input_filename)
# Pull every column of every row from the Fires table.
query = '''
SELECT
    *
FROM
    Fires;
'''
df_raw = pd.read_sql_query(query, conn)
Clean Data
# Columns removed from the raw table before analysis.
drop_columns = [
    'NWCG_REPORTING_AGENCY',
    'NWCG_REPORTING_UNIT_ID',
    'NWCG_REPORTING_UNIT_NAME',
    'FIRE_NAME',
    'COMPLEX_NAME',
    'OWNER_DESCR',
    'OWNER_CODE',
]
# Drop the columns listed in drop_columns.
df_US = df_raw.drop(columns=drop_columns)

# Convert DISCOVERY_DATE with jd2gcal; element 1 of the result is stored as
# MONTH and element 2 as DAY.
df_US['MONTH'] = df_US['DISCOVERY_DATE'].apply(lambda x: jd2gcal(x, 0)[1])
df_US['DAY'] = df_US['DISCOVERY_DATE'].apply(lambda x: jd2gcal(x, 0)[2])

# Build one point geometry per row, in (longitude, latitude) order, and wrap
# the table in a GeoDataFrame tagged with the EPSG:4326 CRS.
geometry = [Point(xy) for xy in zip(df_US['LONGITUDE'], df_US['LATITUDE'])]
df_geo = gpd.GeoDataFrame(df_US, geometry=geometry)
df_geo.crs = "EPSG:4326"

# State-boundary resources: Altair topo feature, state shapefile, and the
# name of a GeoJSON file.
states = alt.topo_feature(data.us_10m.url, feature='states')
us_states = gpd.read_file("cb_2018_us_state_500k/cb_2018_us_state_500k.shp")
us_states_geojson = 'states.json'
import plotly.express as px
import geopandas as gpd
from sklearn.preprocessing import MinMaxScaler
# One row per state: mean latitude/longitude and summed FIRE_SIZE.
df_grouped = df_geo.groupby(['STATE'], as_index=False).agg(
    {'LATITUDE': 'mean', 'LONGITUDE': 'mean', 'FIRE_SIZE': 'sum'}
)

# Rescale the summed FIRE_SIZE into the range [1, 10]; the dashboard uses
# this column as its marker size.
scaler = MinMaxScaler(feature_range=(1, 10))
df_grouped['markersize'] = scaler.fit_transform(df_grouped[['FIRE_SIZE']])
import panel as pn
pn.extension('plotly')


def create_dashboard():
    """Build the per-state wildfire map and wrap it in a Panel pane.

    Reads the module-level ``df_grouped`` frame and uses its ``STATE``,
    ``FIRE_SIZE`` and ``markersize`` columns.

    Returns:
        A ``pn.pane.Plotly`` wrapping the configured scatter-geo figure.
    """
    # Create a scatter_geo figure
    fig = px.scatter_geo(
        df_grouped,
        locations='STATE',
        locationmode='USA-states',
        color='FIRE_SIZE',
        size='markersize',
        hover_name='STATE',
        projection='natural earth',
        title='Wildfires in US States',
        template='plotly',
    )
    # Update geos settings
    fig.update_geos(
        center=dict(lon=-100, lat=40),
        projection_scale=2,
        showcoastlines=True,
        coastlinecolor='black',
        showsubunits=True,
        subunitcolor='black',
        subunitwidth=2,
        landcolor='darkgrey',
        showocean=True,
        oceancolor='azure',
    )
    return pn.pane.Plotly(fig)
# Create the Panel app: a markdown heading followed by the dashboard. The
# function object itself is passed to the column (it is not called here).
app = pn.Column("## Wildfires Dashboard", create_dashboard)
# Display the app
app.servable()
To begin investigating trends in wildfires across Oregon, it is important to first consider the larger national perspective. The map above details the total acreage burned in each state. Overall, Alaska far surpasses any other state, while California and Idaho show the highest amounts burned in the contiguous US. These trends are expected, however, as these states are larger than most others in the country, particularly those in the Midwest and along the East Coast. To learn more, hover over each state to explore details, including the amount of area burned. (Please look at the hyperlinked figure; sometimes the Panel and HTML output doesn’t render correctly.)
Fire cause by state
# Count fires for every (state, cause) pair.
df_viz = (
    df_geo.groupby(['STATE', 'STAT_CAUSE_DESCR'])
    .size()
    .reset_index(name='Number_of_Fires')
)

# Bar chart: one bar per state, coloured by cause, with states sorted by
# descending x value.
chart = alt.Chart(df_viz).mark_bar().encode(
    x='sum(Number_of_Fires):Q',
    y=alt.Y('STATE:N', sort='-x'),
    color='STAT_CAUSE_DESCR:N',
    tooltip=['STATE:N', 'STAT_CAUSE_DESCR:N', 'sum(Number_of_Fires):Q'],
).interactive()
chart
Monthly/Seasonal fire frequency
custom_colors = ['#68A33E', '#FFFF82', '#FB9E60', '#A10702', '#0F0326']  # Add your desired hex colors

# Fire counts with MONTH as rows and FIRE_YEAR as columns; also saved to CSV.
df_freq_mon = df_geo.groupby(['MONTH', 'FIRE_YEAR']).size().unstack()
df_freq_mon.to_csv("wildfireDataUS.csv")

# plot monthly frequency of fire events
counter_fig = 1
mon_ticks = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
             'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
plt.figure(figsize=[11, 5])
sns.heatmap(df_freq_mon, cmap=custom_colors, linewidth=.2, linecolor=[.9, .9, .9])
# Tick positions 0.5, 1.5, ..., 11.5 place each month label mid-row.
plt.yticks(np.arange(0.5, 12.5), labels=mon_ticks, rotation=0, fontsize=12)
plt.xticks(fontsize=12)
plt.xlabel('')
plt.ylabel('Month', fontsize=13)
plt.title(f'Fig {counter_fig}. Number of fire events in US', fontsize=13)
plt.tight_layout()
The visualization above shows the spread of recorded wildfires across the year, with the darker colors indicating more instances of wildfire. Across the last 24 years, late spring and late summer saw the most burning, with a period of increased fires during the spring of 2006. On average, fewer than 8,000 fires burn per month across the US; however, certain months tell another story, with more than 16,000 fires burning in the same time period. There appears to be a slight dip in fires during the early summer months of May and June.
# plot fire frequency by cause and day of year
# Fire counts with cause as rows and DISCOVERY_DOY as columns.
cause_by_doy = df_geo.groupby(['STAT_CAUSE_DESCR', 'DISCOVERY_DOY']).size().unstack()
counter_fig += 1
plt.figure(figsize=[10, 5])
ax = sns.heatmap(cause_by_doy, cmap=custom_colors, vmin=0, vmax=500)  # 'CMRmap_r' <- old color scheme
# Label every 20th day; the 0.5 offset centres each tick on its cell.
plt.xticks(np.arange(0.5, 366.5, 20), labels=range(1, 366, 20), rotation=0, fontsize=11)
plt.yticks(fontsize=11)
plt.ylabel('Fire Cause', fontsize=12)
plt.xlabel('Day of year', fontsize=12)
# Show all four axis borders around the heatmap.
for side in ["top", "right", "left", "bottom"]:
    ax.spines[side].set_visible(True)
plt.title(f'Fig {counter_fig}. Distribution of US fires by cause & day of year')
plt.tight_layout()
Next, we observe which activities are the most likely causes of wildfires across the country depending on the time of year. According to the visualization above, arson, debris burning, and other miscellaneous causes are commonplace throughout the year, while other common causes tend to spike in specific seasons. For example, lightning strikes and equipment use are common causes during the summertime, when summer storms bring seasonal spikes in thunderstorm activity, and when people are more likely to be outdoors running equipment that can start a fire. An interesting observation lies in the missing and undefined category, which is most likely to be recorded in the first half of the year.
Causes by year
# Sunburst nested as year -> month -> cause (centre outwards).
fig_q = px.sunburst(
    df_geo,
    path=['FIRE_YEAR', 'MONTH', 'STAT_CAUSE_DESCR'],
    title='Main Causes of Fire by Month and Year',
)
fig_q.update_layout(margin=dict(l=0, r=0, b=0, t=40))  # Adjust layout if needed

# Save the figure as an HTML file
fig_q.write_html("sunburst_chart.html")
Finally, we observe trends in common causes of wildfires by year. To interact with the visualization, select a year from the center of the wheel. The information becomes increasingly detailed as the wheel is explored outwards.