import folium
from io import BytesIO
import matplotlib.pyplot as plt
import matplotlib.dates as dates
import os
import pathlib
import requests
import subprocess
import pandas as pd

from matplotlib.dates import DateFormatter

# Folium map of gage location on Iowa River at Iowa City, IA

# Define lat/lon of gage location
sg_lat = 41.6566833
sg_lon = -91.541

# Initialize map and tweak settings
my_map = folium.Map(
    # Location to display
    location=(sg_lat, sg_lon),
    # Turns off annoying zooming while trying to scroll to the next cell
    scrollWheelZoom=False)

# Put a marker at the stream gauge location
folium.Marker([sg_lat, sg_lon], popup="USGS River Gage; Iowa River; Iowa City, IA").add_to(my_map)

# Display the map
my_map

# Define URL to data
iihr_url = ('https://waterdata.usgs.gov/nwis/dv?cb_00060=on&format=rdb'
            '&site_no=05454500&legacy=&referred_module=sw&period='
            '&begin_date=1903-06-01&end_date=2023-06-08')
iihr_url

'https://waterdata.usgs.gov/nwis/dv?cb_00060=on&format=rdb&site_no=05454500&legacy=&referred_module=sw&period=&begin_date=1903-06-01&end_date=2023-06-08'

# Download data using a GET HTTP Request
iihr_data = requests.get(iihr_url)
iihr_data

<Response [200]>

# Take a look at the data. What got downloaded?
for i, line in enumerate(iihr_data.content.splitlines()[:35]):
    if not line.startswith(b'#'):
        print(i, line)

27 b'agency_cd\tsite_no\tdatetime\t42884_00060_00003\t42884_00060_00003_cd'
28 b'5s\t15s\t20d\t14n\t10s'
29 b'USGS\t05454500\t1903-06-01\t17800\tA'
30 b'USGS\t05454500\t1903-06-02\t21700\tA'
31 b'USGS\t05454500\t1903-06-03\t21700\tA'
32 b'USGS\t05454500\t1903-06-04\t20000\tA'
33 b'USGS\t05454500\t1903-06-05\t18000\tA'
34 b'USGS\t05454500\t1903-06-06\t15600\tA'

# Open data using pandas
iowa_river_flow = pd.read_csv(
    BytesIO(iihr_data.content),
    comment='#',
    delimiter='\t', 
    skiprows=[27,28],
    names=['Agency', 'Site ID', 'Date', 'Discharge', 'Code'],
    index_col='Date',
    parse_dates=True,
)

iowa_river_flow

# Plot the data using .plot
iowa_river_flow.plot(y='Discharge',
                     ylabel='Discharge (cfs)',
                     legend=False,
                     xlabel='Year',
                     title='Iowa River @ Iowa City, IA (1903-2023)')
plt.show()

# Subset data to 2008
ia_river_2008 = iowa_river_flow.loc['2008']
ia_river_2008

# What is the maximum value of this data?
ia_river_2008.max()

Agency          USGS
Site ID      5454500
Discharge    40900.0
Code             A:e
dtype: object

# Plot the data using pandas
ia_river_2008.plot(y='Discharge')

<Axes: xlabel='Date'>

# Resample data to look at annual maximum discharge
ia_river_annual_max = iowa_river_flow[['Discharge']].resample('Y').max()
ia_river_annual_max

/tmp/ipykernel_5288/3625236627.py:2: FutureWarning: 'Y' is deprecated and will be removed in a future version, please use 'YE' instead.
  ia_river_annual_max = iowa_river_flow[['Discharge']].resample('Y').max()

# What is the maximum value?
ia_river_annual_max.max()

Discharge    40900.0
dtype: float64

# Plot the data using matplotlib

# Define figure (fig) and axis (ax) objects
fig, ax = plt.subplots(figsize=(8,4))

# Create bar plot using ax.bar
ax.bar(ia_river_annual_max.index.year.values,
       ia_river_annual_max['Discharge'],
       color = 'gold',
       edgecolor='black')

# Define axis title and labels
ax.set(title="Annual Maximum Discharge\nIowa River @ Iowa City, IA (1903-2023)",
       xlabel="Year",
       ylabel='Discharge (cfs)')

plt.show()

# What is the value of the df @ row 105 (aka 2008)?
ia_river_annual_max.iloc[105,:]

Discharge    40900.0
Name: 2008-12-31 00:00:00, dtype: float64

# How many rows in this data?
len(ia_river_annual_max)

121

# Sort data smallest to largest
ia_river_discharge_sorted = ia_river_annual_max.sort_values(by="Discharge")

# Count total obervations
n = ia_river_discharge_sorted.shape[0]

# Add a numbered column 1 -> n to use in return calculation for rank
ia_river_discharge_sorted.insert(0, 'rank', range(1, 1 + n))

# Calculate probability - note you may need to adjust this value based upon the time period of your data
ia_river_discharge_sorted["probability"] = (
    (n - ia_river_discharge_sorted["rank"] + 1) / (n + 1))
ia_river_discharge_sorted["return-years"] = (
    1 / ia_river_discharge_sorted["probability"])

ia_river_discharge_sorted

%%capture
%%bash
jupyter nbconvert 08-iowa-flood.ipynb --to html

	Discharge
Date
1903-12-31	21700.0
1904-12-31	8880.0
1905-12-31	8540.0
1906-12-31	11400.0
1907-12-31	4900.0
...	...
2019-12-31	12600.0
2020-12-31	10400.0
2021-12-31	7910.0
2022-12-31	6770.0
2023-12-31	6480.0

	rank	Discharge	probability	return-years
Date
1934-12-31	1	1840.0	0.991803	1.008264
1956-12-31	2	2100.0	0.983607	1.016667
1989-12-31	3	2670.0	0.975410	1.025210
1940-12-31	4	2800.0	0.967213	1.033898
1968-12-31	5	2920.0	0.959016	1.042735
...	...	...	...	...
1993-12-31	117	26200.0	0.040984	24.400000
1944-12-31	118	30100.0	0.032787	30.500000
1947-12-31	119	32600.0	0.024590	40.666667
1918-12-31	120	40500.0	0.016393	61.000000
2008-12-31	121	40900.0	0.008197	122.000000

The 2008 Iowa River Flood¶

Plot description¶

	Agency	Site ID	Discharge	Code
Date
1903-06-01	USGS	5454500	17800.0	A
1903-06-02	USGS	5454500	21700.0	A
1903-06-03	USGS	5454500	21700.0	A
1903-06-04	USGS	5454500	20000.0	A
1903-06-05	USGS	5454500	18000.0	A
...	...	...	...	...
2023-06-04	USGS	5454500	1260.0	A
2023-06-05	USGS	5454500	1350.0	A
2023-06-06	USGS	5454500	1540.0	A
2023-06-07	USGS	5454500	1390.0	A
2023-06-08	USGS	5454500	1110.0	A

	Agency	Site ID	Discharge	Code
Date
2008-01-01	USGS	5454500	1660.0	A
2008-01-02	USGS	5454500	1490.0	A
2008-01-03	USGS	5454500	1250.0	A
2008-01-04	USGS	5454500	1100.0	A
2008-01-05	USGS	5454500	1110.0	A
...	...	...	...	...
2008-12-27	USGS	5454500	2240.0	A
2008-12-28	USGS	5454500	2330.0	A
2008-12-29	USGS	5454500	2910.0	A
2008-12-30	USGS	5454500	5090.0	A
2008-12-31	USGS	5454500	5230.0	A