"""Plot hydrological signatures.
Plots include daily, monthly, and annual hydrographs as well as the regime
curve (monthly mean) and the flow duration curve.
"""
import calendar
import os
from pathlib import Path
from typing import Dict, Iterable, List, NamedTuple, Optional, Tuple, Union, ValuesView
import matplotlib
import pandas as pd
from matplotlib import pyplot as plt
from matplotlib.colors import BoundaryNorm, ListedColormap
from . import helpers
from .exceptions import InvalidInputType
def signatures(
    daily: Union[pd.DataFrame, pd.Series],
    precipitation: Optional[pd.Series] = None,
    title: Optional[str] = None,
    title_ypos: float = 1.02,
    figsize: Tuple[int, int] = (14, 13),
    threshold: float = 1e-3,
    output: Optional[Union[str, Path]] = None,
) -> None:
    """Plot hydrological signatures with or without precipitation.

    The figure contains the daily, monthly, and annual hydrographs, the
    regime curve (monthly mean), and the flow duration curve.

    Parameters
    ----------
    daily : pd.DataFrame or pd.Series
        The streamflows in mm/day. The column names are used as labels
        on the plot and the column values should be daily streamflow.
    precipitation : pd.Series, optional
        Daily precipitation time series in mm/day. If given, the data is
        plotted on a secondary (twin) y-axis whose direction is inverted,
        so that precipitation hangs from the top of each hydrograph.
    title : str, optional
        The plot supertitle.
    title_ypos : float
        The vertical position of the plot title, defaults to 1.02.
    figsize : tuple, optional
        Width and height of the plot in inches, defaults to (14, 13) inches.
    threshold : float, optional
        Discharges less than or equal to this value are dropped from the
        flow duration curve to avoid taking the log of zero, defaults to
        :math:`10^{-3}` mm/day.
    output : str or Path, optional
        Path to save the plot as png, defaults to ``None`` which means
        the plot is not saved to a file.
    """
    discharge, prcp = _prepare_plot_data(daily, precipitation)

    fig = plt.figure(figsize=figsize)
    gs = fig.add_gridspec(4, 2)
    sub_ax = [gs[0, :], gs[1, :], gs[2, 0], gs[2, 1], gs[3, :]]

    # ``zip`` stops at the four hydrograph panels, i.e. the ``daily``,
    # ``monthly``, ``annual`` and ``mean_monthly`` fields.
    for sp, f in zip(sub_ax[:-1], discharge._fields[:-2]):
        ax = fig.add_subplot(sp)
        _discharge = getattr(discharge, f)  # noqa: B009
        _title = discharge.titles[f]
        _unit = discharge.units[f]

        qxval = _discharge.index
        ax.plot(qxval, _discharge)
        ax.set_ylabel(f"$Q$ ({_unit})")

        if prcp is not None:
            _prcp = getattr(prcp, f)  # noqa: B009
            # Only keep precipitation records that have a matching discharge record.
            _prcp = _prcp.loc[_prcp.index.intersection(qxval)]

            ax_p = ax.twinx()
            # Long series are drawn as a line; shorter ones as bars.
            if _prcp.shape[0] > 1000:
                ax_p.plot(_prcp.index, _prcp.to_numpy().ravel(), alpha=0.7, color="g")
            else:
                ax_p.bar(
                    _prcp.index,
                    _prcp.to_numpy().ravel(),
                    alpha=0.7,
                    width=prcp.bar_width[f],
                    color="g",
                    align="edge",
                )
            # Inverted limits (max first) flip the axis; the 2.5 factor keeps
            # precipitation in the upper part of the panel.
            ax_p.set_ylim(_prcp.max().to_numpy()[0] * 2.5, 0)
            ax_p.set_ylabel(f"$P$ ({_unit})")

        ax.set_xlim(qxval[0], qxval[-1])
        ax.set_xlabel("")
        ax.set_title(_title)

        n_series = len(_discharge.columns)
        if n_series > 1 and f == "daily":
            ax.legend(
                _discharge.columns,
                bbox_to_anchor=(0.0, 1.02, 1.0, 0.102),
                loc="lower right",
                # One legend column per plotted series (not per time step).
                ncol=n_series,
            )

        if f == "annual":
            ax.xaxis.set_major_locator(matplotlib.dates.YearLocator())
            ax.xaxis.set_major_formatter(matplotlib.dates.DateFormatter("%Y"))

    ax = fig.add_subplot(sub_ax[-1])
    for col in discharge.daily:
        dc = discharge.ranked[[col, f"{col}_rank"]]
        # Filter rows on the discharge values only, so that the exceedance
        # ranks are never compared against the discharge threshold.
        dc = dc[dc[col] > threshold]
        ax.plot(dc[f"{col}_rank"], dc[col], label=col)

    ax.set_yscale("log")
    ax.set_xlim(0, 100)
    ax.set_xlabel("% Exceedance")
    ax.set_ylabel(fr"$\log(Q)$ ({discharge.units['ranked']})")
    ax.set_title("Flow Duration Curve")

    plt.tight_layout()
    plt.suptitle(title, size=16, y=title_ypos)

    if output is not None:
        _check_dir(output)
        plt.savefig(output, dpi=300, bbox_inches="tight")
class PlotDataType(NamedTuple):
    """Container bundling the data needed to plot hydrologic signatures.

    Attributes
    ----------
    daily : pd.DataFrame
        Daily time series, one column per plotted series.
    monthly : pd.DataFrame
        Monthly aggregation of the daily data.
    annual : pd.DataFrame
        Annual aggregation of the daily data.
    mean_monthly : pd.DataFrame
        Per-calendar-month mean used for the regime curve.
    ranked : pd.DataFrame
        Values paired with their exceedance rank, used for the flow
        duration curve.
    bar_width : dict
        Bar width to use for each time-series field, keyed by field name.
    titles : dict
        Subplot title for each data field, keyed by field name.
    units : dict
        Unit label for each data field, keyed by field name.
    """

    daily: pd.DataFrame
    monthly: pd.DataFrame
    annual: pd.DataFrame
    mean_monthly: pd.DataFrame
    ranked: pd.DataFrame
    bar_width: Dict[str, int]
    titles: Dict[str, str]
    units: Dict[str, str]
def _prepare_plot_data(
    daily: Union[pd.DataFrame, pd.Series],
    precipitation: Optional[Union[pd.DataFrame, pd.Series]] = None,
) -> Tuple[PlotDataType, Optional[PlotDataType]]:
    """Validate the inputs and build plot data for discharge and precipitation.

    Returns a pair ``(discharge, prcp)`` where ``prcp`` is ``None`` when no
    precipitation is given. Raises ``InvalidInputType`` if either input is
    not a pandas ``DataFrame`` or ``Series``.
    """
    accepted = (pd.DataFrame, pd.Series)

    if not isinstance(daily, accepted):
        raise InvalidInputType("daily", "pd.DataFrame or pd.Series")
    discharge = prepare_plot_data(daily)

    # Precipitation is optional; nothing else to do when it is absent.
    if precipitation is None:
        return discharge, None

    if not isinstance(precipitation, accepted):
        raise InvalidInputType("precipitation", "pd.DataFrame or pd.Series")
    return discharge, prepare_plot_data(precipitation)
def prepare_plot_data(daily: Union[pd.DataFrame, pd.Series]) -> PlotDataType:
    """Generate structured data for plotting hydrologic signatures.

    Parameters
    ----------
    daily : pandas.Series or pandas.DataFrame
        The daily data to be processed. A ``Series`` is converted to a
        single-column ``DataFrame``. The index is grouped by month and
        year, so it is expected to be datetime-like.

    Returns
    -------
    PlotDataType
        Named tuple with the ``daily``, ``monthly``, ``annual``,
        ``mean_monthly``, and ``ranked`` data along with the ``bar_width``,
        ``titles``, and ``units`` dictionaries used for plotting.
    """
    if isinstance(daily, pd.Series):
        daily = daily.to_frame()

    monthly = daily.groupby(pd.Grouper(freq="M")).sum()
    annual = daily.groupby(pd.Grouper(freq="Y")).sum()

    # The regime curve is labelled in mm/month, so it is averaged over the
    # monthly totals of each calendar month rather than over daily values
    # (which would yield mm/day).
    month_abbr = dict(enumerate(calendar.month_abbr))
    mean_month = monthly.groupby(monthly.index.month).mean()
    mean_month.index = mean_month.index.map(month_abbr)

    ranked = exceedance(daily)

    # Explicit per-field mappings; keys are ``PlotDataType`` field names.
    titles = {
        "daily": "Total Hydrograph (daily)",
        "monthly": "Total Hydrograph (monthly)",
        "annual": "Total Hydrograph (annual)",
        "mean_monthly": "Regime Curve (monthly mean)",
        "ranked": "Flow Duration Curve",
    }
    units = {
        "daily": "mm/day",
        "monthly": "mm/month",
        "annual": "mm/year",
        "mean_monthly": "mm/month",
        "ranked": "mm/day",
    }
    bar_width = {
        "daily": 1,
        "monthly": 30,
        "annual": 365,
        "mean_monthly": 1,
    }
    return PlotDataType(daily, monthly, annual, mean_month, ranked, bar_width, titles, units)
def cover_legends() -> Tuple[ListedColormap, BoundaryNorm, List[float]]:
    """Return the colormap, norm, and levels for land cover data legends.

    The class values and their colors are read from the ``"colors"`` entry
    of the metadata returned by ``helpers.nlcd_helper``.
    """
    color_table = helpers.nlcd_helper()["colors"]
    class_values = list(color_table.keys())

    cmap = ListedColormap(list(color_table.values()))
    norm = BoundaryNorm(class_values, cmap.N)

    # Levels are the class values closed off with 100 as the last edge.
    return cmap, norm, class_values + [100]
def exceedance(daily: Union[pd.DataFrame, pd.Series]) -> Union[pd.DataFrame, pd.Series]:
    """Compute the flow duration data: sorted observations with their ranks.

    For every column ``c`` of the input the output has two columns: ``c``,
    holding the values ordered by exceedance rank, and ``c_rank``, holding
    the percentage rank (descending, 0-100]. The original index is dropped.
    """
    frame = daily.to_frame() if isinstance(daily, pd.Series) else daily
    pct_rank = frame.rank(ascending=False, pct=True) * 100

    curves = []
    for name in frame:
        rank_col = f"{name}_rank"
        pair = pd.DataFrame({name: frame[name], rank_col: pct_rank[name]})
        pair = pair.sort_values(by=rank_col)
        curves.append(pair.reset_index(drop=True))
    return pd.concat(curves, axis=1)
def _check_dir(
fpath_itr: Optional[
Union[ValuesView[Optional[Union[str, Path]]], List[Optional[Union[str, Path]]], str, Path]
]
) -> None:
"""Create parent directory for a file if doesn't exist."""
if isinstance(fpath_itr, (str, Path)):
fpath_itr = [fpath_itr]
elif not isinstance(fpath_itr, Iterable):
raise InvalidInputType("fpath_itr", "str or iterable")
for f in fpath_itr:
if f is None:
continue
parent = Path(f).parent
if not parent.is_dir():
try:
os.makedirs(parent)
except OSError:
raise OSError(f"Parent directory cannot be created: {parent}")