diff --git a/.github/workflows/docker-build.yaml b/.github/workflows/docker-build.yaml
new file mode 100644
index 0000000..745b79a
--- /dev/null
+++ b/.github/workflows/docker-build.yaml
@@ -0,0 +1,78 @@
+name: Docker Build
+
+on:
+ workflow_call:
+ inputs:
+ push_to_registry:
+ type: boolean
+ default: false
+ required: false
+ version:
+ type: string
+ required: true
+ branch:
+ type: string
+ required: true
+ platforms:
+ type: string
+ default: 'linux/amd64'
+ required: false
+ secrets:
+ AWS_ACCOUNT_NUMBER:
+ required: true
+ AWS_ROLE:
+ required: true
+ AWS_REGION:
+ required: true
+
+permissions:
+ contents: read
+ packages: write
+ id-token: write
+
+jobs:
+ build-docker:
+ runs-on: ubuntu-latest
+ steps:
+ - name: Checkout repository
+ uses: actions/checkout@v4
+
+ - name: Configure AWS Credentials
+ uses: aws-actions/configure-aws-credentials@v4
+ with:
+ role-to-assume: arn:aws:iam::${{ secrets.AWS_ACCOUNT_NUMBER }}:role/${{ secrets.AWS_ROLE }}
+ role-session-name: updateimage
+ aws-region: ${{ secrets.AWS_REGION }}
+
+ - name: Login to Amazon ECR
+ id: login-ecr
+ uses: aws-actions/amazon-ecr-login@v2
+ with:
+ registries: ${{ secrets.AWS_ACCOUNT_NUMBER }}
+
+ - name: Set Docker tags based on branch
+ id: set_tags
+ env:
+ ECR_REGISTRY: ${{ steps.login-ecr.outputs.registry }}
+ run: |
+ if [[ "${{ inputs.branch }}" == "main" ]]; then
+ echo "TAGS=$ECR_REGISTRY/hydroshift:latest,$ECR_REGISTRY/hydroshift:${{ inputs.version }}" >> $GITHUB_OUTPUT
+ else
+ echo "TAGS=$ECR_REGISTRY/hydroshift:pr-${{ github.event.pull_request.number }}" >> $GITHUB_OUTPUT
+ fi
+
+ - name: Set up QEMU
+ uses: docker/setup-qemu-action@v3
+
+ - name: Set up Docker Buildx
+ uses: docker/setup-buildx-action@v3
+
+ - name: Build and push Docker image
+ uses: docker/build-push-action@v5
+ with:
+ context: .
+ platforms: ${{ inputs.platforms }}
+ push: ${{ inputs.push_to_registry }}
+ tags: ${{ steps.set_tags.outputs.TAGS }}
+ cache-from: type=gha
+ cache-to: type=gha,mode=max
diff --git a/.github/workflows/get-version.yaml b/.github/workflows/get-version.yaml
new file mode 100644
index 0000000..952d9ae
--- /dev/null
+++ b/.github/workflows/get-version.yaml
@@ -0,0 +1,44 @@
+name: Get Version
+
+on:
+ workflow_call:
+ outputs:
+ version:
+ description: "The version string extracted from hydroshift.__version__"
+ value: ${{ jobs.get-version.outputs.version }}
+ is_new_version:
+ description: "Whether this version is new (not previously tagged)"
+ value: ${{ jobs.get-version.outputs.is_new_version }}
+
+jobs:
+ get-version:
+ runs-on: ubuntu-latest
+ outputs:
+ version: ${{ steps.get_version.outputs.VERSION }}
+ is_new_version: ${{ steps.check_tag.outputs.IS_NEW_VERSION }}
+ steps:
+ - uses: actions/checkout@v4
+ with:
+ fetch-depth: 0
+
+ - name: Set up Python
+ uses: actions/setup-python@v5
+ with:
+ python-version: '3.12'
+
+ - name: Extract version from hydroshift
+ id: get_version
+ run: |
+ VERSION=$(python -c "import hydroshift; print(hydroshift.__version__)")
+ echo "VERSION=$VERSION" >> "$GITHUB_OUTPUT"
+
+ - name: Check if version exists as tag
+ id: check_tag
+ run: |
+          if git tag | grep -Fxq "v${{ steps.get_version.outputs.VERSION }}"; then
+ echo "Version ${{ steps.get_version.outputs.VERSION }} already exists as a tag."
+ echo "IS_NEW_VERSION=false" >> "$GITHUB_OUTPUT"
+ else
+ echo "Version ${{ steps.get_version.outputs.VERSION }} does not exist as a tag."
+ echo "IS_NEW_VERSION=true" >> "$GITHUB_OUTPUT"
+ fi
diff --git a/.github/workflows/main-push.yaml b/.github/workflows/main-push.yaml
new file mode 100644
index 0000000..48f6820
--- /dev/null
+++ b/.github/workflows/main-push.yaml
@@ -0,0 +1,48 @@
+name: Main Branch Push
+
+on:
+  push:
+    branches: [ "main" ]
+    paths:
+      - 'hydroshift/**'
+      - 'tests/**'
+      - 'Dockerfile'
+      - '.dockerignore'
+      - '.github/workflows/**'
+      - 'pyproject.toml'
+  workflow_dispatch:
+
+permissions:
+  contents: write
+  packages: write
+  id-token: write
+
+jobs:
+  get-version:
+    name: Extract Package Version
+    uses: ./.github/workflows/get-version.yaml
+
+  check-version-exists:
+    name: Verify Version is New
+    needs: get-version
+    runs-on: ubuntu-latest
+    steps:
+      - name: Fail if version already exists
+        if: needs.get-version.outputs.is_new_version != 'true'
+        run: |
+          echo "::error::ERROR: Version ${{ needs.get-version.outputs.version }} already exists as a tag."
+          echo "::error::Please update the version in hydroshift/__init__.py before releasing to main."
+          exit 1
+      - name: Version check passed
+        run: echo "Version ${{ needs.get-version.outputs.version }} is new. Proceeding with release."
+
+  docker-build-push:
+    name: Build and Push Docker Image
+    needs: [get-version, check-version-exists]
+    uses: ./.github/workflows/docker-build.yaml
+    with:
+      push_to_registry: true
+      version: ${{ needs.get-version.outputs.version }}
+      branch: 'main'
+      platforms: 'linux/amd64'
+    secrets: inherit
diff --git a/.github/workflows/pr-check.yaml b/.github/workflows/pr-check.yaml
new file mode 100644
index 0000000..e742487
--- /dev/null
+++ b/.github/workflows/pr-check.yaml
@@ -0,0 +1,33 @@
+name: PR Checks
+
+on:
+ pull_request:
+ branches: [ "main" ]
+ paths:
+ - 'hydroshift/**'
+ - 'tests/**'
+      - 'Dockerfile'
+ - '.dockerignore'
+ - '.github/workflows/**'
+ - 'pyproject.toml'
+
+permissions:
+ contents: read
+ packages: write
+ id-token: write
+
+jobs:
+ get-version:
+ name: Extract Package Version
+ uses: ./.github/workflows/get-version.yaml
+
+ docker-build-test:
+ name: Test Docker Build
+ needs: get-version
+ uses: ./.github/workflows/docker-build.yaml
+ with:
+ push_to_registry: false
+ version: ${{ needs.get-version.outputs.version }}
+      branch: ${{ github.head_ref }}
+ platforms: 'linux/amd64'
+ secrets: inherit
diff --git a/.devcontainer/Dockerfile b/Dockerfile
similarity index 100%
rename from .devcontainer/Dockerfile
rename to Dockerfile
diff --git a/hydroshift/__init__.py b/hydroshift/__init__.py
index e69de29..3dc1f76 100644
--- a/hydroshift/__init__.py
+++ b/hydroshift/__init__.py
@@ -0,0 +1 @@
+__version__ = "0.1.0"
diff --git a/hydroshift/_pages/__init__.py b/hydroshift/_pages/__init__.py
index f8097f9..6548600 100644
--- a/hydroshift/_pages/__init__.py
+++ b/hydroshift/_pages/__init__.py
@@ -1,2 +1,3 @@
from hydroshift._pages.changepoint import changepoint
+from hydroshift._pages.homepage import homepage
from hydroshift._pages.summary import summary
diff --git a/hydroshift/_pages/changepoint.py b/hydroshift/_pages/changepoint.py
index 319a1df..8f23cf6 100644
--- a/hydroshift/_pages/changepoint.py
+++ b/hydroshift/_pages/changepoint.py
@@ -1,43 +1,43 @@
"""A tool to identify changepoints in hydrologic timeseries."""
import traceback
+import uuid
from collections import defaultdict
from dataclasses import dataclass, field
from io import BytesIO
-import numpy as np
import pandas as pd
import streamlit as st
-from data_retrieval import (
- get_ams,
-)
from docx import Document
from docx.shared import Inches
-from plotly import graph_objects
-from plots import combo_cpm, plot_lp3
-
-from hydroshift.consts import METRICS, VALID_ARL0S
-from hydroshift.data_retrieval import log_pearson_iii
-from hydroshift.stats.tests import cp_pvalue_batch, cpm_process_stream
-from hydroshift.text.changepoint import references, test_description
+from plotly.graph_objects import Figure
+
+from hydroshift.consts import (
+ CP_F1_CAPTION,
+ CP_F2_CAPTION,
+ CP_T1_CAPTION,
+ CP_T2_CAPTION,
+ METRICS,
+ VALID_ARL0S,
+)
+from hydroshift.utils.changepoint import cp_pvalue_batch, cpm_process_stream
+from hydroshift.utils.common import num_2_word
+from hydroshift.utils.data_retrieval import Gage
+from hydroshift.utils.ffa import LP3Analysis
+from hydroshift.utils.jinja import render_template, write_template
+from hydroshift.utils.plots import combo_cpm, plot_lp3
@dataclass
class ChangePointAnalysis:
"""OOP representation of the changepoint analysis."""
- data: dict = field(default_factory=pd.DataFrame)
- missing_years: int = 0
+ gage: Gage = field(default_factory=Gage)
pval_df: dict = None
cp_dict: dict = field(default_factory=dict)
ffa_plot = None
ffa_df: dict = field(default_factory=pd.DataFrame)
- @property
- def ts(self) -> np.ndarray:
- """Timeseries from data."""
- return self.data["peak_va"].values
-
@property
def nonstationary(self) -> bool:
"""Whether or not a nonstationarity was identified."""
@@ -70,7 +70,9 @@ def get_change_windows(self, max_dist: float = 10) -> tuple:
current_group_tests = set(test_dict[dates[0]])
for i in range(1, len(dates)):
- if ((dates[i] - current_group[-1]).days / 365) < max_dist: # 10 years in days
+ if (
+ (dates[i] - current_group[-1]).days / 365
+ ) < max_dist: # 10 years in days
current_group.append(dates[i])
current_group_tests = current_group_tests.union(test_dict[dates[i]])
else:
@@ -85,43 +87,28 @@ def get_change_windows(self, max_dist: float = 10) -> tuple:
return groups, test_counts
- def get_max_pvalue(self):
+ def get_max_pvalue(self) -> tuple[float, int]:
"""Get the minimum p value where all tests agree and count how often that occurred."""
all_tests = self.pval_df.fillna(1).max(axis=1).to_frame(name="pval")
all_tests["run"] = ((all_tests != all_tests.shift(1)) * 1).cumsum()
- for ind, r in all_tests.groupby("pval").agg({"run": pd.Series.nunique}).sort_index().iterrows():
+ for ind, r in (
+ all_tests.groupby("pval")
+ .agg({"run": pd.Series.nunique})
+ .sort_index()
+ .iterrows()
+ ):
if r.run > 0:
min_p = ind
count = r.run
break
return min_p, count
- def num_2_word(self, number: int) -> str:
- """Convert numbers less than 10 to words."""
- if number > 9:
- return number
- else:
- d = {
- 0: "no",
- 1: "one",
- 2: "two",
- 3: "three",
- 4: "four",
- 5: "five",
- 6: "six",
- 7: "seven",
- 8: "eight",
- 9: "nine",
- }
- return d[number]
-
@property
- def summary_plot(self) -> graph_objects:
+ def summary_plot(self) -> Figure:
"""Create plotly plot to summarize analysis."""
- return combo_cpm(self.data, self.pval_df, self.cp_dict)
+ return combo_cpm(self.gage.ams, self.pval_df, self.cp_dict)
@property
- @st.cache_resource
def summary_png(self) -> BytesIO:
"""Export summary plot to png in memory."""
bio = BytesIO()
@@ -136,10 +123,11 @@ def ffa_png(self):
return bio
@property
- @st.cache_resource
def cp_df(self):
"""Get a dataframe representing changepoints identified in the streaming analysis."""
- cpa_df = pd.DataFrame.from_dict(self.cp_dict, orient="index", columns=["Tests Identifying Change"])
+ cpa_df = pd.DataFrame.from_dict(
+ self.cp_dict, orient="index", columns=["Tests Identifying Change"]
+ )
cpa_df.index = cpa_df.index.date
return cpa_df
@@ -151,95 +139,66 @@ def title(self) -> str:
@property
def summary_text(self) -> str:
"""A text summary of the changepoint analysis."""
- if self.nonstationary:
- end_text = """
- some form of nonstationarity (e.g., land use change, climate change, flow regulation, etc) may be influencing flow patterns at this site.
- """
- else:
- end_text = """an assumption of nonstationary conditions is likely reasonable."""
- cp_count = self.num_2_word(len(self.cp_dict))
- if len(self.cp_dict) == 1:
- plural_text = "point was"
- else:
- plural_text = "points were"
- return (
- """
- There is **{}** evidence that the annual maximum series data at USGS gage {} are nonstationary in time. Four change
- point detection tests were completed to assess changes in the mean, variance, and overall distribution of flood
- peaks across the period of record. Significant change points were identified using a Type I error rate of 1 in
- {} and ignoring significant changes in the first {} years of data. {} statistically significant change
- {} identified, indicating that {}
- """
- ).format(
- self.evidence_level,
- st.session_state.gage_id,
- st.session_state.arlo_slider,
- st.session_state.burn_in,
- cp_count,
- plural_text,
- end_text,
- )
+ payload = {
+ "evidence_level": self.evidence_level,
+ "gage_id": self.gage.gage_id,
+ "arl0": st.session_state.arlo_slider,
+ "burn_in": st.session_state.burn_in,
+ "cp_count": num_2_word(len(self.cp_dict)),
+ "plural": len(self.cp_dict) != 1,
+ "nonstationary": self.nonstationary,
+ }
+ return render_template("changepoint_summary_1.md", payload)
+
+ @property
+ def test_description(self):
+ """Analysis methodology."""
+ payload = {
+ "alr0": st.session_state.arlo_slider,
+ "burn_in": st.session_state.burn_in,
+ }
+ return render_template("changepoint_description.md", payload)
@property
def results_text(self) -> str:
"""A detailed description of the results."""
- # Static analysis
+ # Gather stats
min_p, p_count = self.get_max_pvalue()
if min_p == 0.001:
evidence = "strong"
elif min_p == 0.005:
evidence = "moderate"
+ elif self.pval_df.isna().all().all():
+ evidence = "no"
+        else:
+            evidence = "minor" if min_p < 1 else "minimal"
- if p_count > 1:
- plural = "s"
- else:
- plural = ""
- if min_p < 1:
- sentence = "A minimum p-value of {} was obtained at {} contiguous time period{}.".format(
- min_p, self.num_2_word(p_count), plural
- )
- else:
- sentence = "There were no time-periods where a p-value less than 0.05 was observed in all tests."
- if self.pval_df.isna().all().all():
- evidence = "no"
- else:
- evidence = "minimal"
- p1 = """
- The static changepoint test showed {} evidence of a changepoint in the timeseries. {} The p-value reflects the probability that the distribution of
- flood peaks before that date has the **same** distribution as the flood peaks after the date.
- """.format(
- evidence, sentence
- )
-
- # Streaming analysis
- if len(self.cp_dict) == 1:
- p2 = """
- The streaming analysis identified one statistically significant changepoint. This changepoint was identified
- by {} distinct tests: {}."""
- else:
- groups, test_counts = self.get_change_windows()
- if len(groups) > 0:
- plural = "s"
- else:
- plural = ""
- p2 = """
- The streaming analysis identified {} statistically significant changepoints. These changepoints broadly fell
- into {} window{} where tests identified changepoints not more than 10 years apart. For a full summary of which
- tests identified changes at which dates, see table 2.
- """.format(
- self.num_2_word(len(self.cp_dict)), self.num_2_word(len(groups)), plural
- )
- return "\n\n".join([p1, p2])
+ groups, _ = self.get_change_windows()
+
+ # format
+ payload = {
+ "evidence": evidence,
+ "min_p": min_p,
+ "p_count": num_2_word(p_count),
+ "plural": p_count > 1,
+ "len_cp": len(self.cp_dict),
+ "len_cp_str": num_2_word(len(self.cp_dict)),
+ "test_count": num_2_word(
+ len(self.cp_dict[next(iter(self.cp_dict))].split(","))
+ ),
+ "grp_count": num_2_word(len(groups)),
+ "plural_2": len(groups) > 0,
+ }
+ return render_template("changepoint_summary_2.md", payload)
@property
def ffa_text(self) -> str:
- return """
- Based on the changepoint analysis results, a modified flood frequency analysis was conducted
- using truncated periods of record. The truncated periods correspond with times when the
- hydrologic regime appears to be stationary across time. Results from this analysis are shown
- in Figure 2 and Table 2.
- """
+ """Methodology for the modified FFA."""
+ return render_template("changepoint_ffa.md")
+
+ @property
+ def references(self) -> str:
+ """Citations."""
+ return render_template("changepoint_references.md")
@property
def word_data(self) -> BytesIO:
@@ -250,10 +209,7 @@ def word_data(self) -> BytesIO:
self.add_markdown_to_doc(document, self.summary_text)
document.add_picture(self.summary_png, width=Inches(6.5))
document.add_heading("Changepoint detection method", level=2)
- self.add_markdown_to_doc(
- document,
- test_description.format(st.session_state.arlo_slider, st.session_state.burn_in, st.session_state.burn_in),
- )
+ self.add_markdown_to_doc(document, self.test_description)
if len(self.cp_dict) > 0:
document.add_heading("Changepoint detection results", level=2)
self.add_markdown_to_doc(document, self.results_text)
@@ -264,18 +220,22 @@ def word_data(self) -> BytesIO:
document.add_heading("Modified flood frequency analysis", level=2)
self.add_markdown_to_doc(document, self.ffa_text)
document.add_picture(self.ffa_png, width=Inches(6.5))
- self.add_markdown_to_doc(document, "**Figure 2.** Modified flood frequency analysis.")
+ self.add_markdown_to_doc(
+ document, "**Figure 2.** Modified flood frequency analysis."
+ )
self.add_markdown_to_doc(document, "**Table 2.** Modified flood quantiles.")
self.add_table_from_df(self.ffa_df, document, index_name="Regime Period")
document.add_heading("References", level=2)
- self.add_markdown_to_doc(document, references)
+ self.add_markdown_to_doc(document, self.references)
out = BytesIO()
document.save(out)
return out
- def add_table_from_df(self, df: pd.DataFrame, document: Document, index_name: str = None):
+ def add_table_from_df(
+ self, df: pd.DataFrame, document: Document, index_name: str = None
+ ):
if index_name is not None:
df = df.copy()
cols = df.columns
@@ -301,26 +261,20 @@ def add_markdown_to_doc(self, document: Document, text: str):
p.add_run(r).bold = bold
bold = not bold
- def get_data(self, gage_id: int):
+ def validate_data(self):
"""Cache results of get_ams."""
- data = get_ams(gage_id)
+ data = self.gage.ams
# Validate
if data is None:
st.session_state.valid_data = False
st.session_state.data_comment = "Unable to retrieve data."
- elif data["peaks"] is None:
- st.session_state.valid_data = False
- st.session_state.data_comment = "Unable to retrieve data."
- elif len(data["peaks"]) < st.session_state.burn_in:
+ elif len(data) < st.session_state.burn_in:
st.session_state.valid_data = False
- st.session_state.data_comment = (
- "Not enough peaks available for analysis. {} peaks found, but burn-in length was {}".format(
- len(data["peaks"]), st.session_state.burn_in
- )
+ st.session_state.data_comment = "Not enough peaks available for analysis. {} peaks found, but burn-in length was {}".format(
+                len(data), st.session_state.burn_in
)
else:
- self.data, self.missing_years = data["peaks"], data["missing_years"]
st.session_state.valid_data = True
@@ -338,51 +292,73 @@ def define_variables():
# Instantiate analysis class and get data
if "changepoint" not in st.session_state:
- st.session_state.changepoint = ChangePointAnalysis(st.session_state.gage_id)
- st.session_state.changepoint.get_data(st.session_state.gage_id)
+ st.session_state.changepoint = ChangePointAnalysis(st.session_state.gage)
+ st.session_state.changepoint.validate_data()
+
+
+def refresh_data_editor():
+ """Set a new uuid for the data entry widget."""
+ st.session_state.data_editor_key = str(uuid.uuid4())
def make_sidebar():
"""User control for analysis."""
with st.sidebar:
st.title("Settings")
- st.session_state["gage_id"] = st.text_input("Enter USGS Gage Number:", st.session_state["gage_id"])
- st.select_slider(
- "False Positive Rate (1 in #)",
- options=VALID_ARL0S,
- value=1000,
- key="arlo_slider",
- label_visibility="visible",
+ st.session_state["gage_id"] = st.text_input(
+ "Enter USGS Gage Number:",
+ st.session_state["gage_id"],
+ on_change=refresh_data_editor,
)
+ st.session_state.gage = Gage(st.session_state["gage_id"])
+ with st.form("changepoint_params"):
+ st.select_slider(
+ "False Positive Rate (1 in #)",
+ options=VALID_ARL0S,
+ value=1000,
+ key="arlo_slider",
+ label_visibility="visible",
+ )
- st.number_input(
- "Burn-in Period",
- 0,
- 100,
- 20,
- key="burn_in",
- label_visibility="visible",
- )
- st.text("Flood Frequency Analysis")
- init_data = pd.DataFrame(columns=["Regime Start", "Regime End"])
- init_data["Regime Start"] = pd.to_datetime(init_data["Regime Start"])
- init_data["Regime End"] = pd.to_datetime(init_data["Regime End"])
-
- start_config = st.column_config.DateColumn("Regime Start", format="D/M/YYYY")
- end_config = st.column_config.DateColumn("Regime End", format="D/M/YYYY")
- st.data_editor(
- init_data,
- num_rows="dynamic",
- key="ffa_regimes",
- column_config={"Regime Start": start_config, "Regime End": end_config},
- )
+ st.number_input(
+ "Burn-in Period",
+ 0,
+ 100,
+ 20,
+ key="burn_in",
+ label_visibility="visible",
+ )
+ st.text("Flood Frequency Analysis")
+ init_data = pd.DataFrame(columns=["Regime Start", "Regime End"])
+ init_data["Regime Start"] = pd.to_datetime(init_data["Regime Start"])
+ init_data["Regime End"] = pd.to_datetime(init_data["Regime End"])
+
+ # Make data editor. Unique key allows for refreshing
+ if "data_editor_key" not in st.session_state:
+ refresh_data_editor()
+ start_config = st.column_config.DateColumn(
+ "Regime Start", format="D/M/YYYY"
+ )
+ end_config = st.column_config.DateColumn("Regime End", format="D/M/YYYY")
+ st.data_editor(
+ init_data,
+ num_rows="dynamic",
+ key=st.session_state.data_editor_key,
+ column_config={"Regime Start": start_config, "Regime End": end_config},
+ )
+ st.form_submit_button(label="Run Analysis")
+
+ st.divider()
+ write_template("data_sources_side_bar.html")
def run_analysis():
"""Run the change point model analysis."""
cpa = st.session_state.changepoint
- cpa.pval_df = get_pvalues(cpa.data)
- cpa.cp_dict = get_changepoints(cpa.data, st.session_state.arlo_slider, st.session_state.burn_in)
+ cpa.pval_df = get_pvalues(cpa.gage.ams)
+ cpa.cp_dict = get_changepoints(
+ cpa.gage.ams, st.session_state.arlo_slider, st.session_state.burn_in
+ )
@st.cache_data
@@ -412,19 +388,26 @@ def get_changepoints(data: pd.DataFrame, arl0: int, burn_in: int) -> dict:
@st.cache_data
def ffa_analysis(data: pd.DataFrame, regimes: list):
"""Run multiple flood frequency analyses for different regimes."""
- ffa_dict = {}
+ ffas = []
for r in regimes:
if "Regime Start" in r and "Regime End" in r:
sub = data.loc[r["Regime Start"] : r["Regime End"]].copy()
- peaks = sub["peak_va"]
- lp3 = log_pearson_iii(peaks)
+ peaks = sub["peak_va"].values
label = f'{r["Regime Start"]} - {r["Regime End"]}'
- ffa_dict[label] = {"peaks": sub, "lp3": lp3}
- if len(ffa_dict) > 0:
- ffa_plot = plot_lp3(ffa_dict, st.session_state.gage_id, multi_series=True)
- ffa_df = pd.DataFrame.from_dict({k: ffa_dict[k]["lp3"] for k in ffa_dict}, orient="index")
- renames = {c: f"Q{c}" for c in ffa_df.columns}
- ffa_df = ffa_df.rename(columns=renames)
+ lp3 = LP3Analysis(
+ st.session_state.gage_id,
+ peaks,
+ use_map_skew=False,
+ est_method="MOM",
+ label=label,
+ )
+ ffas.append(lp3)
+ if len(ffas) > 0:
+ ffa_plot = plot_lp3(ffas)
+ ffa_df = ffas[0].quantile_df[["Recurrence Interval (years)"]]
+ ffa_df = ffa_df.set_index("Recurrence Interval (years)")
+ for i in ffas:
+ ffa_df[i.label] = i.quantile_df["Discharge (cfs)"].values
return ffa_plot, ffa_df
else:
return None, None
@@ -434,38 +417,37 @@ def make_body():
"""Assemble main app body."""
left_col, right_col = st.columns([2, 1]) # Formatting
with left_col:
- cpa = st.session_state.changepoint
+ cpa: ChangePointAnalysis = st.session_state.changepoint
st.title(cpa.title)
warnings()
st.header("Summary")
st.markdown(cpa.summary_text)
st.plotly_chart(cpa.summary_plot, use_container_width=True)
- st.markdown("**Figure 1.** Statistical changepoint analysis.")
+ st.markdown(CP_F1_CAPTION)
st.header("Changepoint detection method")
- st.markdown(
- test_description.format(st.session_state.arlo_slider, st.session_state.burn_in, st.session_state.burn_in)
- )
+ st.markdown(cpa.test_description)
if len(cpa.cp_dict) > 0:
st.header("Changepoint detection results")
st.markdown(cpa.results_text)
if len(cpa.cp_dict) > 1:
st.table(cpa.cp_df)
- st.markdown(
- "**Table 1.** Results of the changepoint analysis, listing dates when a significant change was identified for each test statistic."
- )
+ st.markdown(CP_T1_CAPTION)
st.header("Modified flood frequency analysis")
- if len(st.session_state.ffa_regimes["added_rows"]) > 0:
- ffa_plot, ffa_df = ffa_analysis(cpa.data, st.session_state.ffa_regimes["added_rows"])
+ if len(st.session_state[st.session_state.data_editor_key]["added_rows"]) > 0:
+ ffa_plot, ffa_df = ffa_analysis(
+ cpa.gage.ams,
+ st.session_state[st.session_state.data_editor_key]["added_rows"],
+ )
if ffa_plot is not None and ffa_df is not None:
st.markdown(cpa.ffa_text)
cpa.ffa_plot = ffa_plot
cpa.ffa_df = ffa_df
st.plotly_chart(ffa_plot, use_container_width=True)
- st.markdown("**Figure 2.** Modified flood frequency analysis.")
- st.markdown("**Table 2.** Modified flood quantiles.")
+ st.markdown(CP_F2_CAPTION)
+ st.markdown(CP_T2_CAPTION)
st.table(ffa_df)
else:
st.info(
@@ -473,17 +455,28 @@ def make_body():
)
st.header("References")
- st.markdown(references)
+ st.markdown(cpa.references)
- st.download_button("Download analysis", cpa.word_data, f"changepoint_analysis_{st.session_state.gage_id}.docx")
+ st.download_button(
+ "Download analysis",
+ cpa.word_data,
+ f"changepoint_analysis_{st.session_state.gage_id}.docx",
+ )
def warnings():
"""Print warnings on data validity etc."""
- if st.session_state.changepoint.data is not None and "peak_va" in st.session_state.changepoint.data.columns:
- if st.session_state.changepoint.missing_years:
+ if (
+ st.session_state.changepoint.gage.ams is not None
+ and "peak_va" in st.session_state.changepoint.gage.ams.columns
+ ):
+ if st.session_state.changepoint.gage.missing_dates_ams:
st.warning(
- f"Missing {len(st.session_state.changepoint.missing_years)} dates between {st.session_state.changepoint.data.index.min()} and {st.session_state.changepoint.data.index.max()}"
+ "Missing {} dates between {} and {}".format(
+ len(st.session_state.changepoint.gage.missing_dates_ams),
+ st.session_state.changepoint.gage.ams.index.min(),
+ st.session_state.changepoint.gage.ams.index.max(),
+ )
)
diff --git a/hydroshift/_pages/homepage.py b/hydroshift/_pages/homepage.py
new file mode 100644
index 0000000..f3c2a00
--- /dev/null
+++ b/hydroshift/_pages/homepage.py
@@ -0,0 +1,40 @@
+import streamlit as st
+
+from hydroshift.consts import DEFAULT_GAGE
+from hydroshift.utils.jinja import write_template
+
+
+def homepage():
+ """Landing page for app."""
+ st.set_page_config(page_title="HydroShift", layout="wide")
+
+ left_col, right_col, _ = st.columns([2, 1.5, 0.2], gap="large")
+
+ with left_col:
+ st.title("HydroShift")
+ st.subheader("USGS Streamflow Change Detection Tool")
+ write_template("app_description.md")
+
+ st.write("") # blank line for more space
+ gage_input = st.text_input("Enter a USGS Gage Number to begin:")
+ col1, col2 = st.columns([1, 8])
+ with col1:
+ submit = st.button("Submit")
+ with col2:
+ demo = st.button("Use Demo Data")
+
+ if submit and gage_input:
+ st.session_state["gage_id"] = gage_input
+ if demo:
+ st.session_state["gage_id"] = DEFAULT_GAGE
+ if st.session_state["gage_id"] is not None:
+ # try:
+ # st.session_state["site_data"] = load_site_data(st.session_state["gage_id"])
+ # except ValueError:
+ # st.error(f"Data not found for gage: {st.session_state['gage_id']}")
+ st.rerun()
+ with right_col:
+ st.title("")
+ st.image("hydroshift/images/logo_base.png")
+
+ write_template("footer.html")
diff --git a/hydroshift/_pages/summary.py b/hydroshift/_pages/summary.py
index f1259b1..59caf23 100644
--- a/hydroshift/_pages/summary.py
+++ b/hydroshift/_pages/summary.py
@@ -1,12 +1,13 @@
+from datetime import date
+
import folium
import streamlit as st
-from data_retrieval import (
- get_ams,
- get_daily_values,
- get_flow_stats,
- get_monthly_values,
-)
-from plots import (
+from streamlit_folium import st_folium
+
+from hydroshift.utils.data_retrieval import Gage
+from hydroshift.utils.ffa import LP3Analysis
+from hydroshift.utils.jinja import write_template
+from hydroshift.utils.plots import (
plot_ams,
plot_ams_seasonal,
plot_daily_mean,
@@ -14,7 +15,118 @@
plot_lp3,
plot_monthly_mean,
)
-from streamlit_folium import st_folium
+
+
+def section_ams(gage: Gage):
+ """Display the AMS section."""
+ if gage.ams is not None:
+ if gage.missing_dates_ams is not None and len(gage.missing_dates_ams) > 0:
+ st.warning(f"Missing {len(gage.missing_dates_ams)} AMS records")
+ st.plotly_chart(plot_ams(gage.ams, gage.gage_id))
+ show_data = st.checkbox("Show AMS Data Table")
+ if show_data:
+ st.dataframe(gage.ams)
+
+
+def section_flow_stats(gage: Gage):
+ """Display the flow statistics section."""
+ if gage.flow_stats is not None:
+ st.plotly_chart(plot_flow_stats(gage.flow_stats, gage.gage_id))
+ show_data = st.checkbox("Show Daily Stats Data Table")
+ if show_data:
+ st.dataframe(gage.flow_stats)
+
+
+def section_lp3(gage: Gage):
+ """Display the FFA section."""
+ if gage.ams is not None:
+ # Options
+ opt_col_1, opt_col_2 = st.columns(2)
+ with opt_col_1:
+ est_method = st.selectbox(
+ "Estimation Method",
+ ["L-moments", "Method of moments", "Maximum Likelihood"],
+ index=1,
+ )
+ est_method = {
+ "L-moments": "LMOM",
+ "Method of moments": "MOM",
+ "Maximum Likelihood": "MLE",
+ }[est_method]
+ with opt_col_2:
+ st_skew = st.toggle(
+ "Use regional skew", value=False, disabled=not gage.has_regional_skew
+ )
+ if st_skew:
+ use_map = True
+ else:
+ use_map = False
+
+ # Analysis and display
+ lp3 = LP3Analysis(gage.gage_id, gage.ams_vals, use_map, est_method, "")
+ if gage.missing_dates_ams is not None and len(gage.missing_dates_ams) > 0:
+ st.warning(f"Missing {len(gage.missing_dates_ams)} LP3 records")
+ st.plotly_chart(plot_lp3(lp3), use_container_width=True)
+ show_data = st.checkbox("Show LP3 Data Table")
+ if show_data:
+ st.dataframe(lp3.quantile_df)
+
+
+def section_ams_seasonal(gage: Gage):
+ """Display the ams with seasonal attributes section."""
+ if gage.ams is not None:
+ if gage.missing_dates_ams:
+ st.warning(f"Missing {len(gage.missing_dates_ams)} AMS seasonal records")
+ st.plotly_chart(
+ plot_ams_seasonal(gage.ams, gage.gage_id), use_container_width=True
+ )
+ show_data = st.checkbox("Show Ranked Seasonal Data Table")
+ if show_data:
+ st.dataframe(gage.ams)
+
+
+def section_daily_mean(gage: Gage):
+ """Display the daily mean discharge section."""
+ plot_col, input_col = st.columns([8, 2])
+
+ with input_col:
+ st.write("") # blank line for more space
+ st.write("Daily Mean Input Dates")
+ start_date = st.date_input("Start Date", value=date(2024, 1, 1))
+ end_date = st.date_input("End Date", value=date(2024, 12, 31))
+
+ data, missing_dates = gage.get_daily_values(
+ start_date.strftime("%Y-%m-%d"),
+ end_date.strftime("%Y-%m-%d"),
+ )
+ missing_dates = gage.missing_dates_daily_values(start_date, end_date)
+ with plot_col:
+ if data is not None:
+ if missing_dates:
+ st.warning(f"Missing {len(missing_dates)} daily mean records")
+ st.plotly_chart(
+ plot_daily_mean(data, gage.gage_id), use_container_width=True
+ )
+ show_data = st.checkbox("Show Daily Mean Data Table")
+ if show_data:
+ st.dataframe(data)
+
+
+def section_monthly_mean(gage: Gage):
+ """Display the monthly mean discharge section."""
+ data = gage.get_monthly_values()
+ missing_dates = gage.missing_dates_monthly_values
+ if data is not None and "mean_va" in data.columns:
+ if missing_dates:
+ st.warning(f"Missing {len(missing_dates)} monthly records")
+ st.plotly_chart(
+ plot_monthly_mean(data, st.session_state["gage_id"]),
+ use_container_width=True,
+ )
+
+ show_data = st.checkbox("Show Monthly Mean Data Table")
+ if show_data:
+ st.dataframe(data)
def summary():
@@ -23,7 +135,10 @@ def summary():
# Sidebar for input
with st.sidebar:
st.title("Settings")
- st.session_state["gage_id"] = st.text_input("Enter USGS Gage Number:", st.session_state["gage_id"])
+ st.session_state["gage_id"] = st.text_input(
+ "Enter USGS Gage Number:", st.session_state["gage_id"]
+ )
+ gage = Gage(st.session_state["gage_id"])
# Toggle plots
st.markdown("### Toggle Plots")
@@ -34,119 +149,51 @@ def summary():
show_daily_mean = st.checkbox("Daily Mean Streamflow", value=True)
show_monthly_mean = st.checkbox("Monthly Mean Streamflow", value=True)
+ # Data sources
+ st.divider()
+ write_template("data_sources_side_bar.html")
+
if st.session_state["gage_id"]:
- with st.spinner("Loading gage data..."): # This is mainly here to clear previous pages while data loads.
- try:
- if st.session_state["gage_id"] == "testing":
- site_data = {
- "Site Number": "-99999",
- "Station Name": "Wet River",
- "Latitude": 45,
- "Longitude": -103,
- "Drainage Area": 0,
- "HUC Code": 0,
- "Elevation Datum": "NAVD88",
- }
- else:
- site_data = st.session_state["site_data"]
- lat, lon = site_data["Latitude"], site_data["Longitude"]
- except ValueError as e:
- lat, lon = None, None
- st.error(f"{e}")
+ with st.spinner(
+ "Loading gage data..."
+ ): # This is here to clear previous pages while data loads.
+ pass
col2, col3 = st.columns([6, 2], gap="large")
- if lat and lon:
- with col3:
+ if gage.latitude and gage.longitude:
+ with col3: # Site map
st.subheader("Gage Location")
# Create Folium Map
- mini_map = folium.Map(location=[lat, lon], zoom_start=7, width=200, height=200)
+ mini_map = folium.Map(
+ location=[gage.latitude, gage.longitude],
+ zoom_start=7,
+ width=200,
+ height=200,
+ )
folium.Marker(
- [lat, lon], popup=f"Gage {st.session_state['gage_id']}", icon=folium.Icon(color="green")
+ [gage.latitude, gage.longitude],
+ popup=f"Gage {st.session_state['gage_id']}",
+ icon=folium.Icon(color="green"),
).add_to(mini_map)
st_folium(mini_map, width=250, height=250)
# Display site metadata
st.subheader("Site Information")
- st.markdown(
- f"""
- **Site Number:** {site_data["Site Number"]}
- **Station Name:** {site_data["Station Name"]}
- **Latitude:** {site_data["Latitude"]}
- **Longitude:** {site_data["Longitude"]}
- **Drainage Area (sq.mi.):** {site_data["Drainage Area"]}
- **HUC Code:** {site_data["HUC Code"]}
- """,
- unsafe_allow_html=True,
- )
+ write_template("site_summary.md", gage.site_data)
with col2: # Center column for plots
+ gage.raise_warnings()
if show_ams:
- ams = get_ams(st.session_state["gage_id"])
- data, missing_years = ams["peaks"], ams["missing_years"]
- if data is not None and "peak_va" in data.columns:
- if missing_years:
- st.warning(f"Missing {len(missing_years)} AMS records")
- st.plotly_chart(plot_ams(data, st.session_state["gage_id"]), use_container_width=True)
- show_data = st.checkbox("Show AMS Data Table")
- if show_data:
- st.dataframe(data)
-
+ section_ams(gage)
if show_daily_stats:
- data = get_flow_stats(st.session_state["gage_id"])
- if data is not None and "mean_va" in data.columns:
- st.plotly_chart(plot_flow_stats(data, st.session_state["gage_id"]), use_container_width=True)
- show_data = st.checkbox("Show Daily Stats Data Table")
- if show_data:
- st.dataframe(data)
-
+ section_flow_stats(gage)
if show_lp3:
- ams = get_ams(st.session_state["gage_id"])
- if ams["peaks"] is not None:
- if ams["missing_years"]:
- st.warning(f"Missing {len(ams["missing_years"])} LP3 records")
- st.plotly_chart(plot_lp3(ams, st.session_state["gage_id"]), use_container_width=True)
- show_data = st.checkbox("Show LP3 Data Table")
- if show_data:
- st.dataframe(ams["lp3"])
+ section_lp3(gage)
if show_ams_seasonal:
- ams = get_ams(st.session_state["gage_id"])
- data, missing_years = ams["peaks"], ams["missing_years"]
- if data is not None and "peak_va" in data.columns:
- if missing_years:
- st.warning(f"Missing {len(missing_years)} AMS seasonal records")
- st.plotly_chart(plot_ams_seasonal(data, st.session_state["gage_id"]), use_container_width=True)
- show_data = st.checkbox("Show Ranked Seasonal Data Table")
- if show_data:
- st.dataframe(data)
-
+ section_ams_seasonal(gage)
if show_daily_mean:
- plot_col, input_col = st.columns([8, 2])
-
- with input_col:
- st.write("") # blank line for more space
- st.write("Daily Mean Input Dates")
- start_date = st.text_input("Start Date (YYYY-MM-DD)", "2024-01-01")
- end_date = st.text_input("End Date (YYYY-MM-DD)", "2024-12-31")
-
- data, missing_dates = get_daily_values(st.session_state["gage_id"], start_date, end_date)
- with plot_col:
- if data is not None:
- if missing_dates:
- st.warning(f"Missing {len(missing_dates)} daily mean records")
- st.plotly_chart(plot_daily_mean(data, st.session_state["gage_id"]), use_container_width=True)
- show_data = st.checkbox("Show Daily Mean Data Table")
- if show_data:
- st.dataframe(data)
-
+ section_daily_mean(gage)
if show_monthly_mean:
- data, missing_dates = get_monthly_values(st.session_state["gage_id"])
- if data is not None and "mean_va" in data.columns:
- if missing_dates:
- st.warning(f"Missing {len(missing_dates)} monthly records")
- st.plotly_chart(plot_monthly_mean(data, st.session_state["gage_id"]), use_container_width=True)
-
- show_data = st.checkbox("Show Monthly Mean Data Table")
- if show_data:
- st.dataframe(data)
+ section_monthly_mean(gage)
diff --git a/hydroshift/consts.py b/hydroshift/consts.py
index d239930..8eaa36e 100644
--- a/hydroshift/consts.py
+++ b/hydroshift/consts.py
@@ -7,6 +7,8 @@
R_SERVER_PORT = 9999
R_SERVER_URL = f"http://127.0.0.1:{R_SERVER_PORT}"
+### FFA ###
+
### Changepoint Analysis ###
VALID_ARL0S = [
370,
@@ -31,3 +33,24 @@
50000,
]
METRICS = ["Cramer-von-Mises", "Kolmogorov-Smirnov", "Lepage", "Mann-Whitney", "Mood"]
+
+### External URLs ###
+NWIS_URL = "https://waterdata.usgs.gov/nwis?"
+PEAKFQ_URL = "https://www.usgs.gov/tools/peakfq"
+
+### Text snippets ###
+DATA_SOURCES_STR = f"Data: [USGS NWIS]({NWIS_URL}) and [USGS PEAKFQ]({PEAKFQ_URL})"
+
+CP_F1_CAPTION = "**Figure 1.** Statistical changepoint analysis."
+CP_T1_CAPTION = "**Table 1.** Results of the changepoint analysis, listing dates when a significant change was identified for each test statistic."
+CP_F2_CAPTION = "**Figure 2.** Modified flood frequency analysis."
+CP_T2_CAPTION = "**Table 2.** Modified flood quantiles."
+
+### MISC ###
+REGULATION_MAP = {
+ "3": "Discharge affected by Dam Failure",
+ "5": "Discharge affected to unknown degree by Regulation or Diversion",
+ "6": "Discharge affected by Regulation or Diversion",
+ "9": "Discharge due to Snowmelt, Hurricane, Ice-Jam or Debris Dam breakup",
+ "C": "All or part of the record affected by Urbanization, Mining, Agricultural changes, Channelization, or other",
+}
diff --git a/hydroshift/data/skewmap_4326.tif b/hydroshift/data/skewmap_4326.tif
new file mode 100644
index 0000000..53b0fb7
Binary files /dev/null and b/hydroshift/data/skewmap_4326.tif differ
diff --git a/hydroshift/data_retrieval.py b/hydroshift/data_retrieval.py
deleted file mode 100644
index 2b41355..0000000
--- a/hydroshift/data_retrieval.py
+++ /dev/null
@@ -1,166 +0,0 @@
-import logging
-
-import numpy as np
-import pandas as pd
-import scipy.stats as stats
-import streamlit as st
-from dataretrieval import NoSitesError, nwis
-from scipy.stats import genpareto
-
-
-@st.cache_data
-def get_ams(gage_id):
- """Fetches Annual Maximum Series (AMS) peak flow data for a given gage."""
- try:
- if gage_id == "testing":
- df = fake_ams()
- else:
- df = nwis.get_record(service="peaks", sites=[gage_id], ssl_check=True)
- except NoSitesError:
- logging.warning(f"Peaks could not be found for gage id: {gage_id}")
- return {"peaks": None, "lp3": None, "missing_years": None}
-
- df["season"] = ((df.index.month % 12 + 3) // 3).map({1: "Winter", 2: "Spring", 3: "Summer", 4: "Fall"})
-
- missing_years = check_missing_dates(df, "water_year")
-
- df = df.dropna(subset="peak_va")
- return {"peaks": df, "lp3": log_pearson_iii(df["peak_va"]), "missing_years": missing_years}
-
-
-@st.cache_data
-def get_flow_stats(gage_id):
- """Fetches flow statistics for a given gage."""
- try:
- df = nwis.get_stats(sites=gage_id, ssl_check=True)[0]
- except IndexError:
- logging.warning(f"Flow stats could not be found for gage_id: {gage_id}")
- return None
-
- return df
-
-
-@st.cache_data
-def load_site_data(gage_number: str) -> dict:
- """Query NWIS for site information"""
- try:
- resp = nwis.get_record(sites=gage_number, service="site", ssl_check=True)
-
- except ValueError:
- raise ValueError(f"Gage {gage_number} not found")
-
- return {
- "Site Number": resp["site_no"].iloc[0],
- "Station Name": resp["station_nm"].iloc[0],
- "Latitude": float(resp["dec_lat_va"].iloc[0]),
- "Longitude": float(resp["dec_long_va"].iloc[0]),
- "Drainage Area": resp["drain_area_va"].iloc[0],
- "HUC Code": resp["huc_cd"].iloc[0],
- "Elevation Datum": resp["alt_datum_cd"].iloc[0],
- }
-
-
-@st.cache_data
-def log_pearson_iii(peak_flows: pd.Series, standard_return_periods: list = [2, 5, 10, 25, 50, 100, 500]):
- log_flows = np.log10(peak_flows.values)
- mean_log = np.mean(log_flows)
- std_log = np.std(log_flows, ddof=1)
- skew_log = stats.skew(log_flows)
-
- return {
- str(rp): int(10 ** (mean_log + stats.pearson3.ppf(1 - 1 / rp, skew_log) * std_log))
- for rp in standard_return_periods
- }
-
-
-@st.cache_data
-def get_daily_values(gage_id, start_date, end_date):
- """Fetches mean daily flow values for a given gage."""
- try:
- dv = nwis.get_dv(gage_id, start_date, end_date, ssl_check=True)[0]
- except Exception:
- logging.warning(f"Daily Values could not be found for gage_id: {gage_id}")
- return None
-
- missing_dates = check_missing_dates(dv, "daily")
-
- return dv, missing_dates
-
-
-@st.cache_data
-def get_monthly_values(gage_id):
- """Fetches mean monthly flow values for a given gage and assigns a datetime column based on the year and month."""
- try:
- mv = nwis.get_stats(gage_id, statReportType="monthly", ssl_check=True)[0]
- except Exception:
- logging.warning(f"Monthly Values could not be found for gage_id: {gage_id}")
- return None
-
- mv = mv.rename(columns={"year_nu": "year", "month_nu": "month"})
-
- mv["date"] = pd.to_datetime(mv[["year", "month"]].assign(day=1))
-
- mv = mv.sort_values("date")
- missing_dates = check_missing_dates(mv, "monthly")
-
- return mv, missing_dates
-
-
-def check_missing_dates(df, freq):
- """Checks for missing dates in a DataFrame.
-
- Parameters
- ----------
- df (pd.DataFrame): The DataFrame containing time-series data.
- freq (str): Either 'daily', 'monthly', or 'water_year' to specify the type of data.
-
- """
- if freq == "daily":
- if not isinstance(df.index, pd.DatetimeIndex):
- df = df.set_index("datetime")
- full_range = pd.date_range(start=df.index.min(), end=df.index.max(), freq="D")
-
- elif freq == "monthly":
- df["date"] = pd.to_datetime(df["date"])
- full_range = pd.date_range(start=df["date"].min(), end=df["date"].max(), freq="MS")
-
- elif freq == "water_year":
- if not isinstance(df.index, pd.DatetimeIndex):
- df = df.set_index("datetime")
-
- df["water_year"] = df.index.year.where(df.index.month < 10, df.index.year + 1)
-
- full_water_years = set(range(df["water_year"].min(), df["water_year"].max() + 1))
- existing_water_years = set(df["water_year"])
- missing_years = sorted(full_water_years - existing_water_years)
-
- return missing_years
- else:
- raise ValueError("Invalid frequency. Use 'daily', 'monthly', or 'water_year'.")
-
- missing_dates = full_range.difference(df.index if freq == "daily" else df["date"])
-
- return list(missing_dates)
-
-
-def fake_ams() -> pd.DataFrame:
- """Generate a timeseries from two distributions."""
- rs = 1
- rvs = []
- segments = [
- {"length": 50, "loc": 1000, "scale": 50, "c": 0.1},
- {"length": 50, "loc": 900, "scale": 500, "c": 0.5},
- {"length": 50, "loc": 1000, "scale": 100, "c": 0.1},
- ]
- for s in segments:
- length = s["length"]
- params = {k: v for k, v in s.items() if k != "length"}
- dist = genpareto(**params)
- rvs.append(dist.rvs(size=length, random_state=rs))
- rs += 1
-
- rvs = np.concatenate(rvs, axis=0)
- dates = pd.date_range(start="1900-01-01", periods=len(rvs), freq="YE")
- water_year = dates.year
- df = pd.DataFrame({"datetime": dates, "peak_va": rvs, "water_year": water_year}).set_index("datetime")
- return df
diff --git a/hydroshift/stats/__init__.py b/hydroshift/stats/__init__.py
deleted file mode 100644
index 53b9be3..0000000
--- a/hydroshift/stats/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-"""Statistical methods for tool."""
diff --git a/hydroshift/streamlit_app.py b/hydroshift/streamlit_app.py
index d6c7d36..8647b21 100644
--- a/hydroshift/streamlit_app.py
+++ b/hydroshift/streamlit_app.py
@@ -1,51 +1,7 @@
import streamlit as st
-from data_retrieval import load_site_data
from session import init_session_state
-from hydroshift._pages import changepoint, summary
-from hydroshift.consts import DEFAULT_GAGE
-
-
-def homepage():
- """Landing page for app."""
- st.set_page_config(page_title="HydroShift", layout="wide")
-
- left_col, right_col, _ = st.columns([2, 1.5, 0.2], gap="large")
-
- with left_col:
- st.title("HydroShift")
- st.subheader("USGS Streamflow Change Detection Tool")
- st.markdown(
- """
- **HydroShift** is a tool for exploring streamflow trends in USGS gage data. This web app
- provides interactive plots of annual peak discharge, seasonality of flood peaks, and daily and monthly discharge
- trends. Beyond data summary, the application includes a statistical changepoint analysis that will detect when
- significant changes in the distribution of flood peaks have occured.
- """
- )
-
- st.write("") # blank line for more space
- gage_input = st.text_input("Enter a USGS Gage Number to begin:")
- col1, col2 = st.columns([1, 8])
- with col1:
- submit = st.button("Submit")
- with col2:
- demo = st.button("Use Demo Data")
-
- if submit and gage_input:
- st.session_state["gage_id"] = gage_input
- if demo:
- st.session_state["gage_id"] = DEFAULT_GAGE
-
- if st.session_state["gage_id"] is not None:
- try:
- st.session_state["site_data"] = load_site_data(st.session_state["gage_id"])
- except ValueError:
- st.error(f"Data not found for gage: {st.session_state['gage_id']}")
- st.rerun()
- with right_col:
- st.title("")
- st.image("hydroshift/images/logo_base.png")
+from hydroshift._pages import changepoint, homepage, summary
def navigator():
diff --git a/hydroshift/templates/app_description.md b/hydroshift/templates/app_description.md
new file mode 100644
index 0000000..cf6f1e0
--- /dev/null
+++ b/hydroshift/templates/app_description.md
@@ -0,0 +1,4 @@
+**HydroShift** is a tool for exploring streamflow trends in USGS gage data. This web app
+provides interactive plots of annual peak discharge, seasonality of flood peaks, and daily and monthly discharge
+trends. Beyond data summary, the application includes a statistical changepoint analysis that will detect when
+significant changes in the distribution of flood peaks have occurred.
diff --git a/hydroshift/text/changepoint.py b/hydroshift/templates/changepoint_description.md
similarity index 79%
rename from hydroshift/text/changepoint.py
rename to hydroshift/templates/changepoint_description.md
index a88be2f..8668eae 100644
--- a/hydroshift/text/changepoint.py
+++ b/hydroshift/templates/changepoint_description.md
@@ -1,6 +1,3 @@
-"""Storage for large blocks of changepoint app text."""
-
-test_description = """
A changepoint analysis was performed on the series of annual flood peaks Using the change point model of Ross (2015). For each date in the timeseries, the distribution of flood peaks leading up to that date were compared with the distribution after that date using a variety of statistical tests
- **Mood**: Measures changes in the variance of the peak series.
@@ -12,10 +9,8 @@
These test statistics were utilized in two distinct ways in this analysis. A static analysis was performed by evaluating the test statistic at each date of the timeseries and using a two-sample test to determine the statistical significance of the distribution differences. The P-values from this analysis are shown in the second panel of Figure 1.
A streaming analysis was performed by treating the data as a stream of values and repeating the static analysis after each new value was added. If the test statistic exceeds a specified threshold at any point within the subseries, a changepoint is marked, and a new data series is initialized for all further test statistics. The resulting change points are shown as dashed red lines in the top panel of Figure 1.
- - The threshold for identifying changepoints in the streaming analysis is defined using an Average Run Length (ARL0) parameter. ARL0 reflects the frequency with which a false positive would be raised on a stationary timeseries (e.g., for an ARL0 of 1,000 a false changepoint would be identified on a stationary timeseries on average every 1,000 samples.). For this analysis, an ARL0 of {} was used.
- - A burn-in period of {} years was selected to ignore singificant change points in the first {} years of the record due to the influence of small sample sizes.
+ - The threshold for identifying changepoints in the streaming analysis is defined using an Average Run Length (ARL0) parameter. ARL0 reflects the frequency with which a false positive would be raised on a stationary timeseries (e.g., for an ARL0 of 1,000 a false changepoint would be identified on a stationary timeseries on average every 1,000 samples.). For this analysis, an ARL0 of {{ arl0 }} was used.
+ - A burn-in period of {{ burn_in }} years was selected to ignore significant change points in the first {{ burn_in }} years of the record due to the influence of small sample sizes.
+
+If the flood peak series contained any gaps, the data around the gap was joined as if there was no gap between the data points. For large data gaps, this assumption could lead to inaccurate results.
-"""
-references = """
-Gordon J. Ross (2015)., "Parametric and Nonparametric Sequential Change Detection in R: The cpm Package.", Journal of Statistical Software, 66(3), 1-20., https://www.jstatsoft.org/v66/i03/.
-"""
diff --git a/hydroshift/templates/changepoint_ffa.md b/hydroshift/templates/changepoint_ffa.md
new file mode 100644
index 0000000..5d7fd26
--- /dev/null
+++ b/hydroshift/templates/changepoint_ffa.md
@@ -0,0 +1,2 @@
+Based on the changepoint analysis results, a modified flood frequency analysis was conducted using truncated periods of record. The truncated periods correspond with times when the hydrologic regime appears to be stationary across time. Results from this analysis are shown
+in Figure 2 and Table 2.
diff --git a/hydroshift/templates/changepoint_references.md b/hydroshift/templates/changepoint_references.md
new file mode 100644
index 0000000..078a43d
--- /dev/null
+++ b/hydroshift/templates/changepoint_references.md
@@ -0,0 +1 @@
+Gordon J. Ross (2015)., "Parametric and Nonparametric Sequential Change Detection in R: The cpm Package.", Journal of Statistical Software, 66(3), 1-20., [https://www.jstatsoft.org/v66/i03/](https://www.jstatsoft.org/v66/i03/).
diff --git a/hydroshift/templates/changepoint_summary_1.md b/hydroshift/templates/changepoint_summary_1.md
new file mode 100644
index 0000000..d556d6d
--- /dev/null
+++ b/hydroshift/templates/changepoint_summary_1.md
@@ -0,0 +1,15 @@
+There is **{{ evidence_level }}** evidence that the annual maximum series data at USGS gage {{ gage_id }} are nonstationary in time. Four change
+point detection tests were completed to assess changes in the mean, variance, and overall distribution of flood
+peaks across the period of record. Significant change points were identified using a Type I error rate of 1 in
+{{ arl0 }} and ignoring significant changes in the first {{ burn_in }} years of data. {{ cp_count }} statistically significant
+{% if plural -%}
+changepoints were
+{%- else -%}
+changepoint was
+{%- endif %}
+identified, indicating that
+{% if nonstationary -%}
+some form of nonstationarity (e.g., land use change, climate change, flow regulation, etc) may be influencing flow patterns at this site.
+{%- else -%}
+an assumption of stationary conditions is likely reasonable.
+{%- endif %}
diff --git a/hydroshift/templates/changepoint_summary_2.md b/hydroshift/templates/changepoint_summary_2.md
new file mode 100644
index 0000000..ef4445d
--- /dev/null
+++ b/hydroshift/templates/changepoint_summary_2.md
@@ -0,0 +1,16 @@
+The static changepoint test showed {{ evidence }} evidence of a changepoint in the timeseries.
+{% if min_p < 1 -%}
+A minimum p-value of {{ min_p }} was obtained at {{ p_count }} contiguous time period{{ 's' if plural else '' }}.
+{%- else -%}
+There were no time-periods where a p-value less than 0.05 was observed in all tests.
+{%- endif %}
+The p-value reflects the probability that the distribution of
+flood peaks before that date has the **same** distribution as the flood peaks after the date.
+
+{% if len_cp < 1 -%}
+The streaming analysis identified one statistically significant changepoint. This changepoint was identified by {{ test_count }} distinct tests: {{ test_list }}.
+{%- else -%}
+The streaming analysis identified {{ len_cp_str }} statistically significant changepoints. These changepoints broadly fell
+into {{ grp_count }} window{{ 's' if plural_2 else '' }} where tests identified changepoints not more than 10 years apart. For a full summary of which
+tests identified changes at which dates, see Table 1.
+{%- endif %}
diff --git a/hydroshift/templates/data_sources_side_bar.html b/hydroshift/templates/data_sources_side_bar.html
new file mode 100644
index 0000000..dcec97b
--- /dev/null
+++ b/hydroshift/templates/data_sources_side_bar.html
@@ -0,0 +1,10 @@
+