diff --git a/.cursor/rules/python-jupyter.mdc b/.cursor/rules/python-jupyter.mdc
index 6fa418e..3b8d0b4 100644
--- a/.cursor/rules/python-jupyter.mdc
+++ b/.cursor/rules/python-jupyter.mdc
@@ -15,6 +15,7 @@ alwaysApply: false
     - Prefer vectorized operations over explicit loops for better performance.
     - Use descriptive variable names that reflect the data they contain.
     - Follow PEP 8 style guidelines for Python code.
+    - Only import modules and names that are actually used.
 
     Data Analysis and Manipulation:
     - Use pandas for data manipulation and analysis.
diff --git a/README.md b/README.md
deleted file mode 100644
index 5c212eb..0000000
--- a/README.md
+++ /dev/null
@@ -1,83 +0,0 @@
-# Data Exploration Project
-
-This project provides a structured environment for data exploration and analysis using Jupyter notebooks.
-
-## Getting Started
-
-### Prerequisites
-
-- Python 3.8 or higher
-- pip (Python package installer)
-
-### Setup Instructions
-
-1. **Clone or download this repository**
-
-2. **Create a virtual environment (recommended)**
-
-   ```bash
-   # On macOS/Linux
-   python3 -m venv venv
-   source venv/bin/activate
-
-   # On Windows
-   python -m venv venv
-   venv\Scripts\activate
-   ```
-
-3. **Install the required packages**
-
-   ```bash
-   pip install -r requirements.txt
-   ```
-
-4. **Launch Jupyter Notebook**
-
-   ```bash
-   jupyter notebook
-   ```
-
-5. **Open the data_exploration.ipynb notebook**
-   - A browser window should open automatically. If not, copy the URL displayed in the terminal and paste it into your web browser.
-   - Navigate to and click on `data_exploration.ipynb` to open the notebook.
-
-## Project Structure
-
-- `data_exploration.ipynb`: Starter Jupyter notebook for data analysis
-- `requirements.txt`: Contains all the Python dependencies
-- `data/`: Directory where you can store your datasets (create this as needed)
-
-## Adding Your Data
-
-You can add your data files to the project:
-
-1. Create a `data` directory (if not already present):
-
-   ```bash
-   mkdir data
-   ```
-
-2. Place your data files (CSV, Excel, etc.) in the `data` directory.
-
-3. In the notebook, load your data:
-   ```python
-   df = pd.read_csv('data/your_file.csv')
-   ```
-
-## Common Tasks
-
-- **Data Loading**: Use pandas to read different file formats (CSV, Excel, JSON, etc.)
-- **Data Cleaning**: Handle missing values, duplicates, outliers
-- **Exploratory Analysis**: Descriptive statistics, correlation analysis
-- **Data Visualization**: Create charts and plots using matplotlib and seaborn
-- **Feature Engineering**: Create new features or transform existing ones
-- **Model Building**: Build and evaluate machine learning models using scikit-learn
-
-## Useful Extensions
-
-Consider installing these additional Jupyter extensions for enhanced productivity:
-
-```bash
-pip install jupyterlab  # JupyterLab interface
-pip install nbextensions  # Notebook extensions
-```
diff --git a/analysis.ipynb b/analysis.ipynb
deleted file mode 100644
index 9638289..0000000
--- a/analysis.ipynb
+++ /dev/null
@@ -1,43 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "id": "cad336dc-cdc4-4dc0-8a6a-e4df4cacea45",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Hello World\n"
-     ]
-    }
-   ],
-   "source": [
-    "print(\"Hello World\")"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.11.9"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}
diff --git a/analysis.py b/analysis.py
new file mode 100644
index 0000000..6959587
--- /dev/null
+++ b/analysis.py
@@ -0,0 +1,692 @@
+import pandas as pd
+import os
+import glob
+from dme_dictionary import DATA_DICTIONARY
+import numpy as np
+import locale
+
+# Switch to the user's default locale (intended for currency formatting)
+locale.setlocale(locale.LC_ALL, '')
+
+# Configure pandas so wide DataFrames are printed in full
+pd.set_option('display.max_columns', None)  # No limit on displayed columns
+pd.set_option('display.width', None)        # Auto-detect the display width
+# Do not wrap wide DataFrame output across multiple lines
+pd.set_option('display.expand_frame_repr', False)
+
+# Root data directory (one sub-directory per year) and the years to load
+data_dir = 'data'
+years = range(2018, 2023)  # 2018 through 2022 (range end is exclusive)
+
+# Per-year DataFrames, combined into a single DataFrame after the loop
+dfs = []
+
+# Load one CSV per year, skipping years that have no CSV file
+for year in years:
+    # Find every CSV file in this year's directory
+    csv_files = glob.glob(f"{data_dir}/{year}/*.csv")
+
+    if not csv_files:
+        print(f"No CSV files found for year {year}")
+        continue
+
+    # Use the first match (one file per year is expected; glob order is not guaranteed)
+    csv_file = csv_files[0]
+    print(f"Loading data from {csv_file}")
+
+    # low_memory=False parses the whole file at once so column dtypes are inferred consistently
+    df = pd.read_csv(csv_file, low_memory=False)
+
+    # Tag every row with the year it was loaded for
+    df['year'] = year
+
+    # Collect this year's DataFrame for the later concatenation
+    dfs.append(df)
+
+
+def format_dollar_amount(amount):
+    """Return amount as "$X.XM" if it is >= 1,000,000, otherwise as "$X.XK"."""
+    if amount >= 1000000:
+        return f"${amount/1000000:.1f}M"
+    else:
+        # Also reached for amounts under 1,000 and for negative values.
+        return f"${amount/1000:.1f}K"
+
+
+# Combine all DataFrames into one
+if dfs:
+    combined_df = pd.concat(dfs, ignore_index=True)
+    print(f"Combined DataFrame shape: {combined_df.shape}")
+
+    # Display the first few rows with all columns
+    # print("\nFirst few rows of the combined DataFrame:")
+    # print(combined_df.head())
+
+    # Create a dictionary to store column information
+    column_info = {}
+
+    # Check which columns from our data are in the data dictionary
+    for column in combined_df.columns:
+        if column in DATA_DICTIONARY:
+            column_info[column] = DATA_DICTIONARY[column]
+        else:
+            column_info[column] = "Description not available"
+
+    # Display column information
+    print("\nColumn Information:")
+    # for column, count in zip(combined_df.columns, combined_df.count()):
+    #     description = column_info.get(column, "Description not available")
+    #     print(f"Column: {column}")
+    #     print(f"  Description: {description}")
+    #     print(f"  Non-null count: {count}/{len(combined_df)} entries")
+    #     print(f"  Data type: {combined_df[column].dtype}")
+    #     print()
+
+    # Add data dictionary descriptions as attributes
+    combined_df.attrs['column_descriptions'] = column_info
+
+    # Summary statistics for numerical columns
+    # print("\nSummary statistics for numerical columns:")
+    # print(combined_df.describe())
+
+    print("\n" + "="*100)
+    print("Year-over-Year Growth Rate Analysis")
+    print("="*100)
+
+    # Create a DataFrame to analyze suppliers by year-over-year growth rate
+    # First, group by supplier and year to get annual totals
+    supplier_yearly = combined_df.groupby(['Suplr_NPI', 'Suplr_Prvdr_Last_Name_Org', 'year']).agg({
+        'Suplr_Sbmtd_Chrgs': 'sum',
+        'Suplr_Mdcr_Pymt_Amt': 'sum',
+        'Tot_Suplr_Benes': 'mean',  # Average number of beneficiaries
+        'Tot_Suplr_Clms': 'sum'     # Total claims
+    }).reset_index()
+
+    # Create a pivot table to have years as columns
+    pivot_charges = supplier_yearly.pivot_table(
+        index=['Suplr_NPI', 'Suplr_Prvdr_Last_Name_Org'],
+        columns='year',
+        values='Suplr_Mdcr_Pymt_Amt',
+        fill_value=0
+    )
+
+    # Calculate year-over-year growth rates
+    growth_rates = pd.DataFrame(index=pivot_charges.index)
+
+    # Calculate growth rate for each year pair (2019/2018, 2020/2019, etc.)
+    for year_pair in [(2019, 2018), (2020, 2019), (2021, 2020), (2022, 2021)]:
+        current, previous = year_pair
+        growth_rates[f'growth_{current}'] = (
+            (pivot_charges[current] - pivot_charges[previous]) /
+            pivot_charges[previous].replace(0, float('nan'))
+        ) * 100  # Convert to percentage
+
+    # Average the yearly growth rates (NaN entries from a zero prior year are skipped)
+    growth_cols = [
+        col for col in growth_rates.columns if col.startswith('growth_')]
+    growth_rates['avg_growth'] = growth_rates[growth_cols].mean(axis=1)
+
+    # Keep only suppliers with a positive payment amount in every year, 2018-2022
+    valid_suppliers = pivot_charges[(pivot_charges[2018] > 0) &
+                                    (pivot_charges[2019] > 0) &
+                                    (pivot_charges[2020] > 0) &
+                                    (pivot_charges[2021] > 0) &
+                                    (pivot_charges[2022] > 0)]
+
+    # Filter suppliers with significant payment amounts (at least $100K in the last year)
+    significant_suppliers = valid_suppliers[valid_suppliers[2022] >= 100000]
+    print(
+        f"Filtering to {len(significant_suppliers)} suppliers with at least $100,000 in payments in 2022")
+
+    # Select the growth rates of the valid and significant suppliers
+    valid_growth = growth_rates.loc[significant_suppliers.index].reset_index()
+
+    # Sort by average growth rate in descending order
+    top_growth = valid_growth.sort_values('avg_growth', ascending=False)
+
+    # Merge with additional data for reporting
+    supplier_totals = supplier_yearly.groupby(['Suplr_NPI', 'Suplr_Prvdr_Last_Name_Org']).agg({
+        'Suplr_Sbmtd_Chrgs': 'sum',
+        'Suplr_Mdcr_Pymt_Amt': 'sum',
+        'Tot_Suplr_Benes': 'mean',
+        'Tot_Suplr_Clms': 'sum'
+    }).reset_index()
+
+    top_growth_with_data = pd.merge(
+        top_growth,
+        supplier_totals,
+        on=['Suplr_NPI', 'Suplr_Prvdr_Last_Name_Org']
+    )
+
+    # Format the output for the top 10 suppliers
+    print("The analysis identified suppliers with the highest growth rates based on Medicare payment amounts from 2018 to 2022.")
+    print("Here are the top 10 suppliers with extraordinary growth (minimum $100K in 2022 payments):\n")
+
+    # Get top 10 suppliers
+    top_10_suppliers = top_growth_with_data.head(10)
+    top_10_npi = top_10_suppliers['Suplr_NPI'].tolist()
+
+    # Filter the yearly supplier totals for just these suppliers
+    top_supplier_data = supplier_yearly[supplier_yearly['Suplr_NPI'].isin(
+        top_10_npi)]
+
+    # Format and display each supplier's information
+    for i, (_, supplier) in enumerate(top_10_suppliers.iterrows(), 1):
+        npi = supplier['Suplr_NPI']
+        name = supplier['Suplr_Prvdr_Last_Name_Org']
+        avg_growth = supplier['avg_growth']
+        total_payments = supplier['Suplr_Mdcr_Pymt_Amt']
+
+        # Get yearly data for this supplier
+        yearly_data = top_supplier_data[top_supplier_data['Suplr_NPI'] == npi].sort_values(
+            'year')
+
+        print(f"{i}. **{name}** (NPI: {npi})")
+        print(f"   - Average growth rate: {avg_growth:.2f}%")
+        print(
+            f"   - Total Medicare payments: ${total_payments/1000000:.2f} million")
+
+        # Show yearly payment amounts
+        yearly_payments = []
+        for year in range(2018, 2023):
+            year_data = yearly_data[yearly_data['year'] == year]
+            if not year_data.empty:
+                payment = year_data['Suplr_Mdcr_Pymt_Amt'].values[0]
+                yearly_payments.append(format_dollar_amount(payment))
+            else:
+                yearly_payments.append("$0")
+
+        print(
+            f"   - Yearly payments: 2018: {yearly_payments[0]}, 2019: {yearly_payments[1]}, 2020: {yearly_payments[2]}, 2021: {yearly_payments[3]}, 2022: {yearly_payments[4]}")
+
+        # Analyze growth pattern
+        payment_pattern = yearly_data['Suplr_Mdcr_Pymt_Amt'].tolist()
+        years_list = yearly_data['year'].tolist()
+        benes_pattern = yearly_data['Tot_Suplr_Benes'].tolist()
+
+        # Identify the largest year-over-year jump
+        max_jump = 0
+        max_jump_year_idx = 0
+        for j in range(1, len(payment_pattern)):
+            if payment_pattern[j-1] > 0:
+                jump_pct = (
+                    payment_pattern[j] - payment_pattern[j-1]) / payment_pattern[j-1] * 100
+                if jump_pct > max_jump:
+                    max_jump = jump_pct
+                    max_jump_year_idx = j
+
+        if max_jump_year_idx > 0:
+            from_year = years_list[max_jump_year_idx-1]
+            to_year = years_list[max_jump_year_idx]
+            from_amount = payment_pattern[max_jump_year_idx-1]
+            to_amount = payment_pattern[max_jump_year_idx]
+
+            # Format amounts with K or M suffix based on size
+            from_amount_str = format_dollar_amount(from_amount)
+            to_amount_str = format_dollar_amount(to_amount)
+
+            print(
+                f"   - Growth pattern: Major increase from {from_year} to {to_year} ({from_amount_str} to {to_amount_str})")
+
+        # Check for consistent growth
+        growth_consistent = True
+        for j in range(1, len(payment_pattern)):
+            if payment_pattern[j] <= payment_pattern[j-1]:
+                growth_consistent = False
+                break
+
+        if growth_consistent and len(payment_pattern) > 2:
+            print("   - Pattern shows consistent year-over-year growth")
+
+        # Check for beneficiary growth
+        if not pd.isna(benes_pattern).all() and len(benes_pattern) >= 2:
+            first_valid_idx = next((i for i, x in enumerate(
+                benes_pattern) if not pd.isna(x)), None)
+            last_valid_idx = next((i for i, x in enumerate(
+                reversed(benes_pattern)) if not pd.isna(x)), None)
+            if first_valid_idx is not None and last_valid_idx is not None:
+                last_valid_idx = len(benes_pattern) - 1 - last_valid_idx
+                first_benes = benes_pattern[first_valid_idx]
+                last_benes = benes_pattern[last_valid_idx]
+                if not pd.isna(first_benes) and not pd.isna(last_benes) and first_benes > 0:
+                    bene_growth = (last_benes - first_benes) / \
+                        first_benes * 100
+                    print(
+                        f"   - Beneficiary growth: {bene_growth:.1f}% increase (from {first_benes:.0f} to {last_benes:.0f})")
+
+        print("")  # Add a blank line between suppliers
+
+    # =====================================
+    # Analysis of High Submitted Charges vs Low Allowed/Paid Amounts
+    # =====================================
+    print("\n" + "="*100)
+    print("Analysis of High Submitted Charges with Low Allowed/Paid Amounts")
+    print("="*100)
+
+    # Aggregate data by supplier across all years
+    supplier_totals_with_allowed = combined_df.groupby(['Suplr_NPI', 'Suplr_Prvdr_Last_Name_Org']).agg({
+        'Suplr_Sbmtd_Chrgs': 'sum',
+        'Suplr_Mdcr_Alowd_Amt': 'sum',
+        'Suplr_Mdcr_Pymt_Amt': 'sum',
+        'Tot_Suplr_Benes': 'mean',
+        'Tot_Suplr_Clms': 'sum'
+    }).reset_index()
+
+    # Calculate ratios
+    supplier_totals_with_allowed['submitted_allowed_ratio'] = supplier_totals_with_allowed['Suplr_Sbmtd_Chrgs'] / \
+        supplier_totals_with_allowed['Suplr_Mdcr_Alowd_Amt']
+    supplier_totals_with_allowed['submitted_paid_ratio'] = supplier_totals_with_allowed['Suplr_Sbmtd_Chrgs'] / \
+        supplier_totals_with_allowed['Suplr_Mdcr_Pymt_Amt']
+
+    # Filter for suppliers with substantial submitted charges (at least $100,000) to focus on meaningful outliers
+    significant_suppliers = supplier_totals_with_allowed[
+        supplier_totals_with_allowed['Suplr_Sbmtd_Chrgs'] >= 100000]
+
+    # Find outliers with highest submitted-to-allowed ratio
+    top_submitted_allowed_outliers = significant_suppliers.sort_values(
+        'submitted_allowed_ratio', ascending=False).head(10)
+
+    print("Top 10 Suppliers with Highest Submitted Charges to Allowed Amount Ratio:\n")
+
+    for i, (_, supplier) in enumerate(top_submitted_allowed_outliers.iterrows(), 1):
+        npi = supplier['Suplr_NPI']
+        name = supplier['Suplr_Prvdr_Last_Name_Org']
+        submitted = supplier['Suplr_Sbmtd_Chrgs']
+        allowed = supplier['Suplr_Mdcr_Alowd_Amt']
+        paid = supplier['Suplr_Mdcr_Pymt_Amt']
+        ratio = supplier['submitted_allowed_ratio']
+
+        # Format amounts with K or M suffix based on size
+        submitted_str = format_dollar_amount(submitted)
+        allowed_str = format_dollar_amount(allowed)
+        paid_str = format_dollar_amount(paid)
+
+        print(f"{i}. **{name}** (NPI: {npi})")
+        print(f"   - Submitted charges: {submitted_str}")
+        print(f"   - Allowed amount: {allowed_str}")
+        print(f"   - Paid amount: {paid_str}")
+        print(f"   - Submitted to allowed ratio: {ratio:.2f}x")
+        print(
+            f"   - Allowed amount is {(allowed/submitted)*100:.1f}% of submitted charges")
+        print(
+            f"   - Paid amount is {(paid/submitted)*100:.1f}% of submitted charges")
+        print("")  # Add a blank line between suppliers
+
+    # Find outliers with highest submitted-to-paid ratio
+    top_submitted_paid_outliers = significant_suppliers.sort_values(
+        'submitted_paid_ratio', ascending=False).head(10)
+
+    print("\nTop 10 Suppliers with Highest Submitted Charges to Paid Amount Ratio:\n")
+
+    for i, (_, supplier) in enumerate(top_submitted_paid_outliers.iterrows(), 1):
+        npi = supplier['Suplr_NPI']
+        name = supplier['Suplr_Prvdr_Last_Name_Org']
+        submitted = supplier['Suplr_Sbmtd_Chrgs']
+        allowed = supplier['Suplr_Mdcr_Alowd_Amt']
+        paid = supplier['Suplr_Mdcr_Pymt_Amt']
+        ratio = supplier['submitted_paid_ratio']
+
+        # Format amounts with K or M suffix based on size
+        submitted_str = format_dollar_amount(submitted)
+        allowed_str = format_dollar_amount(allowed)
+        paid_str = format_dollar_amount(paid)
+
+        print(f"{i}. **{name}** (NPI: {npi})")
+        print(f"   - Submitted charges: {submitted_str}")
+        print(f"   - Allowed amount: {allowed_str}")
+        print(f"   - Paid amount: {paid_str}")
+        print(f"   - Submitted to paid ratio: {ratio:.2f}x")
+        print(
+            f"   - Paid amount is {(paid/submitted)*100:.1f}% of submitted charges")
+        print("")  # Add a blank line between suppliers
+
+    # =====================================
+    # Peer Group Analysis for Fraud Detection
+    # =====================================
+    print("\n" + "="*100)
+    print("Peer Group Analysis for Fraud Detection")
+    print("="*100)
+
+    if dfs:
+        # Ensure we have the required columns for analysis
+        required_columns = ['Suplr_NPI', 'Suplr_Prvdr_Last_Name_Org', 'Suplr_Prvdr_Spclty_Desc',
+                            'Suplr_Prvdr_State_Abrvtn', 'Suplr_Sbmtd_Chrgs', 'Suplr_Mdcr_Pymt_Amt',
+                            'Tot_Suplr_Clms', 'Tot_Suplr_Srvcs']
+
+        # Check if all required columns exist in the combined dataframe
+        missing_columns = [
+            col for col in required_columns if col not in combined_df.columns]
+        if missing_columns:
+            print(
+                f"Warning: Missing columns needed for peer group analysis: {missing_columns}")
+            print("Skipping peer group analysis.")
+        else:
+            # Calculate aggregated metrics by supplier for analysis
+            supplier_metrics = combined_df.groupby(['Suplr_NPI', 'Suplr_Prvdr_Last_Name_Org',
+                                                    'Suplr_Prvdr_Spclty_Desc', 'Suplr_Prvdr_State_Abrvtn']).agg({
+                                                        'Suplr_Sbmtd_Chrgs': 'sum',
+                                                        'Suplr_Mdcr_Pymt_Amt': 'sum',
+                                                        'Tot_Suplr_Clms': 'sum',
+                                                        'Tot_Suplr_Srvcs': 'sum'
+                                                    }).reset_index()
+
+            # Add derived metrics
+            supplier_metrics['Avg_Chrg_Per_Clm'] = supplier_metrics['Suplr_Sbmtd_Chrgs'] / \
+                supplier_metrics['Tot_Suplr_Clms']
+            supplier_metrics['Avg_Pymt_Per_Clm'] = supplier_metrics['Suplr_Mdcr_Pymt_Amt'] / \
+                supplier_metrics['Tot_Suplr_Clms']
+            supplier_metrics['Avg_Srvcs_Per_Clm'] = supplier_metrics['Tot_Suplr_Srvcs'] / \
+                supplier_metrics['Tot_Suplr_Clms']
+
+            # 1. Analysis by Specialty
+            print("\nAnalysis by Specialty:")
+            print("-" * 50)
+
+            # Get the specialties with at least 5 suppliers for meaningful comparison
+            specialty_counts = supplier_metrics['Suplr_Prvdr_Spclty_Desc'].value_counts(
+            )
+            valid_specialties = specialty_counts[specialty_counts >= 5].index.tolist(
+            )
+
+            if valid_specialties:
+                print(
+                    f"Found {len(valid_specialties)} specialties with at least 5 suppliers for peer comparison.")
+
+                # Calculate peer group metrics for each specialty
+                peer_specialty_metrics = supplier_metrics[supplier_metrics['Suplr_Prvdr_Spclty_Desc'].isin(valid_specialties)].groupby(
+                    'Suplr_Prvdr_Spclty_Desc').agg({
+                        'Suplr_Sbmtd_Chrgs': ['median', 'mean', 'std'],
+                        'Suplr_Mdcr_Pymt_Amt': ['median', 'mean', 'std'],
+                        'Tot_Suplr_Clms': ['median', 'mean', 'std'],
+                        'Tot_Suplr_Srvcs': ['median', 'mean', 'std'],
+                        'Avg_Chrg_Per_Clm': ['median', 'mean', 'std'],
+                        'Avg_Pymt_Per_Clm': ['median', 'mean', 'std'],
+                        'Avg_Srvcs_Per_Clm': ['median', 'mean', 'std']
+                    })
+
+                # Find outliers within each specialty (suppliers with metrics > 3x the median)
+                outliers_by_specialty = []
+
+                for specialty in valid_specialties:
+                    specialty_group = supplier_metrics[supplier_metrics['Suplr_Prvdr_Spclty_Desc'] == specialty]
+                    specialty_medians = peer_specialty_metrics.loc[specialty]
+
+                    # Check for outliers in claims, charges, and payments
+                    claim_outliers = specialty_group[specialty_group['Tot_Suplr_Clms']
+                                                     > 3 * specialty_medians[('Tot_Suplr_Clms', 'median')]]
+                    charge_outliers = specialty_group[specialty_group['Suplr_Sbmtd_Chrgs']
+                                                      > 3 * specialty_medians[('Suplr_Sbmtd_Chrgs', 'median')]]
+                    payment_outliers = specialty_group[specialty_group['Suplr_Mdcr_Pymt_Amt']
+                                                       > 3 * specialty_medians[('Suplr_Mdcr_Pymt_Amt', 'median')]]
+
+                    # Find suppliers that are outliers in at least two categories
+                    all_outliers = pd.concat([
+                        claim_outliers[['Suplr_NPI']].assign(metric='claims'),
+                        charge_outliers[['Suplr_NPI']].assign(
+                            metric='charges'),
+                        payment_outliers[['Suplr_NPI']].assign(
+                            metric='payments')
+                    ])
+
+                    outlier_counts = all_outliers.groupby('Suplr_NPI').size()
+                    multiple_outliers = outlier_counts[outlier_counts >= 2].index.tolist(
+                    )
+
+                    if multiple_outliers:
+                        for npi in multiple_outliers:
+                            supplier = specialty_group[specialty_group['Suplr_NPI']
+                                                       == npi].iloc[0]
+                            outliers_by_specialty.append({
+                                'NPI': npi,
+                                'Name': supplier['Suplr_Prvdr_Last_Name_Org'],
+                                'Specialty': specialty,
+                                'State': supplier['Suplr_Prvdr_State_Abrvtn'],
+                                'Total_Claims': supplier['Tot_Suplr_Clms'],
+                                'Claim_Ratio': supplier['Tot_Suplr_Clms'] / specialty_medians[('Tot_Suplr_Clms', 'median')],
+                                'Total_Charges': supplier['Suplr_Sbmtd_Chrgs'],
+                                'Charge_Ratio': supplier['Suplr_Sbmtd_Chrgs'] / specialty_medians[('Suplr_Sbmtd_Chrgs', 'median')],
+                                'Total_Payments': supplier['Suplr_Mdcr_Pymt_Amt'],
+                                'Payment_Ratio': supplier['Suplr_Mdcr_Pymt_Amt'] / specialty_medians[('Suplr_Mdcr_Pymt_Amt', 'median')]
+                            })
+
+                # Display the top outliers by specialty
+                if outliers_by_specialty:
+                    # Sort by highest combined ratio (sum of all ratios)
+                    for outlier in sorted(outliers_by_specialty,
+                                          key=lambda x: (
+                                              x['Claim_Ratio'] + x['Charge_Ratio'] + x['Payment_Ratio']),
+                                          reverse=True)[:10]:
+                        print(
+                            f"\n**{outlier['Name']}** (NPI: {outlier['NPI']})")
+                        print(
+                            f"  Specialty: {outlier['Specialty']} | State: {outlier['State']}")
+                        print(
+                            f"  Total Claims: {outlier['Total_Claims']:.0f} ({outlier['Claim_Ratio']:.1f}x specialty median)")
+
+                        # Format monetary values
+                        charges_str = format_dollar_amount(
+                            outlier['Total_Charges'])
+                        payments_str = format_dollar_amount(
+                            outlier['Total_Payments'])
+
+                        print(
+                            f"  Total Charges: {charges_str} ({outlier['Charge_Ratio']:.1f}x specialty median)")
+                        print(
+                            f"  Total Payments: {payments_str} ({outlier['Payment_Ratio']:.1f}x specialty median)")
+                else:
+                    print("No significant specialty outliers found.")
+            else:
+                print(
+                    "No specialties with enough suppliers for meaningful peer comparison.")
+
+            # 2. Analysis by State
+            print("\nAnalysis by State:")
+            print("-" * 50)
+
+            # Get the states with at least 5 suppliers for meaningful comparison
+            state_counts = supplier_metrics['Suplr_Prvdr_State_Abrvtn'].value_counts(
+            )
+            valid_states = state_counts[state_counts >= 5].index.tolist()
+
+            if valid_states:
+                print(
+                    f"Found {len(valid_states)} states with at least 5 suppliers for peer comparison.")
+
+                # Calculate peer group metrics for each state
+                peer_state_metrics = supplier_metrics[supplier_metrics['Suplr_Prvdr_State_Abrvtn'].isin(valid_states)].groupby(
+                    'Suplr_Prvdr_State_Abrvtn').agg({
+                        'Suplr_Sbmtd_Chrgs': ['median', 'mean', 'std'],
+                        'Suplr_Mdcr_Pymt_Amt': ['median', 'mean', 'std'],
+                        'Tot_Suplr_Clms': ['median', 'mean', 'std'],
+                        'Tot_Suplr_Srvcs': ['median', 'mean', 'std'],
+                        'Avg_Chrg_Per_Clm': ['median', 'mean', 'std'],
+                        'Avg_Pymt_Per_Clm': ['median', 'mean', 'std'],
+                        'Avg_Srvcs_Per_Clm': ['median', 'mean', 'std']
+                    })
+
+                # Find outliers within each state (suppliers with metrics > 3x the median)
+                outliers_by_state = []
+
+                for state in valid_states:
+                    state_group = supplier_metrics[supplier_metrics['Suplr_Prvdr_State_Abrvtn'] == state]
+                    state_medians = peer_state_metrics.loc[state]
+
+                    # Check for outliers in claims, charges, and payments
+                    claim_outliers = state_group[state_group['Tot_Suplr_Clms']
+                                                 > 3 * state_medians[('Tot_Suplr_Clms', 'median')]]
+                    charge_outliers = state_group[state_group['Suplr_Sbmtd_Chrgs']
+                                                  > 3 * state_medians[('Suplr_Sbmtd_Chrgs', 'median')]]
+                    payment_outliers = state_group[state_group['Suplr_Mdcr_Pymt_Amt']
+                                                   > 3 * state_medians[('Suplr_Mdcr_Pymt_Amt', 'median')]]
+
+                    # Find suppliers that are outliers in at least two categories
+                    all_outliers = pd.concat([
+                        claim_outliers[['Suplr_NPI']].assign(metric='claims'),
+                        charge_outliers[['Suplr_NPI']].assign(
+                            metric='charges'),
+                        payment_outliers[['Suplr_NPI']].assign(
+                            metric='payments')
+                    ])
+
+                    outlier_counts = all_outliers.groupby('Suplr_NPI').size()
+                    multiple_outliers = outlier_counts[outlier_counts >= 2].index.tolist(
+                    )
+
+                    if multiple_outliers:
+                        for npi in multiple_outliers:
+                            supplier = state_group[state_group['Suplr_NPI']
+                                                   == npi].iloc[0]
+                            outliers_by_state.append({
+                                'NPI': npi,
+                                'Name': supplier['Suplr_Prvdr_Last_Name_Org'],
+                                'Specialty': supplier['Suplr_Prvdr_Spclty_Desc'],
+                                'State': state,
+                                'Total_Claims': supplier['Tot_Suplr_Clms'],
+                                'Claim_Ratio': supplier['Tot_Suplr_Clms'] / state_medians[('Tot_Suplr_Clms', 'median')],
+                                'Total_Charges': supplier['Suplr_Sbmtd_Chrgs'],
+                                'Charge_Ratio': supplier['Suplr_Sbmtd_Chrgs'] / state_medians[('Suplr_Sbmtd_Chrgs', 'median')],
+                                'Total_Payments': supplier['Suplr_Mdcr_Pymt_Amt'],
+                                'Payment_Ratio': supplier['Suplr_Mdcr_Pymt_Amt'] / state_medians[('Suplr_Mdcr_Pymt_Amt', 'median')]
+                            })
+
+                # Display the top outliers by state
+                if outliers_by_state:
+                    # Sort by highest combined ratio (sum of all ratios)
+                    for outlier in sorted(outliers_by_state,
+                                          key=lambda x: (
+                                              x['Claim_Ratio'] + x['Charge_Ratio'] + x['Payment_Ratio']),
+                                          reverse=True)[:10]:
+                        print(
+                            f"\n**{outlier['Name']}** (NPI: {outlier['NPI']})")
+                        print(
+                            f"  State: {outlier['State']} | Specialty: {outlier['Specialty']}")
+                        print(
+                            f"  Total Claims: {outlier['Total_Claims']:.0f} ({outlier['Claim_Ratio']:.1f}x state median)")
+
+                        # Format monetary values
+                        charges_str = format_dollar_amount(
+                            outlier['Total_Charges'])
+                        payments_str = format_dollar_amount(
+                            outlier['Total_Payments'])
+
+                        print(
+                            f"  Total Charges: {charges_str} ({outlier['Charge_Ratio']:.1f}x state median)")
+                        print(
+                            f"  Total Payments: {payments_str} ({outlier['Payment_Ratio']:.1f}x state median)")
+                else:
+                    print("No significant state outliers found.")
+            else:
+                print("No states with enough suppliers for meaningful peer comparison.")
+
+            # 3. Combined specialty-state analysis for the most precise peer grouping
+            print("\nAnalysis by Combined Specialty-State Groups:")
+            print("-" * 50)
+
+            # Create specialty-state combination for more precise peer groups
+            supplier_metrics['Specialty_State'] = supplier_metrics['Suplr_Prvdr_Spclty_Desc'] + \
+                ' - ' + supplier_metrics['Suplr_Prvdr_State_Abrvtn']
+
+            # Get specialty-state combinations with at least 5 suppliers
+            specialty_state_counts = supplier_metrics['Specialty_State'].value_counts(
+            )
+            valid_specialty_states = specialty_state_counts[specialty_state_counts >= 5].index.tolist(
+            )
+
+            if valid_specialty_states:
+                print(
+                    f"Found {len(valid_specialty_states)} specialty-state combinations with at least 5 suppliers.")
+
+                # Calculate metrics for each specialty-state combination
+                peer_combined_metrics = supplier_metrics[supplier_metrics['Specialty_State'].isin(valid_specialty_states)].groupby(
+                    'Specialty_State').agg({
+                        'Suplr_Sbmtd_Chrgs': ['median', 'mean', 'std'],
+                        'Suplr_Mdcr_Pymt_Amt': ['median', 'mean', 'std'],
+                        'Tot_Suplr_Clms': ['median', 'mean', 'std'],
+                        'Tot_Suplr_Srvcs': ['median', 'mean', 'std'],
+                        'Avg_Chrg_Per_Clm': ['median', 'mean', 'std'],
+                        'Avg_Pymt_Per_Clm': ['median', 'mean', 'std'],
+                        'Avg_Srvcs_Per_Clm': ['median', 'mean', 'std']
+                    })
+
+                # Find outliers within each specialty-state group
+                outliers_combined = []
+
+                for group in valid_specialty_states:
+                    combined_group = supplier_metrics[supplier_metrics['Specialty_State'] == group]
+                    combined_medians = peer_combined_metrics.loc[group]
+
+                    # Check for outliers in claims, charges, and payments
+                    claim_outliers = combined_group[combined_group['Tot_Suplr_Clms']
+                                                    > 3 * combined_medians[('Tot_Suplr_Clms', 'median')]]
+                    charge_outliers = combined_group[combined_group['Suplr_Sbmtd_Chrgs']
+                                                     > 3 * combined_medians[('Suplr_Sbmtd_Chrgs', 'median')]]
+                    payment_outliers = combined_group[combined_group['Suplr_Mdcr_Pymt_Amt']
+                                                      > 3 * combined_medians[('Suplr_Mdcr_Pymt_Amt', 'median')]]
+
+                    # Find suppliers that are outliers in at least two categories
+                    all_outliers = pd.concat([
+                        claim_outliers[['Suplr_NPI']].assign(metric='claims'),
+                        charge_outliers[['Suplr_NPI']].assign(
+                            metric='charges'),
+                        payment_outliers[['Suplr_NPI']].assign(
+                            metric='payments')
+                    ])
+
+                    outlier_counts = all_outliers.groupby('Suplr_NPI').size()
+                    multiple_outliers = outlier_counts[outlier_counts >= 2].index.tolist(
+                    )
+
+                    if multiple_outliers:
+                        for npi in multiple_outliers:
+                            supplier = combined_group[combined_group['Suplr_NPI']
+                                                      == npi].iloc[0]
+                            outliers_combined.append({
+                                'NPI': npi,
+                                'Name': supplier['Suplr_Prvdr_Last_Name_Org'],
+                                'Specialty': supplier['Suplr_Prvdr_Spclty_Desc'],
+                                'State': supplier['Suplr_Prvdr_State_Abrvtn'],
+                                'Group': group,
+                                'Total_Claims': supplier['Tot_Suplr_Clms'],
+                                'Claim_Ratio': supplier['Tot_Suplr_Clms'] / combined_medians[('Tot_Suplr_Clms', 'median')],
+                                'Total_Charges': supplier['Suplr_Sbmtd_Chrgs'],
+                                'Charge_Ratio': supplier['Suplr_Sbmtd_Chrgs'] / combined_medians[('Suplr_Sbmtd_Chrgs', 'median')],
+                                'Total_Payments': supplier['Suplr_Mdcr_Pymt_Amt'],
+                                'Payment_Ratio': supplier['Suplr_Mdcr_Pymt_Amt'] / combined_medians[('Suplr_Mdcr_Pymt_Amt', 'median')]
+                            })
+
+                # Display the top outliers by combined group
+                if outliers_combined:
+                    print(
+                        "\nMost Significant Outliers by Combined Specialty-State Group:")
+                    # Sort by highest combined ratio (sum of all ratios)
+                    for outlier in sorted(outliers_combined,
+                                          key=lambda x: (
+                                              x['Claim_Ratio'] + x['Charge_Ratio'] + x['Payment_Ratio']),
+                                          reverse=True)[:10]:
+                        print(
+                            f"\n**{outlier['Name']}** (NPI: {outlier['NPI']})")
+                        print(
+                            f"  Specialty: {outlier['Specialty']} | State: {outlier['State']}")
+                        print(
+                            f"  Total Claims: {outlier['Total_Claims']:.0f} ({outlier['Claim_Ratio']:.1f}x peer group median)")
+
+                        # Format monetary values
+                        charges_str = format_dollar_amount(
+                            outlier['Total_Charges'])
+                        payments_str = format_dollar_amount(
+                            outlier['Total_Payments'])
+
+                        print(
+                            f"  Total Charges: {charges_str} ({outlier['Charge_Ratio']:.1f}x peer group median)")
+                        print(
+                            f"  Total Payments: {payments_str} ({outlier['Payment_Ratio']:.1f}x peer group median)")
+                else:
+                    print("No significant combined specialty-state outliers found.")
+            else:
+                print(
+                    "No specialty-state combinations with enough suppliers for meaningful peer comparison.")
+else:
+    print("No data was loaded. Please check if the CSV files exist.")
+
+
+print("Stopping here")
diff --git a/dme_analysis.ipynb b/dme_analysis.ipynb
index 9638289..0432eb2 100644
--- a/dme_analysis.ipynb
+++ b/dme_analysis.ipynb
@@ -3,25 +3,2796 @@
   {
    "cell_type": "code",
    "execution_count": 1,
-   "id": "cad336dc-cdc4-4dc0-8a6a-e4df4cacea45",
+   "id": "373321ec",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# 1. Imports & Settings\n",
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "import os\n",
+    "import glob\n",
+    "import locale\n",
+    "from dme_dictionary import DATA_DICTIONARY  # Assuming you have a Python file that defines DATA_DICTIONARY\n",
+    "\n",
+    "import matplotlib.pyplot as plt\n",
+    "plt.rcParams[\"figure.figsize\"] = (8, 5)\n",
+    "\n",
+    "# Set locale for currency formatting if desired\n",
+    "locale.setlocale(locale.LC_ALL, '')\n",
+    "\n",
+    "# Pandas display options\n",
+    "pd.set_option('display.max_columns', None)  # Show all columns\n",
+    "pd.set_option('display.width', None)        # Avoid wrapping output\n",
+    "pd.set_option('display.expand_frame_repr', False)  # Single-line output for wide DataFrames"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "68dda7d8",
+   "metadata": {},
+   "source": [
+    "# Medicare DME Supplier Analysis\n",
+    "\n",
+    "This notebook demonstrates how to:\n",
+    "1. Load Medicare Durable Medical Equipment (DME) supplier data spanning multiple years (2018–2022).\n",
+    "2. Analyze key metrics (submitted charges, Medicare payments, beneficiary counts) over time.\n",
+    "3. Compute year-over-year growth rates and identify significant spikes.\n",
+    "4. Examine cases where submitted charges are high but the Medicare-allowed or paid amounts are low.\n",
+    "5. Perform peer-group analyses by specialty, state, and combined specialty–state.\n",
+    "\n",
+    "We'll highlight outliers that may be worth investigating for potential fraud or anomalies."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "9bc5b0ae",
+   "metadata": {},
+   "source": [
+    "## 2. Data Loading\n",
+    "We'll load the first CSV file found in each year's folder (2018 through 2022), tag its rows with a `year` column, and then combine everything into a single DataFrame."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "c40d3ac0",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Loading data from data/2018/mup_dme_ry24_p05_v10_dy18_supr.csv\n",
+      "Loading data from data/2019/mup_dme_ry24_p05_v10_dy19_supr.csv\n",
+      "Loading data from data/2020/mup_dme_ry24_p05_v10_dy20_supr.csv\n",
+      "Loading data from data/2021/mup_dme_ry24_p05_v10_dy21_supr.csv\n",
+      "Loading data from data/2022/mup_dme_ry24_p05_v10_dy22_supr.csv\n",
+      "\n",
+      "Combined DataFrame shape: (352611, 95)\n"
+     ]
+    }
+   ],
+   "source": [
+    "data_dir = 'data'       # Adjust if your data folder is elsewhere\n",
+    "years = range(2018, 2023)  # 2018 to 2022\n",
+    "\n",
+    "dfs = []\n",
+    "\n",
+    "for year in years:\n",
+    "    csv_files = glob.glob(f\"{data_dir}/{year}/*.csv\")\n",
+    "    if not csv_files:\n",
+    "        print(f\"No CSV files found for year {year}\")\n",
+    "        continue\n",
+    "    \n",
+    "    # Take the first CSV found\n",
+    "    csv_file = csv_files[0]\n",
+    "    print(f\"Loading data from {csv_file}\")\n",
+    "    \n",
+    "    # Read the CSV, then add a 'year' column\n",
+    "    df = pd.read_csv(csv_file, low_memory=False)\n",
+    "    df['year'] = year\n",
+    "    \n",
+    "    dfs.append(df)\n",
+    "\n",
+    "if dfs:\n",
+    "    combined_df = pd.concat(dfs, ignore_index=True)\n",
+    "    print(f\"\\nCombined DataFrame shape: {combined_df.shape}\")\n",
+    "else:\n",
+    "    combined_df = pd.DataFrame()\n",
+    "    print(\"No data was loaded.\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "37a28f37",
+   "metadata": {},
+   "source": [
+    "### Basic Exploration\n",
+    "Let's take a quick look at the combined DataFrame's structure and confirm that it has the columns we expect."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "c7036199",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "First few rows:\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Suplr_NPI</th>\n",
+       "      <th>Suplr_Prvdr_Last_Name_Org</th>\n",
+       "      <th>Suplr_Prvdr_First_Name</th>\n",
+       "      <th>Suplr_Prvdr_MI</th>\n",
+       "      <th>Suplr_Prvdr_Crdntls</th>\n",
+       "      <th>Suplr_Prvdr_Gndr</th>\n",
+       "      <th>Suplr_Prvdr_Ent_Cd</th>\n",
+       "      <th>Suplr_Prvdr_St1</th>\n",
+       "      <th>Suplr_Prvdr_St2</th>\n",
+       "      <th>Suplr_Prvdr_City</th>\n",
+       "      <th>Suplr_Prvdr_State_Abrvtn</th>\n",
+       "      <th>Suplr_Prvdr_State_FIPS</th>\n",
+       "      <th>Suplr_Prvdr_Zip5</th>\n",
+       "      <th>Suplr_Prvdr_RUCA</th>\n",
+       "      <th>Suplr_Prvdr_RUCA_Desc</th>\n",
+       "      <th>Suplr_Prvdr_Cntry</th>\n",
+       "      <th>Suplr_Prvdr_Spclty_Desc</th>\n",
+       "      <th>Suplr_Prvdr_Spclty_Srce</th>\n",
+       "      <th>Tot_Suplr_HCPCS_Cds</th>\n",
+       "      <th>Tot_Suplr_Benes</th>\n",
+       "      <th>Tot_Suplr_Clms</th>\n",
+       "      <th>Tot_Suplr_Srvcs</th>\n",
+       "      <th>Suplr_Sbmtd_Chrgs</th>\n",
+       "      <th>Suplr_Mdcr_Alowd_Amt</th>\n",
+       "      <th>Suplr_Mdcr_Pymt_Amt</th>\n",
+       "      <th>Suplr_Mdcr_Stdzd_Pymt_Amt</th>\n",
+       "      <th>DME_Sprsn_Ind</th>\n",
+       "      <th>DME_Tot_Suplr_HCPCS_Cds</th>\n",
+       "      <th>DME_Tot_Suplr_Benes</th>\n",
+       "      <th>DME_Tot_Suplr_Clms</th>\n",
+       "      <th>DME_Tot_Suplr_Srvcs</th>\n",
+       "      <th>DME_Suplr_Sbmtd_Chrgs</th>\n",
+       "      <th>DME_Suplr_Mdcr_Alowd_Amt</th>\n",
+       "      <th>DME_Suplr_Mdcr_Pymt_Amt</th>\n",
+       "      <th>DME_Suplr_Mdcr_Stdzd_Pymt_Amt</th>\n",
+       "      <th>POS_Sprsn_Ind</th>\n",
+       "      <th>POS_Tot_Suplr_HCPCS_Cds</th>\n",
+       "      <th>POS_Tot_Suplr_Benes</th>\n",
+       "      <th>POS_Tot_Suplr_Clms</th>\n",
+       "      <th>POS_Tot_Suplr_Srvcs</th>\n",
+       "      <th>POS_Suplr_Sbmtd_Chrgs</th>\n",
+       "      <th>POS_Suplr_Mdcr_Alowd_Amt</th>\n",
+       "      <th>POS_Suplr_Mdcr_Pymt_Amt</th>\n",
+       "      <th>POS_Suplr_Mdcr_Stdzd_Pymt_Amt</th>\n",
+       "      <th>Drug_Sprsn_Ind</th>\n",
+       "      <th>Drug_Tot_Suplr_HCPCS_Cds</th>\n",
+       "      <th>Drug_Tot_Suplr_Benes</th>\n",
+       "      <th>Drug_Tot_Suplr_Clms</th>\n",
+       "      <th>Drug_Tot_Suplr_Srvcs</th>\n",
+       "      <th>Drug_Suplr_Sbmtd_Chrgs</th>\n",
+       "      <th>Drug_Suplr_Mdcr_Alowd_Amt</th>\n",
+       "      <th>Drug_Suplr_Mdcr_Pymt_Amt</th>\n",
+       "      <th>Drug_Suplr_Mdcr_Stdzd_Pymt_Amt</th>\n",
+       "      <th>Bene_Avg_Age</th>\n",
+       "      <th>Bene_Age_LT_65_Cnt</th>\n",
+       "      <th>Bene_Age_65_74_Cnt</th>\n",
+       "      <th>Bene_Age_75_84_Cnt</th>\n",
+       "      <th>Bene_Age_GT_84_Cnt</th>\n",
+       "      <th>Bene_Feml_Cnt</th>\n",
+       "      <th>Bene_Male_Cnt</th>\n",
+       "      <th>Bene_Race_Wht_Cnt</th>\n",
+       "      <th>Bene_Race_Black_Cnt</th>\n",
+       "      <th>Bene_Race_Api_Cnt</th>\n",
+       "      <th>Bene_Race_Hspnc_Cnt</th>\n",
+       "      <th>Bene_Race_Natind_Cnt</th>\n",
+       "      <th>Bene_Race_Othr_Cnt</th>\n",
+       "      <th>Bene_Ndual_Cnt</th>\n",
+       "      <th>Bene_Dual_Cnt</th>\n",
+       "      <th>Bene_CC_BH_ADHD_OthCD_V1_Pct</th>\n",
+       "      <th>Bene_CC_BH_Alcohol_Drug_V1_Pct</th>\n",
+       "      <th>Bene_CC_BH_Tobacco_V1_Pct</th>\n",
+       "      <th>Bene_CC_BH_Alz_NonAlzdem_V2_Pct</th>\n",
+       "      <th>Bene_CC_BH_Anxiety_V1_Pct</th>\n",
+       "      <th>Bene_CC_BH_Bipolar_V1_Pct</th>\n",
+       "      <th>Bene_CC_BH_Mood_V2_Pct</th>\n",
+       "      <th>Bene_CC_BH_Depress_V1_Pct</th>\n",
+       "      <th>Bene_CC_BH_PD_V1_Pct</th>\n",
+       "      <th>Bene_CC_BH_PTSD_V1_Pct</th>\n",
+       "      <th>Bene_CC_BH_Schizo_OthPsy_V1_Pct</th>\n",
+       "      <th>Bene_CC_PH_Asthma_V2_Pct</th>\n",
+       "      <th>Bene_CC_PH_Afib_V2_Pct</th>\n",
+       "      <th>Bene_CC_PH_Cancer6_V2_Pct</th>\n",
+       "      <th>Bene_CC_PH_CKD_V2_Pct</th>\n",
+       "      <th>Bene_CC_PH_COPD_V2_Pct</th>\n",
+       "      <th>Bene_CC_PH_Diabetes_V2_Pct</th>\n",
+       "      <th>Bene_CC_PH_HF_NonIHD_V2_Pct</th>\n",
+       "      <th>Bene_CC_PH_Hyperlipidemia_V2_Pct</th>\n",
+       "      <th>Bene_CC_PH_Hypertension_V2_Pct</th>\n",
+       "      <th>Bene_CC_PH_IschemicHeart_V2_Pct</th>\n",
+       "      <th>Bene_CC_PH_Osteoporosis_V2_Pct</th>\n",
+       "      <th>Bene_CC_PH_Parkinson_V2_Pct</th>\n",
+       "      <th>Bene_CC_PH_Arthritis_V2_Pct</th>\n",
+       "      <th>Bene_CC_PH_Stroke_TIA_V2_Pct</th>\n",
+       "      <th>Bene_Avg_Risk_Scre</th>\n",
+       "      <th>year</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>1003000399</td>\n",
+       "      <td>Reconstructive Hand To Shoulder Of Indiana, Llc</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>O</td>\n",
+       "      <td>13431 Old Meridian Street</td>\n",
+       "      <td>Suite 225</td>\n",
+       "      <td>Carmel</td>\n",
+       "      <td>IN</td>\n",
+       "      <td>18</td>\n",
+       "      <td>46032</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>Metropolitan area core: primary flow within an...</td>\n",
+       "      <td>US</td>\n",
+       "      <td>General Surgery</td>\n",
+       "      <td>Claim-Specialty</td>\n",
+       "      <td>15</td>\n",
+       "      <td>235.0</td>\n",
+       "      <td>301</td>\n",
+       "      <td>340</td>\n",
+       "      <td>83033.00</td>\n",
+       "      <td>70600.40</td>\n",
+       "      <td>54545.85</td>\n",
+       "      <td>56320.86</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.00</td>\n",
+       "      <td>0.00</td>\n",
+       "      <td>0.00</td>\n",
+       "      <td>0.00</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>15.0</td>\n",
+       "      <td>235.0</td>\n",
+       "      <td>301.0</td>\n",
+       "      <td>340.0</td>\n",
+       "      <td>83033.00</td>\n",
+       "      <td>70600.40</td>\n",
+       "      <td>54545.85</td>\n",
+       "      <td>56320.86</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.00</td>\n",
+       "      <td>0.00</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>72.862661</td>\n",
+       "      <td>20.0</td>\n",
+       "      <td>120.0</td>\n",
+       "      <td>74.0</td>\n",
+       "      <td>21.0</td>\n",
+       "      <td>148.0</td>\n",
+       "      <td>87.0</td>\n",
+       "      <td>222.0</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>220.0</td>\n",
+       "      <td>15.0</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.051064</td>\n",
+       "      <td>0.102128</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.200000</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.255319</td>\n",
+       "      <td>0.234043</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.085106</td>\n",
+       "      <td>0.085106</td>\n",
+       "      <td>0.131915</td>\n",
+       "      <td>0.102128</td>\n",
+       "      <td>0.165957</td>\n",
+       "      <td>0.217021</td>\n",
+       "      <td>0.093617</td>\n",
+       "      <td>0.676596</td>\n",
+       "      <td>0.668085</td>\n",
+       "      <td>0.229787</td>\n",
+       "      <td>0.144681</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.646809</td>\n",
+       "      <td>0.046809</td>\n",
+       "      <td>0.975801</td>\n",
+       "      <td>2018</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>1003000845</td>\n",
+       "      <td>James D.Schlenker Mdsc</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>O</td>\n",
+       "      <td>6311 W 95th St</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>Oak Lawn</td>\n",
+       "      <td>IL</td>\n",
+       "      <td>17</td>\n",
+       "      <td>60453</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>Metropolitan area core: primary flow within an...</td>\n",
+       "      <td>US</td>\n",
+       "      <td>Plastic and Reconstructive Surgery</td>\n",
+       "      <td>Claim-Specialty</td>\n",
+       "      <td>8</td>\n",
+       "      <td>19.0</td>\n",
+       "      <td>22</td>\n",
+       "      <td>22</td>\n",
+       "      <td>4168.00</td>\n",
+       "      <td>4034.22</td>\n",
+       "      <td>3138.12</td>\n",
+       "      <td>4635.72</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.00</td>\n",
+       "      <td>0.00</td>\n",
+       "      <td>0.00</td>\n",
+       "      <td>0.00</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>8.0</td>\n",
+       "      <td>19.0</td>\n",
+       "      <td>22.0</td>\n",
+       "      <td>22.0</td>\n",
+       "      <td>4168.00</td>\n",
+       "      <td>4034.22</td>\n",
+       "      <td>3138.12</td>\n",
+       "      <td>4635.72</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.00</td>\n",
+       "      <td>0.00</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>74.631579</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>11.0</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>16.0</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.842105</td>\n",
+       "      <td>0.736842</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.736842</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>1.065053</td>\n",
+       "      <td>2018</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>1003001934</td>\n",
+       "      <td>Yi Rui International Corp</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>O</td>\n",
+       "      <td>4307 8th Ave</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>Brooklyn</td>\n",
+       "      <td>NY</td>\n",
+       "      <td>36</td>\n",
+       "      <td>11232</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>Metropolitan area core: primary flow within an...</td>\n",
+       "      <td>US</td>\n",
+       "      <td>Pharmacy</td>\n",
+       "      <td>Claim-Specialty</td>\n",
+       "      <td>5</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>37</td>\n",
+       "      <td>796</td>\n",
+       "      <td>2739.60</td>\n",
+       "      <td>549.08</td>\n",
+       "      <td>339.39</td>\n",
+       "      <td>407.47</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>35.0</td>\n",
+       "      <td>46.0</td>\n",
+       "      <td>2448.28</td>\n",
+       "      <td>512.46</td>\n",
+       "      <td>321.75</td>\n",
+       "      <td>389.83</td>\n",
+       "      <td>#</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>*</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>75.285714</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>3.668578</td>\n",
+       "      <td>2018</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>1003002254</td>\n",
+       "      <td>Walgreen Co.</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>O</td>\n",
+       "      <td>5104 Bobby Hicks Hwy</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>Gray</td>\n",
+       "      <td>TN</td>\n",
+       "      <td>47</td>\n",
+       "      <td>37615</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>Metropolitan area core: primary flow within an...</td>\n",
+       "      <td>US</td>\n",
+       "      <td>Centralized Flu</td>\n",
+       "      <td>Claim-Specialty</td>\n",
+       "      <td>10</td>\n",
+       "      <td>56.0</td>\n",
+       "      <td>150</td>\n",
+       "      <td>3681</td>\n",
+       "      <td>31078.36</td>\n",
+       "      <td>5276.87</td>\n",
+       "      <td>3699.85</td>\n",
+       "      <td>3835.41</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>6.0</td>\n",
+       "      <td>56.0</td>\n",
+       "      <td>148.0</td>\n",
+       "      <td>390.0</td>\n",
+       "      <td>26475.28</td>\n",
+       "      <td>3226.01</td>\n",
+       "      <td>2111.05</td>\n",
+       "      <td>2246.61</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.00</td>\n",
+       "      <td>0.00</td>\n",
+       "      <td>0.00</td>\n",
+       "      <td>0.00</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>12.0</td>\n",
+       "      <td>3291.0</td>\n",
+       "      <td>4603.08</td>\n",
+       "      <td>2050.86</td>\n",
+       "      <td>1588.8</td>\n",
+       "      <td>1588.8</td>\n",
+       "      <td>71.240000</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>28.0</td>\n",
+       "      <td>16.0</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>37.0</td>\n",
+       "      <td>19.0</td>\n",
+       "      <td>56.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>45.0</td>\n",
+       "      <td>11.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.214286</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.285714</td>\n",
+       "      <td>0.267857</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.196429</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.196429</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.821429</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.714286</td>\n",
+       "      <td>0.839286</td>\n",
+       "      <td>0.232143</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.446429</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>1.171945</td>\n",
+       "      <td>2018</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>1003002767</td>\n",
+       "      <td>Thomas J Mcelligott Md Pc</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>O</td>\n",
+       "      <td>2415 Wall St Se</td>\n",
+       "      <td>Suite B</td>\n",
+       "      <td>Conyers</td>\n",
+       "      <td>GA</td>\n",
+       "      <td>13</td>\n",
+       "      <td>30013</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>Metropolitan area core: primary flow within an...</td>\n",
+       "      <td>US</td>\n",
+       "      <td>Orthopedic Surgery</td>\n",
+       "      <td>Claim-Specialty</td>\n",
+       "      <td>10</td>\n",
+       "      <td>38.0</td>\n",
+       "      <td>44</td>\n",
+       "      <td>45</td>\n",
+       "      <td>4920.71</td>\n",
+       "      <td>4808.81</td>\n",
+       "      <td>3344.82</td>\n",
+       "      <td>3420.32</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.00</td>\n",
+       "      <td>0.00</td>\n",
+       "      <td>0.00</td>\n",
+       "      <td>0.00</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>10.0</td>\n",
+       "      <td>38.0</td>\n",
+       "      <td>44.0</td>\n",
+       "      <td>45.0</td>\n",
+       "      <td>4920.71</td>\n",
+       "      <td>4808.81</td>\n",
+       "      <td>3344.82</td>\n",
+       "      <td>3420.32</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.00</td>\n",
+       "      <td>0.00</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>73.727273</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>15.0</td>\n",
+       "      <td>13.0</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>26.0</td>\n",
+       "      <td>12.0</td>\n",
+       "      <td>26.0</td>\n",
+       "      <td>11.0</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.631579</td>\n",
+       "      <td>0.631579</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.447368</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>1.302857</td>\n",
+       "      <td>2018</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "    Suplr_NPI                        Suplr_Prvdr_Last_Name_Org Suplr_Prvdr_First_Name Suplr_Prvdr_MI Suplr_Prvdr_Crdntls Suplr_Prvdr_Gndr Suplr_Prvdr_Ent_Cd            Suplr_Prvdr_St1 Suplr_Prvdr_St2 Suplr_Prvdr_City Suplr_Prvdr_State_Abrvtn Suplr_Prvdr_State_FIPS  Suplr_Prvdr_Zip5  Suplr_Prvdr_RUCA                              Suplr_Prvdr_RUCA_Desc Suplr_Prvdr_Cntry             Suplr_Prvdr_Spclty_Desc Suplr_Prvdr_Spclty_Srce  Tot_Suplr_HCPCS_Cds  Tot_Suplr_Benes  Tot_Suplr_Clms  Tot_Suplr_Srvcs  Suplr_Sbmtd_Chrgs  Suplr_Mdcr_Alowd_Amt  Suplr_Mdcr_Pymt_Amt  Suplr_Mdcr_Stdzd_Pymt_Amt DME_Sprsn_Ind  DME_Tot_Suplr_HCPCS_Cds  DME_Tot_Suplr_Benes  DME_Tot_Suplr_Clms  DME_Tot_Suplr_Srvcs  DME_Suplr_Sbmtd_Chrgs  DME_Suplr_Mdcr_Alowd_Amt  DME_Suplr_Mdcr_Pymt_Amt  DME_Suplr_Mdcr_Stdzd_Pymt_Amt POS_Sprsn_Ind  POS_Tot_Suplr_HCPCS_Cds  POS_Tot_Suplr_Benes  POS_Tot_Suplr_Clms  POS_Tot_Suplr_Srvcs  POS_Suplr_Sbmtd_Chrgs  POS_Suplr_Mdcr_Alowd_Amt  POS_Suplr_Mdcr_Pymt_Amt  POS_Suplr_Mdcr_Stdzd_Pymt_Amt Drug_Sprsn_Ind  Drug_Tot_Suplr_HCPCS_Cds  Drug_Tot_Suplr_Benes  Drug_Tot_Suplr_Clms  Drug_Tot_Suplr_Srvcs  Drug_Suplr_Sbmtd_Chrgs  Drug_Suplr_Mdcr_Alowd_Amt  Drug_Suplr_Mdcr_Pymt_Amt  Drug_Suplr_Mdcr_Stdzd_Pymt_Amt  Bene_Avg_Age  Bene_Age_LT_65_Cnt  Bene_Age_65_74_Cnt  Bene_Age_75_84_Cnt  Bene_Age_GT_84_Cnt  Bene_Feml_Cnt  Bene_Male_Cnt  Bene_Race_Wht_Cnt  Bene_Race_Black_Cnt  Bene_Race_Api_Cnt  Bene_Race_Hspnc_Cnt  Bene_Race_Natind_Cnt  Bene_Race_Othr_Cnt  Bene_Ndual_Cnt  Bene_Dual_Cnt  Bene_CC_BH_ADHD_OthCD_V1_Pct  Bene_CC_BH_Alcohol_Drug_V1_Pct  Bene_CC_BH_Tobacco_V1_Pct  Bene_CC_BH_Alz_NonAlzdem_V2_Pct  Bene_CC_BH_Anxiety_V1_Pct  Bene_CC_BH_Bipolar_V1_Pct  Bene_CC_BH_Mood_V2_Pct  Bene_CC_BH_Depress_V1_Pct  Bene_CC_BH_PD_V1_Pct  Bene_CC_BH_PTSD_V1_Pct  Bene_CC_BH_Schizo_OthPsy_V1_Pct  Bene_CC_PH_Asthma_V2_Pct  Bene_CC_PH_Afib_V2_Pct  Bene_CC_PH_Cancer6_V2_Pct  Bene_CC_PH_CKD_V2_Pct  Bene_CC_PH_COPD_V2_Pct  Bene_CC_PH_Diabetes_V2_Pct  Bene_CC_PH_HF_NonIHD_V2_Pct  
Bene_CC_PH_Hyperlipidemia_V2_Pct  Bene_CC_PH_Hypertension_V2_Pct  Bene_CC_PH_IschemicHeart_V2_Pct  Bene_CC_PH_Osteoporosis_V2_Pct  Bene_CC_PH_Parkinson_V2_Pct  Bene_CC_PH_Arthritis_V2_Pct  Bene_CC_PH_Stroke_TIA_V2_Pct  Bene_Avg_Risk_Scre  year\n",
+       "0  1003000399  Reconstructive Hand To Shoulder Of Indiana, Llc                    NaN            NaN                 NaN              NaN                  O  13431 Old Meridian Street       Suite 225           Carmel                       IN                     18             46032               1.0  Metropolitan area core: primary flow within an...                US                     General Surgery         Claim-Specialty                   15            235.0             301              340           83033.00              70600.40             54545.85                   56320.86           NaN                      0.0                  0.0                 0.0                  0.0                   0.00                      0.00                     0.00                           0.00           NaN                     15.0                235.0               301.0                340.0               83033.00                  70600.40                 54545.85                       56320.86            NaN                       0.0                   0.0                  0.0                   0.0                    0.00                       0.00                       0.0                             0.0     72.862661                20.0               120.0                74.0                21.0          148.0           87.0              222.0                  NaN                NaN                  NaN                   0.0                 NaN           220.0           15.0                           NaN                        0.051064                   0.102128                              NaN                   0.200000                        NaN                0.255319                   0.234043                   NaN                     NaN                              NaN                  0.085106                0.085106                   0.131915               0.102128                0.165957                    0.217021                     0.093617           
               0.676596                        0.668085                         0.229787                        0.144681                          0.0                     0.646809                      0.046809            0.975801  2018\n",
+       "1  1003000845                           James D.Schlenker Mdsc                    NaN            NaN                 NaN              NaN                  O             6311 W 95th St             NaN         Oak Lawn                       IL                     17             60453               1.0  Metropolitan area core: primary flow within an...                US  Plastic and Reconstructive Surgery         Claim-Specialty                    8             19.0              22               22            4168.00               4034.22              3138.12                    4635.72           NaN                      0.0                  0.0                 0.0                  0.0                   0.00                      0.00                     0.00                           0.00           NaN                      8.0                 19.0                22.0                 22.0                4168.00                   4034.22                  3138.12                        4635.72            NaN                       0.0                   0.0                  0.0                   0.0                    0.00                       0.00                       0.0                             0.0     74.631579                 0.0                11.0                 NaN                 NaN            NaN            NaN               16.0                  NaN                0.0                  NaN                   0.0                 NaN             NaN            NaN                           0.0                        0.000000                   0.000000                              NaN                        NaN                        0.0                     NaN                        NaN                   0.0                     0.0                              0.0                  0.000000                     NaN                        NaN                    NaN                     NaN                         NaN                     0.000000           
               0.842105                        0.736842                              NaN                             NaN                          NaN                     0.736842                           NaN            1.065053  2018\n",
+       "2  1003001934                        Yi Rui International Corp                    NaN            NaN                 NaN              NaN                  O               4307 8th Ave             NaN         Brooklyn                       NY                     36             11232               1.0  Metropolitan area core: primary flow within an...                US                            Pharmacy         Claim-Specialty                    5              NaN              37              796            2739.60                549.08               339.39                     407.47           NaN                      4.0                  NaN                35.0                 46.0                2448.28                    512.46                   321.75                         389.83             #                      NaN                  NaN                 NaN                  NaN                    NaN                       NaN                      NaN                            NaN              *                       NaN                   NaN                  NaN                   NaN                     NaN                        NaN                       NaN                             NaN     75.285714                 NaN                 NaN                 NaN                 NaN            NaN            NaN                NaN                  NaN                NaN                  NaN                   NaN                 NaN             NaN            NaN                           NaN                             NaN                        NaN                              NaN                        NaN                        NaN                     NaN                        NaN                   NaN                     NaN                              NaN                       NaN                     NaN                        NaN                    NaN                     NaN                         NaN                          NaN           
                    NaN                             NaN                              NaN                             NaN                          NaN                          NaN                           NaN            3.668578  2018\n",
+       "3  1003002254                                     Walgreen Co.                    NaN            NaN                 NaN              NaN                  O       5104 Bobby Hicks Hwy             NaN             Gray                       TN                     47             37615               1.0  Metropolitan area core: primary flow within an...                US                     Centralized Flu         Claim-Specialty                   10             56.0             150             3681           31078.36               5276.87              3699.85                    3835.41           NaN                      6.0                 56.0               148.0                390.0               26475.28                   3226.01                  2111.05                        2246.61           NaN                      0.0                  0.0                 0.0                  0.0                   0.00                      0.00                     0.00                           0.00            NaN                       4.0                   NaN                 12.0                3291.0                 4603.08                    2050.86                    1588.8                          1588.8     71.240000                 NaN                28.0                16.0                 NaN           37.0           19.0               56.0                  0.0                0.0                  0.0                   0.0                 0.0            45.0           11.0                           0.0                             NaN                        NaN                              NaN                   0.214286                        NaN                0.285714                   0.267857                   0.0                     0.0                              0.0                  0.196429                     NaN                        NaN               0.196429                     NaN                    0.821429                          NaN           
               0.714286                        0.839286                         0.232143                             NaN                          0.0                     0.446429                           NaN            1.171945  2018\n",
+       "4  1003002767                        Thomas J Mcelligott Md Pc                    NaN            NaN                 NaN              NaN                  O            2415 Wall St Se         Suite B          Conyers                       GA                     13             30013               1.0  Metropolitan area core: primary flow within an...                US                  Orthopedic Surgery         Claim-Specialty                   10             38.0              44               45            4920.71               4808.81              3344.82                    3420.32           NaN                      0.0                  0.0                 0.0                  0.0                   0.00                      0.00                     0.00                           0.00           NaN                     10.0                 38.0                44.0                 45.0                4920.71                   4808.81                  3344.82                        3420.32            NaN                       0.0                   0.0                  0.0                   0.0                    0.00                       0.00                       0.0                             0.0     73.727273                 NaN                15.0                13.0                 NaN           26.0           12.0               26.0                 11.0                NaN                  0.0                   0.0                 NaN             NaN            NaN                           0.0                             NaN                        NaN                              NaN                        NaN                        NaN                     NaN                        NaN                   0.0                     NaN                              NaN                       NaN                     NaN                        NaN                    NaN                     NaN                         NaN                          NaN           
               0.631579                        0.631579                              NaN                             NaN                          0.0                     0.447368                           NaN            1.302857  2018"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "Column Names:\n",
+      "['Suplr_NPI', 'Suplr_Prvdr_Last_Name_Org', 'Suplr_Prvdr_First_Name', 'Suplr_Prvdr_MI', 'Suplr_Prvdr_Crdntls', 'Suplr_Prvdr_Gndr', 'Suplr_Prvdr_Ent_Cd', 'Suplr_Prvdr_St1', 'Suplr_Prvdr_St2', 'Suplr_Prvdr_City', 'Suplr_Prvdr_State_Abrvtn', 'Suplr_Prvdr_State_FIPS', 'Suplr_Prvdr_Zip5', 'Suplr_Prvdr_RUCA', 'Suplr_Prvdr_RUCA_Desc', 'Suplr_Prvdr_Cntry', 'Suplr_Prvdr_Spclty_Desc', 'Suplr_Prvdr_Spclty_Srce', 'Tot_Suplr_HCPCS_Cds', 'Tot_Suplr_Benes', 'Tot_Suplr_Clms', 'Tot_Suplr_Srvcs', 'Suplr_Sbmtd_Chrgs', 'Suplr_Mdcr_Alowd_Amt', 'Suplr_Mdcr_Pymt_Amt', 'Suplr_Mdcr_Stdzd_Pymt_Amt', 'DME_Sprsn_Ind', 'DME_Tot_Suplr_HCPCS_Cds', 'DME_Tot_Suplr_Benes', 'DME_Tot_Suplr_Clms', 'DME_Tot_Suplr_Srvcs', 'DME_Suplr_Sbmtd_Chrgs', 'DME_Suplr_Mdcr_Alowd_Amt', 'DME_Suplr_Mdcr_Pymt_Amt', 'DME_Suplr_Mdcr_Stdzd_Pymt_Amt', 'POS_Sprsn_Ind', 'POS_Tot_Suplr_HCPCS_Cds', 'POS_Tot_Suplr_Benes', 'POS_Tot_Suplr_Clms', 'POS_Tot_Suplr_Srvcs', 'POS_Suplr_Sbmtd_Chrgs', 'POS_Suplr_Mdcr_Alowd_Amt', 'POS_Suplr_Mdcr_Pymt_Amt', 'POS_Suplr_Mdcr_Stdzd_Pymt_Amt', 'Drug_Sprsn_Ind', 'Drug_Tot_Suplr_HCPCS_Cds', 'Drug_Tot_Suplr_Benes', 'Drug_Tot_Suplr_Clms', 'Drug_Tot_Suplr_Srvcs', 'Drug_Suplr_Sbmtd_Chrgs', 'Drug_Suplr_Mdcr_Alowd_Amt', 'Drug_Suplr_Mdcr_Pymt_Amt', 'Drug_Suplr_Mdcr_Stdzd_Pymt_Amt', 'Bene_Avg_Age', 'Bene_Age_LT_65_Cnt', 'Bene_Age_65_74_Cnt', 'Bene_Age_75_84_Cnt', 'Bene_Age_GT_84_Cnt', 'Bene_Feml_Cnt', 'Bene_Male_Cnt', 'Bene_Race_Wht_Cnt', 'Bene_Race_Black_Cnt', 'Bene_Race_Api_Cnt', 'Bene_Race_Hspnc_Cnt', 'Bene_Race_Natind_Cnt', 'Bene_Race_Othr_Cnt', 'Bene_Ndual_Cnt', 'Bene_Dual_Cnt', 'Bene_CC_BH_ADHD_OthCD_V1_Pct', 'Bene_CC_BH_Alcohol_Drug_V1_Pct', 'Bene_CC_BH_Tobacco_V1_Pct', 'Bene_CC_BH_Alz_NonAlzdem_V2_Pct', 'Bene_CC_BH_Anxiety_V1_Pct', 'Bene_CC_BH_Bipolar_V1_Pct', 'Bene_CC_BH_Mood_V2_Pct', 'Bene_CC_BH_Depress_V1_Pct', 'Bene_CC_BH_PD_V1_Pct', 'Bene_CC_BH_PTSD_V1_Pct', 'Bene_CC_BH_Schizo_OthPsy_V1_Pct', 'Bene_CC_PH_Asthma_V2_Pct', 'Bene_CC_PH_Afib_V2_Pct', 'Bene_CC_PH_Cancer6_V2_Pct', 
'Bene_CC_PH_CKD_V2_Pct', 'Bene_CC_PH_COPD_V2_Pct', 'Bene_CC_PH_Diabetes_V2_Pct', 'Bene_CC_PH_HF_NonIHD_V2_Pct', 'Bene_CC_PH_Hyperlipidemia_V2_Pct', 'Bene_CC_PH_Hypertension_V2_Pct', 'Bene_CC_PH_IschemicHeart_V2_Pct', 'Bene_CC_PH_Osteoporosis_V2_Pct', 'Bene_CC_PH_Parkinson_V2_Pct', 'Bene_CC_PH_Arthritis_V2_Pct', 'Bene_CC_PH_Stroke_TIA_V2_Pct', 'Bene_Avg_Risk_Scre', 'year']\n",
+      "\n",
+      "Number of unique suppliers: 86467\n",
+      "\n",
+      "Summary of numeric columns:\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Suplr_NPI</th>\n",
+       "      <th>Suplr_Prvdr_Zip5</th>\n",
+       "      <th>Suplr_Prvdr_RUCA</th>\n",
+       "      <th>Tot_Suplr_HCPCS_Cds</th>\n",
+       "      <th>Tot_Suplr_Benes</th>\n",
+       "      <th>Tot_Suplr_Clms</th>\n",
+       "      <th>Tot_Suplr_Srvcs</th>\n",
+       "      <th>Suplr_Sbmtd_Chrgs</th>\n",
+       "      <th>Suplr_Mdcr_Alowd_Amt</th>\n",
+       "      <th>Suplr_Mdcr_Pymt_Amt</th>\n",
+       "      <th>Suplr_Mdcr_Stdzd_Pymt_Amt</th>\n",
+       "      <th>DME_Tot_Suplr_HCPCS_Cds</th>\n",
+       "      <th>DME_Tot_Suplr_Benes</th>\n",
+       "      <th>DME_Tot_Suplr_Clms</th>\n",
+       "      <th>DME_Tot_Suplr_Srvcs</th>\n",
+       "      <th>DME_Suplr_Sbmtd_Chrgs</th>\n",
+       "      <th>DME_Suplr_Mdcr_Alowd_Amt</th>\n",
+       "      <th>DME_Suplr_Mdcr_Pymt_Amt</th>\n",
+       "      <th>DME_Suplr_Mdcr_Stdzd_Pymt_Amt</th>\n",
+       "      <th>POS_Tot_Suplr_HCPCS_Cds</th>\n",
+       "      <th>POS_Tot_Suplr_Benes</th>\n",
+       "      <th>POS_Tot_Suplr_Clms</th>\n",
+       "      <th>POS_Tot_Suplr_Srvcs</th>\n",
+       "      <th>POS_Suplr_Sbmtd_Chrgs</th>\n",
+       "      <th>POS_Suplr_Mdcr_Alowd_Amt</th>\n",
+       "      <th>POS_Suplr_Mdcr_Pymt_Amt</th>\n",
+       "      <th>POS_Suplr_Mdcr_Stdzd_Pymt_Amt</th>\n",
+       "      <th>Drug_Tot_Suplr_HCPCS_Cds</th>\n",
+       "      <th>Drug_Tot_Suplr_Benes</th>\n",
+       "      <th>Drug_Tot_Suplr_Clms</th>\n",
+       "      <th>Drug_Tot_Suplr_Srvcs</th>\n",
+       "      <th>Drug_Suplr_Sbmtd_Chrgs</th>\n",
+       "      <th>Drug_Suplr_Mdcr_Alowd_Amt</th>\n",
+       "      <th>Drug_Suplr_Mdcr_Pymt_Amt</th>\n",
+       "      <th>Drug_Suplr_Mdcr_Stdzd_Pymt_Amt</th>\n",
+       "      <th>Bene_Avg_Age</th>\n",
+       "      <th>Bene_Age_LT_65_Cnt</th>\n",
+       "      <th>Bene_Age_65_74_Cnt</th>\n",
+       "      <th>Bene_Age_75_84_Cnt</th>\n",
+       "      <th>Bene_Age_GT_84_Cnt</th>\n",
+       "      <th>Bene_Feml_Cnt</th>\n",
+       "      <th>Bene_Male_Cnt</th>\n",
+       "      <th>Bene_Race_Wht_Cnt</th>\n",
+       "      <th>Bene_Race_Black_Cnt</th>\n",
+       "      <th>Bene_Race_Api_Cnt</th>\n",
+       "      <th>Bene_Race_Hspnc_Cnt</th>\n",
+       "      <th>Bene_Race_Natind_Cnt</th>\n",
+       "      <th>Bene_Race_Othr_Cnt</th>\n",
+       "      <th>Bene_Ndual_Cnt</th>\n",
+       "      <th>Bene_Dual_Cnt</th>\n",
+       "      <th>Bene_CC_BH_ADHD_OthCD_V1_Pct</th>\n",
+       "      <th>Bene_CC_BH_Alcohol_Drug_V1_Pct</th>\n",
+       "      <th>Bene_CC_BH_Tobacco_V1_Pct</th>\n",
+       "      <th>Bene_CC_BH_Alz_NonAlzdem_V2_Pct</th>\n",
+       "      <th>Bene_CC_BH_Anxiety_V1_Pct</th>\n",
+       "      <th>Bene_CC_BH_Bipolar_V1_Pct</th>\n",
+       "      <th>Bene_CC_BH_Mood_V2_Pct</th>\n",
+       "      <th>Bene_CC_BH_Depress_V1_Pct</th>\n",
+       "      <th>Bene_CC_BH_PD_V1_Pct</th>\n",
+       "      <th>Bene_CC_BH_PTSD_V1_Pct</th>\n",
+       "      <th>Bene_CC_BH_Schizo_OthPsy_V1_Pct</th>\n",
+       "      <th>Bene_CC_PH_Asthma_V2_Pct</th>\n",
+       "      <th>Bene_CC_PH_Afib_V2_Pct</th>\n",
+       "      <th>Bene_CC_PH_Cancer6_V2_Pct</th>\n",
+       "      <th>Bene_CC_PH_CKD_V2_Pct</th>\n",
+       "      <th>Bene_CC_PH_COPD_V2_Pct</th>\n",
+       "      <th>Bene_CC_PH_Diabetes_V2_Pct</th>\n",
+       "      <th>Bene_CC_PH_HF_NonIHD_V2_Pct</th>\n",
+       "      <th>Bene_CC_PH_Hyperlipidemia_V2_Pct</th>\n",
+       "      <th>Bene_CC_PH_Hypertension_V2_Pct</th>\n",
+       "      <th>Bene_CC_PH_IschemicHeart_V2_Pct</th>\n",
+       "      <th>Bene_CC_PH_Osteoporosis_V2_Pct</th>\n",
+       "      <th>Bene_CC_PH_Parkinson_V2_Pct</th>\n",
+       "      <th>Bene_CC_PH_Arthritis_V2_Pct</th>\n",
+       "      <th>Bene_CC_PH_Stroke_TIA_V2_Pct</th>\n",
+       "      <th>Bene_Avg_Risk_Scre</th>\n",
+       "      <th>year</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>count</th>\n",
+       "      <td>3.526110e+05</td>\n",
+       "      <td>352611.000000</td>\n",
+       "      <td>352575.000000</td>\n",
+       "      <td>352611.000000</td>\n",
+       "      <td>331904.000000</td>\n",
+       "      <td>352611.000000</td>\n",
+       "      <td>3.526110e+05</td>\n",
+       "      <td>3.526110e+05</td>\n",
+       "      <td>3.526110e+05</td>\n",
+       "      <td>3.526110e+05</td>\n",
+       "      <td>3.526110e+05</td>\n",
+       "      <td>334378.000000</td>\n",
+       "      <td>312252.000000</td>\n",
+       "      <td>334378.000000</td>\n",
+       "      <td>3.343780e+05</td>\n",
+       "      <td>3.343780e+05</td>\n",
+       "      <td>3.343780e+05</td>\n",
+       "      <td>3.343780e+05</td>\n",
+       "      <td>3.343780e+05</td>\n",
+       "      <td>292989.000000</td>\n",
+       "      <td>271386.000000</td>\n",
+       "      <td>292989.000000</td>\n",
+       "      <td>2.929890e+05</td>\n",
+       "      <td>2.929890e+05</td>\n",
+       "      <td>2.929890e+05</td>\n",
+       "      <td>2.929890e+05</td>\n",
+       "      <td>2.929890e+05</td>\n",
+       "      <td>279663.000000</td>\n",
+       "      <td>195592.000000</td>\n",
+       "      <td>279663.000000</td>\n",
+       "      <td>2.796630e+05</td>\n",
+       "      <td>2.796630e+05</td>\n",
+       "      <td>2.796630e+05</td>\n",
+       "      <td>2.796630e+05</td>\n",
+       "      <td>2.796630e+05</td>\n",
+       "      <td>352549.000000</td>\n",
+       "      <td>112648.000000</td>\n",
+       "      <td>257838.000000</td>\n",
+       "      <td>210441.000000</td>\n",
+       "      <td>90651.000000</td>\n",
+       "      <td>253106.000000</td>\n",
+       "      <td>253106.000000</td>\n",
+       "      <td>297628.000000</td>\n",
+       "      <td>135898.000000</td>\n",
+       "      <td>152090.000000</td>\n",
+       "      <td>153492.000000</td>\n",
+       "      <td>281658.000000</td>\n",
+       "      <td>130667.000000</td>\n",
+       "      <td>179575.000000</td>\n",
+       "      <td>179575.000000</td>\n",
+       "      <td>210454.000000</td>\n",
+       "      <td>110545.000000</td>\n",
+       "      <td>128558.000000</td>\n",
+       "      <td>103844.000000</td>\n",
+       "      <td>184461.000000</td>\n",
+       "      <td>121693.000000</td>\n",
+       "      <td>194930.000000</td>\n",
+       "      <td>182561.000000</td>\n",
+       "      <td>170846.000000</td>\n",
+       "      <td>180278.000000</td>\n",
+       "      <td>151709.000000</td>\n",
+       "      <td>133002.000000</td>\n",
+       "      <td>157471.000000</td>\n",
+       "      <td>131889.000000</td>\n",
+       "      <td>231256.000000</td>\n",
+       "      <td>189497.000000</td>\n",
+       "      <td>289577.000000</td>\n",
+       "      <td>176141.000000</td>\n",
+       "      <td>311898.000000</td>\n",
+       "      <td>315415.000000</td>\n",
+       "      <td>227186.000000</td>\n",
+       "      <td>115383.000000</td>\n",
+       "      <td>163625.000000</td>\n",
+       "      <td>264873.000000</td>\n",
+       "      <td>102463.000000</td>\n",
+       "      <td>352548.000000</td>\n",
+       "      <td>352611.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>mean</th>\n",
+       "      <td>1.499823e+09</td>\n",
+       "      <td>47761.974436</td>\n",
+       "      <td>1.938816</td>\n",
+       "      <td>19.162681</td>\n",
+       "      <td>180.003712</td>\n",
+       "      <td>723.630647</td>\n",
+       "      <td>2.894472e+04</td>\n",
+       "      <td>4.327424e+05</td>\n",
+       "      <td>1.558504e+05</td>\n",
+       "      <td>1.198852e+05</td>\n",
+       "      <td>1.189166e+05</td>\n",
+       "      <td>8.810921</td>\n",
+       "      <td>145.596758</td>\n",
+       "      <td>642.879322</td>\n",
+       "      <td>3.905078e+03</td>\n",
+       "      <td>2.565282e+05</td>\n",
+       "      <td>8.591894e+04</td>\n",
+       "      <td>6.550221e+04</td>\n",
+       "      <td>6.456093e+04</td>\n",
+       "      <td>6.513569</td>\n",
+       "      <td>48.193319</td>\n",
+       "      <td>80.856425</td>\n",
+       "      <td>3.355593e+03</td>\n",
+       "      <td>6.501689e+04</td>\n",
+       "      <td>4.470813e+04</td>\n",
+       "      <td>3.454446e+04</td>\n",
+       "      <td>3.460400e+04</td>\n",
+       "      <td>2.651927</td>\n",
+       "      <td>15.184762</td>\n",
+       "      <td>53.643879</td>\n",
+       "      <td>1.554591e+04</td>\n",
+       "      <td>3.928560e+04</td>\n",
+       "      <td>1.697234e+04</td>\n",
+       "      <td>1.326474e+04</td>\n",
+       "      <td>1.314945e+04</td>\n",
+       "      <td>72.135987</td>\n",
+       "      <td>70.165835</td>\n",
+       "      <td>93.109103</td>\n",
+       "      <td>83.160492</td>\n",
+       "      <td>68.880751</td>\n",
+       "      <td>126.068106</td>\n",
+       "      <td>103.751768</td>\n",
+       "      <td>157.496126</td>\n",
+       "      <td>38.625859</td>\n",
+       "      <td>6.402137</td>\n",
+       "      <td>20.363218</td>\n",
+       "      <td>0.683773</td>\n",
+       "      <td>6.168627</td>\n",
+       "      <td>216.677667</td>\n",
+       "      <td>81.802339</td>\n",
+       "      <td>0.001365</td>\n",
+       "      <td>0.056437</td>\n",
+       "      <td>0.137417</td>\n",
+       "      <td>0.075832</td>\n",
+       "      <td>0.270268</td>\n",
+       "      <td>0.023278</td>\n",
+       "      <td>0.295474</td>\n",
+       "      <td>0.268735</td>\n",
+       "      <td>0.003625</td>\n",
+       "      <td>0.003438</td>\n",
+       "      <td>0.011297</td>\n",
+       "      <td>0.154784</td>\n",
+       "      <td>0.205620</td>\n",
+       "      <td>0.170630</td>\n",
+       "      <td>0.364954</td>\n",
+       "      <td>0.296001</td>\n",
+       "      <td>0.738303</td>\n",
+       "      <td>0.245400</td>\n",
+       "      <td>0.803431</td>\n",
+       "      <td>0.854808</td>\n",
+       "      <td>0.356955</td>\n",
+       "      <td>0.132541</td>\n",
+       "      <td>0.004815</td>\n",
+       "      <td>0.491985</td>\n",
+       "      <td>0.085324</td>\n",
+       "      <td>1.759540</td>\n",
+       "      <td>2019.933791</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>std</th>\n",
+       "      <td>2.877778e+08</td>\n",
+       "      <td>28443.077792</td>\n",
+       "      <td>2.593615</td>\n",
+       "      <td>25.023950</td>\n",
+       "      <td>1318.464715</td>\n",
+       "      <td>5759.184130</td>\n",
+       "      <td>1.137952e+06</td>\n",
+       "      <td>6.107114e+06</td>\n",
+       "      <td>2.104732e+06</td>\n",
+       "      <td>1.642990e+06</td>\n",
+       "      <td>1.640688e+06</td>\n",
+       "      <td>17.959107</td>\n",
+       "      <td>1210.207291</td>\n",
+       "      <td>5472.581913</td>\n",
+       "      <td>1.183744e+05</td>\n",
+       "      <td>4.536824e+06</td>\n",
+       "      <td>1.222667e+06</td>\n",
+       "      <td>9.536508e+05</td>\n",
+       "      <td>9.520616e+05</td>\n",
+       "      <td>18.036465</td>\n",
+       "      <td>582.654243</td>\n",
+       "      <td>1658.046524</td>\n",
+       "      <td>1.774209e+05</td>\n",
+       "      <td>1.399145e+06</td>\n",
+       "      <td>7.558673e+05</td>\n",
+       "      <td>5.801982e+05</td>\n",
+       "      <td>5.969880e+05</td>\n",
+       "      <td>2.654579</td>\n",
+       "      <td>508.361346</td>\n",
+       "      <td>2599.424059</td>\n",
+       "      <td>1.233497e+06</td>\n",
+       "      <td>2.072189e+06</td>\n",
+       "      <td>9.877956e+05</td>\n",
+       "      <td>7.734391e+05</td>\n",
+       "      <td>7.665664e+05</td>\n",
+       "      <td>4.203287</td>\n",
+       "      <td>362.180891</td>\n",
+       "      <td>576.926665</td>\n",
+       "      <td>551.356612</td>\n",
+       "      <td>375.786927</td>\n",
+       "      <td>859.590068</td>\n",
+       "      <td>663.652059</td>\n",
+       "      <td>1086.588625</td>\n",
+       "      <td>283.259321</td>\n",
+       "      <td>53.643838</td>\n",
+       "      <td>181.647129</td>\n",
+       "      <td>9.812065</td>\n",
+       "      <td>45.984446</td>\n",
+       "      <td>1322.832122</td>\n",
+       "      <td>533.498517</td>\n",
+       "      <td>0.008951</td>\n",
+       "      <td>0.076374</td>\n",
+       "      <td>0.100486</td>\n",
+       "      <td>0.102199</td>\n",
+       "      <td>0.102902</td>\n",
+       "      <td>0.044075</td>\n",
+       "      <td>0.108544</td>\n",
+       "      <td>0.102275</td>\n",
+       "      <td>0.012957</td>\n",
+       "      <td>0.013120</td>\n",
+       "      <td>0.044755</td>\n",
+       "      <td>0.087742</td>\n",
+       "      <td>0.084632</td>\n",
+       "      <td>0.135977</td>\n",
+       "      <td>0.153952</td>\n",
+       "      <td>0.140641</td>\n",
+       "      <td>0.251118</td>\n",
+       "      <td>0.108972</td>\n",
+       "      <td>0.145535</td>\n",
+       "      <td>0.146975</td>\n",
+       "      <td>0.112958</td>\n",
+       "      <td>0.083479</td>\n",
+       "      <td>0.017198</td>\n",
+       "      <td>0.137264</td>\n",
+       "      <td>0.070467</td>\n",
+       "      <td>0.655410</td>\n",
+       "      <td>1.417299</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>min</th>\n",
+       "      <td>1.003000e+09</td>\n",
+       "      <td>601.000000</td>\n",
+       "      <td>1.000000</td>\n",
+       "      <td>1.000000</td>\n",
+       "      <td>11.000000</td>\n",
+       "      <td>11.000000</td>\n",
+       "      <td>1.100000e+01</td>\n",
+       "      <td>1.960000e+01</td>\n",
+       "      <td>1.662000e+01</td>\n",
+       "      <td>0.000000e+00</td>\n",
+       "      <td>0.000000e+00</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000e+00</td>\n",
+       "      <td>0.000000e+00</td>\n",
+       "      <td>0.000000e+00</td>\n",
+       "      <td>0.000000e+00</td>\n",
+       "      <td>0.000000e+00</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000e+00</td>\n",
+       "      <td>0.000000e+00</td>\n",
+       "      <td>0.000000e+00</td>\n",
+       "      <td>0.000000e+00</td>\n",
+       "      <td>0.000000e+00</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000e+00</td>\n",
+       "      <td>0.000000e+00</td>\n",
+       "      <td>0.000000e+00</td>\n",
+       "      <td>0.000000e+00</td>\n",
+       "      <td>0.000000e+00</td>\n",
+       "      <td>1.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.176000</td>\n",
+       "      <td>2018.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>25%</th>\n",
+       "      <td>1.255346e+09</td>\n",
+       "      <td>25504.000000</td>\n",
+       "      <td>1.000000</td>\n",
+       "      <td>8.000000</td>\n",
+       "      <td>28.000000</td>\n",
+       "      <td>60.000000</td>\n",
+       "      <td>4.770000e+02</td>\n",
+       "      <td>1.507708e+04</td>\n",
+       "      <td>4.234210e+03</td>\n",
+       "      <td>3.093515e+03</td>\n",
+       "      <td>3.221385e+03</td>\n",
+       "      <td>3.000000</td>\n",
+       "      <td>15.000000</td>\n",
+       "      <td>26.000000</td>\n",
+       "      <td>4.900000e+01</td>\n",
+       "      <td>2.726097e+03</td>\n",
+       "      <td>6.501400e+02</td>\n",
+       "      <td>4.462525e+02</td>\n",
+       "      <td>4.783675e+02</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000e+00</td>\n",
+       "      <td>0.000000e+00</td>\n",
+       "      <td>0.000000e+00</td>\n",
+       "      <td>0.000000e+00</td>\n",
+       "      <td>0.000000e+00</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000e+00</td>\n",
+       "      <td>0.000000e+00</td>\n",
+       "      <td>0.000000e+00</td>\n",
+       "      <td>0.000000e+00</td>\n",
+       "      <td>0.000000e+00</td>\n",
+       "      <td>70.280000</td>\n",
+       "      <td>13.000000</td>\n",
+       "      <td>18.000000</td>\n",
+       "      <td>17.000000</td>\n",
+       "      <td>11.000000</td>\n",
+       "      <td>24.000000</td>\n",
+       "      <td>19.000000</td>\n",
+       "      <td>23.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>29.000000</td>\n",
+       "      <td>15.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.074721</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.205950</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.225806</td>\n",
+       "      <td>0.205128</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.110497</td>\n",
+       "      <td>0.156250</td>\n",
+       "      <td>0.118056</td>\n",
+       "      <td>0.277108</td>\n",
+       "      <td>0.203704</td>\n",
+       "      <td>0.537500</td>\n",
+       "      <td>0.180812</td>\n",
+       "      <td>0.714286</td>\n",
+       "      <td>0.769231</td>\n",
+       "      <td>0.282609</td>\n",
+       "      <td>0.091703</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.400000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>1.363045</td>\n",
+       "      <td>2019.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>50%</th>\n",
+       "      <td>1.497926e+09</td>\n",
+       "      <td>44125.000000</td>\n",
+       "      <td>1.000000</td>\n",
+       "      <td>12.000000</td>\n",
+       "      <td>53.000000</td>\n",
+       "      <td>146.000000</td>\n",
+       "      <td>3.592000e+03</td>\n",
+       "      <td>3.964752e+04</td>\n",
+       "      <td>1.171618e+04</td>\n",
+       "      <td>8.781310e+03</td>\n",
+       "      <td>8.958790e+03</td>\n",
+       "      <td>5.000000</td>\n",
+       "      <td>39.000000</td>\n",
+       "      <td>112.000000</td>\n",
+       "      <td>2.430000e+02</td>\n",
+       "      <td>1.437400e+04</td>\n",
+       "      <td>2.624245e+03</td>\n",
+       "      <td>1.843840e+03</td>\n",
+       "      <td>1.960370e+03</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000e+00</td>\n",
+       "      <td>0.000000e+00</td>\n",
+       "      <td>0.000000e+00</td>\n",
+       "      <td>0.000000e+00</td>\n",
+       "      <td>0.000000e+00</td>\n",
+       "      <td>3.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>15.000000</td>\n",
+       "      <td>1.954000e+03</td>\n",
+       "      <td>3.273830e+03</td>\n",
+       "      <td>3.129500e+02</td>\n",
+       "      <td>2.231000e+02</td>\n",
+       "      <td>2.212200e+02</td>\n",
+       "      <td>72.557692</td>\n",
+       "      <td>24.000000</td>\n",
+       "      <td>30.000000</td>\n",
+       "      <td>26.000000</td>\n",
+       "      <td>19.000000</td>\n",
+       "      <td>39.000000</td>\n",
+       "      <td>31.000000</td>\n",
+       "      <td>45.000000</td>\n",
+       "      <td>11.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>59.000000</td>\n",
+       "      <td>25.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.040710</td>\n",
+       "      <td>0.135802</td>\n",
+       "      <td>0.057075</td>\n",
+       "      <td>0.259615</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.282353</td>\n",
+       "      <td>0.258065</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.147059</td>\n",
+       "      <td>0.201835</td>\n",
+       "      <td>0.151899</td>\n",
+       "      <td>0.351351</td>\n",
+       "      <td>0.277108</td>\n",
+       "      <td>0.811594</td>\n",
+       "      <td>0.236842</td>\n",
+       "      <td>0.806452</td>\n",
+       "      <td>0.857143</td>\n",
+       "      <td>0.349057</td>\n",
+       "      <td>0.130435</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.474729</td>\n",
+       "      <td>0.086397</td>\n",
+       "      <td>1.665859</td>\n",
+       "      <td>2020.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>75%</th>\n",
+       "      <td>1.740658e+09</td>\n",
+       "      <td>74104.000000</td>\n",
+       "      <td>1.000000</td>\n",
+       "      <td>18.000000</td>\n",
+       "      <td>105.000000</td>\n",
+       "      <td>315.000000</td>\n",
+       "      <td>1.050350e+04</td>\n",
+       "      <td>9.733840e+04</td>\n",
+       "      <td>3.444066e+04</td>\n",
+       "      <td>2.628954e+04</td>\n",
+       "      <td>2.645689e+04</td>\n",
+       "      <td>6.000000</td>\n",
+       "      <td>81.000000</td>\n",
+       "      <td>256.000000</td>\n",
+       "      <td>5.780000e+02</td>\n",
+       "      <td>3.535016e+04</td>\n",
+       "      <td>6.448807e+03</td>\n",
+       "      <td>4.567375e+03</td>\n",
+       "      <td>4.816860e+03</td>\n",
+       "      <td>4.000000</td>\n",
+       "      <td>19.000000</td>\n",
+       "      <td>23.000000</td>\n",
+       "      <td>5.800000e+01</td>\n",
+       "      <td>8.737850e+03</td>\n",
+       "      <td>6.100790e+03</td>\n",
+       "      <td>4.672270e+03</td>\n",
+       "      <td>4.877140e+03</td>\n",
+       "      <td>5.000000</td>\n",
+       "      <td>16.000000</td>\n",
+       "      <td>42.000000</td>\n",
+       "      <td>6.352000e+03</td>\n",
+       "      <td>1.723049e+04</td>\n",
+       "      <td>5.056030e+03</td>\n",
+       "      <td>3.889745e+03</td>\n",
+       "      <td>3.861795e+03</td>\n",
+       "      <td>74.583333</td>\n",
+       "      <td>54.000000</td>\n",
+       "      <td>55.000000</td>\n",
+       "      <td>49.000000</td>\n",
+       "      <td>48.000000</td>\n",
+       "      <td>74.000000</td>\n",
+       "      <td>59.000000</td>\n",
+       "      <td>93.000000</td>\n",
+       "      <td>25.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>15.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>125.000000</td>\n",
+       "      <td>54.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.094395</td>\n",
+       "      <td>0.193682</td>\n",
+       "      <td>0.109195</td>\n",
+       "      <td>0.322835</td>\n",
+       "      <td>0.042476</td>\n",
+       "      <td>0.350000</td>\n",
+       "      <td>0.320755</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.193548</td>\n",
+       "      <td>0.254902</td>\n",
+       "      <td>0.190440</td>\n",
+       "      <td>0.432432</td>\n",
+       "      <td>0.366667</td>\n",
+       "      <td>0.920000</td>\n",
+       "      <td>0.302857</td>\n",
+       "      <td>0.894118</td>\n",
+       "      <td>0.937500</td>\n",
+       "      <td>0.421053</td>\n",
+       "      <td>0.173554</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.564815</td>\n",
+       "      <td>0.123711</td>\n",
+       "      <td>2.018267</td>\n",
+       "      <td>2021.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>max</th>\n",
+       "      <td>1.993000e+09</td>\n",
+       "      <td>99901.000000</td>\n",
+       "      <td>99.000000</td>\n",
+       "      <td>538.000000</td>\n",
+       "      <td>237906.000000</td>\n",
+       "      <td>685662.000000</td>\n",
+       "      <td>3.290728e+08</td>\n",
+       "      <td>1.836805e+09</td>\n",
+       "      <td>3.991887e+08</td>\n",
+       "      <td>3.116929e+08</td>\n",
+       "      <td>3.117852e+08</td>\n",
+       "      <td>313.000000</td>\n",
+       "      <td>237906.000000</td>\n",
+       "      <td>685662.000000</td>\n",
+       "      <td>2.316198e+07</td>\n",
+       "      <td>1.830416e+09</td>\n",
+       "      <td>2.082855e+08</td>\n",
+       "      <td>1.622225e+08</td>\n",
+       "      <td>1.622216e+08</td>\n",
+       "      <td>277.000000</td>\n",
+       "      <td>94760.000000</td>\n",
+       "      <td>255217.000000</td>\n",
+       "      <td>3.156666e+07</td>\n",
+       "      <td>2.291182e+08</td>\n",
+       "      <td>1.343600e+08</td>\n",
+       "      <td>1.055647e+08</td>\n",
+       "      <td>1.126160e+08</td>\n",
+       "      <td>14.000000</td>\n",
+       "      <td>98084.000000</td>\n",
+       "      <td>619016.000000</td>\n",
+       "      <td>3.286327e+08</td>\n",
+       "      <td>4.578468e+08</td>\n",
+       "      <td>1.979528e+08</td>\n",
+       "      <td>1.550935e+08</td>\n",
+       "      <td>1.530220e+08</td>\n",
+       "      <td>108.000000</td>\n",
+       "      <td>46481.000000</td>\n",
+       "      <td>82335.000000</td>\n",
+       "      <td>81159.000000</td>\n",
+       "      <td>27931.000000</td>\n",
+       "      <td>142147.000000</td>\n",
+       "      <td>95759.000000</td>\n",
+       "      <td>170138.000000</td>\n",
+       "      <td>44508.000000</td>\n",
+       "      <td>4326.000000</td>\n",
+       "      <td>28959.000000</td>\n",
+       "      <td>740.000000</td>\n",
+       "      <td>2859.000000</td>\n",
+       "      <td>158443.000000</td>\n",
+       "      <td>79463.000000</td>\n",
+       "      <td>0.565217</td>\n",
+       "      <td>1.636364</td>\n",
+       "      <td>1.636364</td>\n",
+       "      <td>1.068421</td>\n",
+       "      <td>1.727273</td>\n",
+       "      <td>1.545455</td>\n",
+       "      <td>1.727273</td>\n",
+       "      <td>1.636364</td>\n",
+       "      <td>0.833333</td>\n",
+       "      <td>0.444444</td>\n",
+       "      <td>1.636364</td>\n",
+       "      <td>1.454545</td>\n",
+       "      <td>2.461538</td>\n",
+       "      <td>2.380952</td>\n",
+       "      <td>3.601562</td>\n",
+       "      <td>1.636364</td>\n",
+       "      <td>2.589844</td>\n",
+       "      <td>3.153846</td>\n",
+       "      <td>3.261719</td>\n",
+       "      <td>3.753906</td>\n",
+       "      <td>2.367188</td>\n",
+       "      <td>1.272727</td>\n",
+       "      <td>1.144068</td>\n",
+       "      <td>1.750000</td>\n",
+       "      <td>0.846154</td>\n",
+       "      <td>16.340466</td>\n",
+       "      <td>2022.000000</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "          Suplr_NPI  Suplr_Prvdr_Zip5  Suplr_Prvdr_RUCA  Tot_Suplr_HCPCS_Cds  Tot_Suplr_Benes  Tot_Suplr_Clms  Tot_Suplr_Srvcs  Suplr_Sbmtd_Chrgs  Suplr_Mdcr_Alowd_Amt  Suplr_Mdcr_Pymt_Amt  Suplr_Mdcr_Stdzd_Pymt_Amt  DME_Tot_Suplr_HCPCS_Cds  DME_Tot_Suplr_Benes  DME_Tot_Suplr_Clms  DME_Tot_Suplr_Srvcs  DME_Suplr_Sbmtd_Chrgs  DME_Suplr_Mdcr_Alowd_Amt  DME_Suplr_Mdcr_Pymt_Amt  DME_Suplr_Mdcr_Stdzd_Pymt_Amt  POS_Tot_Suplr_HCPCS_Cds  POS_Tot_Suplr_Benes  POS_Tot_Suplr_Clms  POS_Tot_Suplr_Srvcs  POS_Suplr_Sbmtd_Chrgs  POS_Suplr_Mdcr_Alowd_Amt  POS_Suplr_Mdcr_Pymt_Amt  POS_Suplr_Mdcr_Stdzd_Pymt_Amt  Drug_Tot_Suplr_HCPCS_Cds  Drug_Tot_Suplr_Benes  Drug_Tot_Suplr_Clms  Drug_Tot_Suplr_Srvcs  Drug_Suplr_Sbmtd_Chrgs  Drug_Suplr_Mdcr_Alowd_Amt  Drug_Suplr_Mdcr_Pymt_Amt  Drug_Suplr_Mdcr_Stdzd_Pymt_Amt   Bene_Avg_Age  Bene_Age_LT_65_Cnt  Bene_Age_65_74_Cnt  Bene_Age_75_84_Cnt  Bene_Age_GT_84_Cnt  Bene_Feml_Cnt  Bene_Male_Cnt  Bene_Race_Wht_Cnt  Bene_Race_Black_Cnt  Bene_Race_Api_Cnt  Bene_Race_Hspnc_Cnt  Bene_Race_Natind_Cnt  Bene_Race_Othr_Cnt  Bene_Ndual_Cnt  Bene_Dual_Cnt  Bene_CC_BH_ADHD_OthCD_V1_Pct  Bene_CC_BH_Alcohol_Drug_V1_Pct  Bene_CC_BH_Tobacco_V1_Pct  Bene_CC_BH_Alz_NonAlzdem_V2_Pct  Bene_CC_BH_Anxiety_V1_Pct  Bene_CC_BH_Bipolar_V1_Pct  Bene_CC_BH_Mood_V2_Pct  Bene_CC_BH_Depress_V1_Pct  Bene_CC_BH_PD_V1_Pct  Bene_CC_BH_PTSD_V1_Pct  Bene_CC_BH_Schizo_OthPsy_V1_Pct  Bene_CC_PH_Asthma_V2_Pct  Bene_CC_PH_Afib_V2_Pct  Bene_CC_PH_Cancer6_V2_Pct  Bene_CC_PH_CKD_V2_Pct  Bene_CC_PH_COPD_V2_Pct  Bene_CC_PH_Diabetes_V2_Pct  Bene_CC_PH_HF_NonIHD_V2_Pct  Bene_CC_PH_Hyperlipidemia_V2_Pct  Bene_CC_PH_Hypertension_V2_Pct  Bene_CC_PH_IschemicHeart_V2_Pct  Bene_CC_PH_Osteoporosis_V2_Pct  Bene_CC_PH_Parkinson_V2_Pct  Bene_CC_PH_Arthritis_V2_Pct  Bene_CC_PH_Stroke_TIA_V2_Pct  Bene_Avg_Risk_Scre           year\n",
+       "count  3.526110e+05     352611.000000     352575.000000        352611.000000    331904.000000   352611.000000     3.526110e+05       3.526110e+05          3.526110e+05         3.526110e+05               3.526110e+05            334378.000000        312252.000000       334378.000000         3.343780e+05           3.343780e+05              3.343780e+05             3.343780e+05                   3.343780e+05            292989.000000        271386.000000       292989.000000         2.929890e+05           2.929890e+05              2.929890e+05             2.929890e+05                   2.929890e+05             279663.000000         195592.000000        279663.000000          2.796630e+05            2.796630e+05               2.796630e+05              2.796630e+05                    2.796630e+05  352549.000000       112648.000000       257838.000000       210441.000000        90651.000000  253106.000000  253106.000000      297628.000000        135898.000000      152090.000000        153492.000000         281658.000000       130667.000000   179575.000000  179575.000000                 210454.000000                   110545.000000              128558.000000                    103844.000000              184461.000000              121693.000000           194930.000000              182561.000000         170846.000000           180278.000000                    151709.000000             133002.000000           157471.000000              131889.000000          231256.000000           189497.000000               289577.000000                176141.000000                     311898.000000                   315415.000000                    227186.000000                   115383.000000                163625.000000                264873.000000                 102463.000000       352548.000000  352611.000000\n",
+       "mean   1.499823e+09      47761.974436          1.938816            19.162681       180.003712      723.630647     2.894472e+04       4.327424e+05          1.558504e+05         1.198852e+05               1.189166e+05                 8.810921           145.596758          642.879322         3.905078e+03           2.565282e+05              8.591894e+04             6.550221e+04                   6.456093e+04                 6.513569            48.193319           80.856425         3.355593e+03           6.501689e+04              4.470813e+04             3.454446e+04                   3.460400e+04                  2.651927             15.184762            53.643879          1.554591e+04            3.928560e+04               1.697234e+04              1.326474e+04                    1.314945e+04      72.135987           70.165835           93.109103           83.160492           68.880751     126.068106     103.751768         157.496126            38.625859           6.402137            20.363218              0.683773            6.168627      216.677667      81.802339                      0.001365                        0.056437                   0.137417                         0.075832                   0.270268                   0.023278                0.295474                   0.268735              0.003625                0.003438                         0.011297                  0.154784                0.205620                   0.170630               0.364954                0.296001                    0.738303                     0.245400                          0.803431                        0.854808                         0.356955                        0.132541                     0.004815                     0.491985                      0.085324            1.759540    2019.933791\n",
+       "std    2.877778e+08      28443.077792          2.593615            25.023950      1318.464715     5759.184130     1.137952e+06       6.107114e+06          2.104732e+06         1.642990e+06               1.640688e+06                17.959107          1210.207291         5472.581913         1.183744e+05           4.536824e+06              1.222667e+06             9.536508e+05                   9.520616e+05                18.036465           582.654243         1658.046524         1.774209e+05           1.399145e+06              7.558673e+05             5.801982e+05                   5.969880e+05                  2.654579            508.361346          2599.424059          1.233497e+06            2.072189e+06               9.877956e+05              7.734391e+05                    7.665664e+05       4.203287          362.180891          576.926665          551.356612          375.786927     859.590068     663.652059        1086.588625           283.259321          53.643838           181.647129              9.812065           45.984446     1322.832122     533.498517                      0.008951                        0.076374                   0.100486                         0.102199                   0.102902                   0.044075                0.108544                   0.102275              0.012957                0.013120                         0.044755                  0.087742                0.084632                   0.135977               0.153952                0.140641                    0.251118                     0.108972                          0.145535                        0.146975                         0.112958                        0.083479                     0.017198                     0.137264                      0.070467            0.655410       1.417299\n",
+       "min    1.003000e+09        601.000000          1.000000             1.000000        11.000000       11.000000     1.100000e+01       1.960000e+01          1.662000e+01         0.000000e+00               0.000000e+00                 0.000000             0.000000            0.000000         0.000000e+00           0.000000e+00              0.000000e+00             0.000000e+00                   0.000000e+00                 0.000000             0.000000            0.000000         0.000000e+00           0.000000e+00              0.000000e+00             0.000000e+00                   0.000000e+00                  0.000000              0.000000             0.000000          0.000000e+00            0.000000e+00               0.000000e+00              0.000000e+00                    0.000000e+00       1.000000            0.000000            0.000000            0.000000            0.000000       0.000000       0.000000           0.000000             0.000000           0.000000             0.000000              0.000000            0.000000        0.000000       0.000000                      0.000000                        0.000000                   0.000000                         0.000000                   0.000000                   0.000000                0.000000                   0.000000              0.000000                0.000000                         0.000000                  0.000000                0.000000                   0.000000               0.000000                0.000000                    0.000000                     0.000000                          0.000000                        0.000000                         0.000000                        0.000000                     0.000000                     0.000000                      0.000000            0.176000    2018.000000\n",
+       "25%    1.255346e+09      25504.000000          1.000000             8.000000        28.000000       60.000000     4.770000e+02       1.507708e+04          4.234210e+03         3.093515e+03               3.221385e+03                 3.000000            15.000000           26.000000         4.900000e+01           2.726097e+03              6.501400e+02             4.462525e+02                   4.783675e+02                 0.000000             0.000000            0.000000         0.000000e+00           0.000000e+00              0.000000e+00             0.000000e+00                   0.000000e+00                  0.000000              0.000000             0.000000          0.000000e+00            0.000000e+00               0.000000e+00              0.000000e+00                    0.000000e+00      70.280000           13.000000           18.000000           17.000000           11.000000      24.000000      19.000000          23.000000             0.000000           0.000000             0.000000              0.000000            0.000000       29.000000      15.000000                      0.000000                        0.000000                   0.074721                         0.000000                   0.205950                   0.000000                0.225806                   0.205128              0.000000                0.000000                         0.000000                  0.110497                0.156250                   0.118056               0.277108                0.203704                    0.537500                     0.180812                          0.714286                        0.769231                         0.282609                        0.091703                     0.000000                     0.400000                      0.000000            1.363045    2019.000000\n",
+       "50%    1.497926e+09      44125.000000          1.000000            12.000000        53.000000      146.000000     3.592000e+03       3.964752e+04          1.171618e+04         8.781310e+03               8.958790e+03                 5.000000            39.000000          112.000000         2.430000e+02           1.437400e+04              2.624245e+03             1.843840e+03                   1.960370e+03                 0.000000             0.000000            0.000000         0.000000e+00           0.000000e+00              0.000000e+00             0.000000e+00                   0.000000e+00                  3.000000              0.000000            15.000000          1.954000e+03            3.273830e+03               3.129500e+02              2.231000e+02                    2.212200e+02      72.557692           24.000000           30.000000           26.000000           19.000000      39.000000      31.000000          45.000000            11.000000           0.000000             0.000000              0.000000            0.000000       59.000000      25.000000                      0.000000                        0.040710                   0.135802                         0.057075                   0.259615                   0.000000                0.282353                   0.258065              0.000000                0.000000                         0.000000                  0.147059                0.201835                   0.151899               0.351351                0.277108                    0.811594                     0.236842                          0.806452                        0.857143                         0.349057                        0.130435                     0.000000                     0.474729                      0.086397            1.665859    2020.000000\n",
+       "75%    1.740658e+09      74104.000000          1.000000            18.000000       105.000000      315.000000     1.050350e+04       9.733840e+04          3.444066e+04         2.628954e+04               2.645689e+04                 6.000000            81.000000          256.000000         5.780000e+02           3.535016e+04              6.448807e+03             4.567375e+03                   4.816860e+03                 4.000000            19.000000           23.000000         5.800000e+01           8.737850e+03              6.100790e+03             4.672270e+03                   4.877140e+03                  5.000000             16.000000            42.000000          6.352000e+03            1.723049e+04               5.056030e+03              3.889745e+03                    3.861795e+03      74.583333           54.000000           55.000000           49.000000           48.000000      74.000000      59.000000          93.000000            25.000000           0.000000            15.000000              0.000000            0.000000      125.000000      54.000000                      0.000000                        0.094395                   0.193682                         0.109195                   0.322835                   0.042476                0.350000                   0.320755              0.000000                0.000000                         0.000000                  0.193548                0.254902                   0.190440               0.432432                0.366667                    0.920000                     0.302857                          0.894118                        0.937500                         0.421053                        0.173554                     0.000000                     0.564815                      0.123711            2.018267    2021.000000\n",
+       "max    1.993000e+09      99901.000000         99.000000           538.000000    237906.000000   685662.000000     3.290728e+08       1.836805e+09          3.991887e+08         3.116929e+08               3.117852e+08               313.000000        237906.000000       685662.000000         2.316198e+07           1.830416e+09              2.082855e+08             1.622225e+08                   1.622216e+08               277.000000         94760.000000       255217.000000         3.156666e+07           2.291182e+08              1.343600e+08             1.055647e+08                   1.126160e+08                 14.000000          98084.000000        619016.000000          3.286327e+08            4.578468e+08               1.979528e+08              1.550935e+08                    1.530220e+08     108.000000        46481.000000        82335.000000        81159.000000        27931.000000  142147.000000   95759.000000      170138.000000         44508.000000        4326.000000         28959.000000            740.000000         2859.000000   158443.000000   79463.000000                      0.565217                        1.636364                   1.636364                         1.068421                   1.727273                   1.545455                1.727273                   1.636364              0.833333                0.444444                         1.636364                  1.454545                2.461538                   2.380952               3.601562                1.636364                    2.589844                     3.153846                          3.261719                        3.753906                         2.367188                        1.272727                     1.144068                     1.750000                      0.846154           16.340466    2022.000000"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "if not combined_df.empty:\n",
+    "    print(\"\\nFirst few rows:\")\n",
+    "    display(combined_df.head())\n",
+    "\n",
+    "    print(\"\\nColumn Names:\")\n",
+    "    print(combined_df.columns.tolist())\n",
+    "\n",
+    "    print(f\"\\nNumber of unique suppliers: {combined_df['Suplr_NPI'].nunique()}\")\n",
+    "    print(\"\\nSummary of numeric columns:\")\n",
+    "    display(combined_df.describe(include=[np.number]))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "f0106077",
+   "metadata": {},
+   "source": [
+    "## 3. Mapping Columns to Data Dictionary\n",
+    "The `DATA_DICTIONARY` provides a definition for each column. Let's map those definitions onto the DataFrame's columns; any column without an entry is labeled \"Description not available\"."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "744715a8",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Data Dictionary Mapping:\n",
+      "\n",
+      "- Suplr_NPI: Supplier NPI - NPI for the Supplier on the DMEPOS claim\n",
+      "- Suplr_Prvdr_Last_Name_Org: Supplier Last Name/Organization Name - When registered as individual, the Supplier's last name. When registered as organization, this is the organization name\n",
+      "- Suplr_Prvdr_First_Name: Supplier First Name - When registered as individual, the Supplier's first name\n",
+      "- Suplr_Prvdr_MI: Supplier Middle Initial - When registered as individual, the Supplier's middle initial\n",
+      "- Suplr_Prvdr_Crdntls: Supplier Credentials - When registered as individual, these are the Supplier's credentials\n",
+      "- Suplr_Prvdr_Gndr: Supplier Gender - When registered as individual, this is the Supplier's gender\n",
+      "- Suplr_Prvdr_Ent_Cd: Supplier Entity Code - 'I' identifies Suppliers registered as individuals, 'O' identifies Suppliers registered as organizations\n",
+      "- Suplr_Prvdr_St1: Supplier Street 1 - First line of the Supplier's street address\n",
+      "- Suplr_Prvdr_St2: Supplier Street 2 - Second line of the Supplier's street address\n",
+      "- Suplr_Prvdr_City: Supplier City - The city where the Supplier is located\n",
+      "- Suplr_Prvdr_State_Abrvtn: Supplier State - State postal abbreviation where the Supplier is located\n",
+      "- Suplr_Prvdr_State_FIPS: Supplier State FIPS Code - FIPS code for Supplier's state\n",
+      "- Suplr_Prvdr_Zip5: Supplier ZIP - The Supplier's ZIP code\n",
+      "- Suplr_Prvdr_RUCA: Supplier RUCA - Rural-Urban Commuting Area Code for the Supplier ZIP code\n",
+      "- Suplr_Prvdr_RUCA_Desc: Supplier RUCA Description - Description of Rural-Urban Commuting Area (RUCA) Code\n",
+      "- Suplr_Prvdr_Cntry: Supplier Country - Country where the Supplier is located\n",
+      "- Suplr_Prvdr_Spclty_Desc: Supplier Provider Specialty Description - Derived from Medicare provider/supplier specialty code\n",
+      "- Suplr_Prvdr_Spclty_Srce: Supplier Provider Specialty Source - Source of the Supplier Specialty (claims-specialty or NPPES-specialty)\n",
+      "- Tot_Suplr_HCPCS_Cds: Number of Supplier HCPCS - Total unique DMEPOS product/service HCPCS codes\n",
+      "- Tot_Suplr_Benes: Number of Supplier Beneficiaries - Total unique beneficiaries (<11 are suppressed)\n",
+      "- Tot_Suplr_Clms: Number of Supplier Claims - Total DMEPOS claims submitted\n",
+      "- Tot_Suplr_Srvcs: Number of Supplier Services - Total DMEPOS products/services rendered\n",
+      "- Suplr_Sbmtd_Chrgs: Supplier Submitted Charges - Total charges submitted for DMEPOS products/services\n",
+      "- Suplr_Mdcr_Alowd_Amt: Supplier Medicare Allowed Amount - Total Medicare allowed amount\n",
+      "- Suplr_Mdcr_Pymt_Amt: Supplier Medicare Payment Amount - Amount Medicare paid after deductible/coinsurance\n",
+      "- Suplr_Mdcr_Stdzd_Pymt_Amt: Supplier Medicare Standard Payment Amount - Standardized Medicare payments\n",
+      "- DME_Sprsn_Ind: Durable Medical Equipment Suppression Indicator - '*'=suppressed (1-10 claims), '#'=counter-suppressed\n",
+      "- DME_Tot_Suplr_HCPCS_Cds: Number of DME HCPCS - Total unique DME HCPCS codes\n",
+      "- DME_Tot_Suplr_Benes: Number of DME Beneficiaries - Total unique beneficiaries with DME claims (<11 are suppressed)\n",
+      "- DME_Tot_Suplr_Clms: Number of DME Claims - Total DME claims submitted\n",
+      "- DME_Tot_Suplr_Srvcs: Number of DME Services - Total DME products/services rendered\n",
+      "- DME_Suplr_Sbmtd_Chrgs: DME Submitted Charges - Total charges submitted for DME products/services\n",
+      "- DME_Suplr_Mdcr_Alowd_Amt: DME Medicare Allowed Amount - Total Medicare allowed amount for DME\n",
+      "- DME_Suplr_Mdcr_Pymt_Amt: DME Medicare Payment Amount - Amount Medicare paid for DME after deductible/coinsurance\n",
+      "- DME_Suplr_Mdcr_Stdzd_Pymt_Amt: DME Medicare Standard Payment Amount - Standardized Medicare payments for DME\n",
+      "- POS_Sprsn_Ind: Prosthetic and Orthotic Suppression Indicator - '*'=suppressed (1-10 claims), '#'=counter-suppressed\n",
+      "- POS_Tot_Suplr_HCPCS_Cds: Number of Prosthetic/Orthotic HCPCS - Total unique prosthetic/orthotic HCPCS codes\n",
+      "- POS_Tot_Suplr_Benes: Number of Prosthetic/Orthotic Beneficiaries - Total unique beneficiaries\n",
+      "- POS_Tot_Suplr_Clms: Number of Prosthetic/Orthotic Claims - Total prosthetic/orthotic claims submitted\n",
+      "- POS_Tot_Suplr_Srvcs: Number of Prosthetic/Orthotic Services - Total prosthetic/orthotic products/services\n",
+      "- POS_Suplr_Sbmtd_Chrgs: Prosthetic/Orthotic Submitted Charges - Total charges submitted for prosthetic/orthotic\n",
+      "- POS_Suplr_Mdcr_Alowd_Amt: Prosthetic/Orthotic Medicare Allowed Amount - Total Medicare allowed amount\n",
+      "- POS_Suplr_Mdcr_Pymt_Amt: Prosthetic/Orthotic Medicare Payment Amount - Amount Medicare paid after deductible/coinsurance\n",
+      "- POS_Suplr_Mdcr_Stdzd_Pymt_Amt: Prosthetic/Orthotic Medicare Standard Payment Amount - Standardized Medicare payments\n",
+      "- Drug_Sprsn_Ind: Drug and Nutritional Suppression Indicator - '*'=suppressed (1-10 claims), '#'=counter-suppressed\n",
+      "- Drug_Tot_Suplr_HCPCS_Cds: Number of Drug/Nutritional HCPCS - Total unique drug/nutritional HCPCS codes\n",
+      "- Drug_Tot_Suplr_Benes: Number of Drug/Nutritional Beneficiaries - Total unique beneficiaries\n",
+      "- Drug_Tot_Suplr_Clms: Number of Drug/Nutritional Claims - Total drug/nutritional claims submitted\n",
+      "- Drug_Tot_Suplr_Srvcs: Number of Drug/Nutritional Services - Total drug/nutritional products/services\n",
+      "- Drug_Suplr_Sbmtd_Chrgs: Drug/Nutritional Submitted Charges - Total charges submitted for drug/nutritional\n",
+      "- Drug_Suplr_Mdcr_Alowd_Amt: Drug/Nutritional Medicare Allowed Amount - Total Medicare allowed amount\n",
+      "- Drug_Suplr_Mdcr_Pymt_Amt: Drug/Nutritional Medicare Payment Amount - Amount Medicare paid after deductible/coinsurance\n",
+      "- Drug_Suplr_Mdcr_Stdzd_Pymt_Amt: Drug/Nutritional Medicare Standard Payment Amount - Standardized Medicare payments\n",
+      "- Bene_Avg_Age: Average Age of Beneficiaries - Average age at end of calendar year or time of death\n",
+      "- Bene_Age_LT_65_Cnt: Number of Beneficiaries <65 - Count of beneficiaries under 65 years old\n",
+      "- Bene_Age_65_74_Cnt: Number of Beneficiaries 65-74 - Count of beneficiaries between 65-74 years old\n",
+      "- Bene_Age_75_84_Cnt: Number of Beneficiaries 75-84 - Count of beneficiaries between 75-84 years old\n",
+      "- Bene_Age_GT_84_Cnt: Number of Beneficiaries >84 - Count of beneficiaries over 84 years old\n",
+      "- Bene_Feml_Cnt: Number of Female Beneficiaries - Count of female beneficiaries\n",
+      "- Bene_Male_Cnt: Number of Male Beneficiaries - Count of male beneficiaries\n",
+      "- Bene_Race_Wht_Cnt: Number of White Beneficiaries - Count of non-Hispanic white beneficiaries\n",
+      "- Bene_Race_Black_Cnt: Number of Black Beneficiaries - Count of non-Hispanic Black/African American beneficiaries\n",
+      "- Bene_Race_Api_Cnt: Number of Asian/PI Beneficiaries - Count of Asian Pacific Islander beneficiaries\n",
+      "- Bene_Race_Hspnc_Cnt: Number of Hispanic Beneficiaries - Count of Hispanic beneficiaries\n",
+      "- Bene_Race_Natind_Cnt: Number of Native American/Alaska Native Beneficiaries - Count of American Indian/Alaska Native beneficiaries\n",
+      "- Bene_Race_Othr_Cnt: Number of Other Race Beneficiaries - Count of beneficiaries with race not elsewhere classified\n",
+      "- Bene_Ndual_Cnt: Number of Medicare & Medicaid Beneficiaries - Count of dual-eligible beneficiaries\n",
+      "- Bene_Dual_Cnt: Number of Medicare-Only Beneficiaries - Count of Medicare-only beneficiaries\n",
+      "- Bene_CC_BH_ADHD_OthCD_V1_Pct: Percent with ADHD and Other Conduct Disorders\n",
+      "- Bene_CC_BH_Alcohol_Drug_V1_Pct: Percent with Alcohol and Drug Use Disorders\n",
+      "- Bene_CC_BH_Tobacco_V1_Pct: Percent with Tobacco Use Disorders\n",
+      "- Bene_CC_BH_Alz_NonAlzdem_V2_Pct: Percent with Alzheimer's and Non-Alzheimer's Dementia\n",
+      "- Bene_CC_BH_Anxiety_V1_Pct: Percent with Anxiety Disorders\n",
+      "- Bene_CC_BH_Bipolar_V1_Pct: Percent with Bipolar Disorder\n",
+      "- Bene_CC_BH_Mood_V2_Pct: Percent with Depression, Bipolar or Other Mood Disorders\n",
+      "- Bene_CC_BH_Depress_V1_Pct: Percent with Major Depressive Affective Disorder\n",
+      "- Bene_CC_BH_PD_V1_Pct: Percent with Personality Disorders\n",
+      "- Bene_CC_BH_PTSD_V1_Pct: Percent with Post-Traumatic Stress Disorder\n",
+      "- Bene_CC_BH_Schizo_OthPsy_V1_Pct: Percent with Schizophrenia and Other Psychotic Disorders\n",
+      "- Bene_CC_PH_Asthma_V2_Pct: Percent with Asthma\n",
+      "- Bene_CC_PH_Afib_V2_Pct: Percent with Atrial Fibrillation and Flutter\n",
+      "- Bene_CC_PH_Cancer6_V2_Pct: Percent with Cancer (combined 6 cancer indicators)\n",
+      "- Bene_CC_PH_CKD_V2_Pct: Percent with Chronic Kidney Disease\n",
+      "- Bene_CC_PH_COPD_V2_Pct: Percent with Chronic Obstructive Pulmonary Disease\n",
+      "- Bene_CC_PH_Diabetes_V2_Pct: Percent with Diabetes\n",
+      "- Bene_CC_PH_HF_NonIHD_V2_Pct: Percent with Heart Failure and Non-Ischemic Heart Disease\n",
+      "- Bene_CC_PH_Hyperlipidemia_V2_Pct: Percent with Hyperlipidemia\n",
+      "- Bene_CC_PH_Hypertension_V2_Pct: Percent with Hypertension\n",
+      "- Bene_CC_PH_IschemicHeart_V2_Pct: Percent with Ischemic Heart Disease\n",
+      "- Bene_CC_PH_Osteoporosis_V2_Pct: Percent with Osteoporosis\n",
+      "- Bene_CC_PH_Parkinson_V2_Pct: Percent with Parkinson's Disease\n",
+      "- Bene_CC_PH_Arthritis_V2_Pct: Percent with Rheumatoid Arthritis/Osteoarthritis\n",
+      "- Bene_CC_PH_Stroke_TIA_V2_Pct: Percent with Stroke/Transient Ischemic Attack\n",
+      "- Bene_Avg_Risk_Scre: Average HCC Risk Score of Beneficiaries\n",
+      "- year: Year of the data\n"
+     ]
+    }
+   ],
+   "source": [
+    "if not combined_df.empty:\n",
+    "    column_info = {}\n",
+    "    for column in combined_df.columns:\n",
+    "        if column in DATA_DICTIONARY:\n",
+    "            column_info[column] = DATA_DICTIONARY[column]\n",
+    "        else:\n",
+    "            column_info[column] = \"Description not available\"\n",
+    "    \n",
+    "    # Optionally store in DataFrame attributes (just for reference, not required)\n",
+    "    combined_df.attrs['column_descriptions'] = column_info\n",
+    "\n",
+    "    # Display an overview\n",
+    "    print(\"Data Dictionary Mapping:\\n\")\n",
+    "    for col in combined_df.columns:\n",
+    "        desc = column_info[col]\n",
+    "        print(f\"- {col}: {desc}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "a009a7cc",
+   "metadata": {},
+   "source": [
+    "## 4. Helper: Format Dollar Amounts\n",
+    "A small function that formats dollar amounts, abbreviating thousands and millions with K/M suffixes."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "0462c05c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def format_dollar_amount(amount):\n",
+    "    \"\"\"Return a string formatted with $ and K/M if needed.\"\"\"\n",
+    "    if amount >= 1_000_000:\n",
+    "        return f\"${amount/1_000_000:.1f}M\"\n",
+    "    elif amount >= 1_000:\n",
+    "        return f\"${amount/1_000:.1f}K\"\n",
+    "    else:\n",
+    "        return f\"${amount:,.0f}\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "29348be9",
+   "metadata": {},
+   "source": [
+    "## 5. Year-over-Year Growth Analysis\n",
+    "We'll look at the *Medicare Payment Amount* for each supplier (NPI) across years and compute year-over-year (YoY) growth.\n",
+    "- Keep only suppliers with non-zero payments in every year from 2018 through 2022.\n",
+    "- Only consider suppliers with at least $100K in Medicare payments in 2022, to focus on large-volume providers.\n",
+    "- Identify the top 10 by average growth rate."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "0cdba951",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "Top 10 Suppliers by Average Year-over-Year Growth (2018–2022), \n",
+      "\n",
+      " Filtered to those with >= $100K in 2022 payments:\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Suplr_NPI</th>\n",
+       "      <th>Suplr_Prvdr_Last_Name_Org</th>\n",
+       "      <th>growth_2019</th>\n",
+       "      <th>growth_2020</th>\n",
+       "      <th>growth_2021</th>\n",
+       "      <th>growth_2022</th>\n",
+       "      <th>avg_growth</th>\n",
+       "      <th>Suplr_Sbmtd_Chrgs</th>\n",
+       "      <th>Suplr_Mdcr_Pymt_Amt</th>\n",
+       "      <th>Tot_Suplr_Benes</th>\n",
+       "      <th>Tot_Suplr_Clms</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>500</th>\n",
+       "      <td>1063967768</td>\n",
+       "      <td>P-Cares Medical Supplies, Llc</td>\n",
+       "      <td>216.762990</td>\n",
+       "      <td>-18.279120</td>\n",
+       "      <td>-1.438596</td>\n",
+       "      <td>59704.308031</td>\n",
+       "      <td>14975.338326</td>\n",
+       "      <td>2.996986e+07</td>\n",
+       "      <td>15894839.27</td>\n",
+       "      <td>3029.60</td>\n",
+       "      <td>21002</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5958</th>\n",
+       "      <td>1891275590</td>\n",
+       "      <td>Lincare Inc</td>\n",
+       "      <td>43427.677155</td>\n",
+       "      <td>40.042523</td>\n",
+       "      <td>9.862999</td>\n",
+       "      <td>1.397273</td>\n",
+       "      <td>10869.744987</td>\n",
+       "      <td>1.351100e+08</td>\n",
+       "      <td>19498239.26</td>\n",
+       "      <td>8006.60</td>\n",
+       "      <td>250598</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3078</th>\n",
+       "      <td>1457837080</td>\n",
+       "      <td>Respiratory Services Of Western New York, Inc.</td>\n",
+       "      <td>24521.584413</td>\n",
+       "      <td>489.458451</td>\n",
+       "      <td>59.869097</td>\n",
+       "      <td>46.621208</td>\n",
+       "      <td>6279.383292</td>\n",
+       "      <td>1.742520e+06</td>\n",
+       "      <td>636034.24</td>\n",
+       "      <td>442.00</td>\n",
+       "      <td>8743</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5538</th>\n",
+       "      <td>1821424789</td>\n",
+       "      <td>Vohra Post Acute Care Physicians Of Texas, Pllc</td>\n",
+       "      <td>24557.093269</td>\n",
+       "      <td>105.801915</td>\n",
+       "      <td>24.589830</td>\n",
+       "      <td>1.304009</td>\n",
+       "      <td>6172.197256</td>\n",
+       "      <td>1.954372e+07</td>\n",
+       "      <td>7851147.07</td>\n",
+       "      <td>1268.25</td>\n",
+       "      <td>21386</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3389</th>\n",
+       "      <td>1508938127</td>\n",
+       "      <td>Aahi St Joseph Mercy Hospital Inc</td>\n",
+       "      <td>-73.766346</td>\n",
+       "      <td>-96.932529</td>\n",
+       "      <td>23208.805119</td>\n",
+       "      <td>139.646393</td>\n",
+       "      <td>5794.438159</td>\n",
+       "      <td>1.052284e+06</td>\n",
+       "      <td>477838.88</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>421</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1197</th>\n",
+       "      <td>1174553804</td>\n",
+       "      <td>Care One Medical Equipment And Supplies, Inc.</td>\n",
+       "      <td>19205.127235</td>\n",
+       "      <td>50.732207</td>\n",
+       "      <td>-6.816210</td>\n",
+       "      <td>34.941358</td>\n",
+       "      <td>4820.996148</td>\n",
+       "      <td>3.241600e+06</td>\n",
+       "      <td>1038009.02</td>\n",
+       "      <td>529.25</td>\n",
+       "      <td>11495</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3365</th>\n",
+       "      <td>1508826199</td>\n",
+       "      <td>The Home Health Store Of Tomball, Inc.</td>\n",
+       "      <td>70.638944</td>\n",
+       "      <td>23.053685</td>\n",
+       "      <td>53.075010</td>\n",
+       "      <td>17908.424045</td>\n",
+       "      <td>4513.797921</td>\n",
+       "      <td>2.914811e+07</td>\n",
+       "      <td>15253804.77</td>\n",
+       "      <td>5578.40</td>\n",
+       "      <td>56914</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5022</th>\n",
+       "      <td>1750391751</td>\n",
+       "      <td>Amerihealth Medical Group, Inc.</td>\n",
+       "      <td>18039.357850</td>\n",
+       "      <td>-46.804741</td>\n",
+       "      <td>6.478163</td>\n",
+       "      <td>-16.942326</td>\n",
+       "      <td>4495.522236</td>\n",
+       "      <td>2.947875e+06</td>\n",
+       "      <td>1106496.20</td>\n",
+       "      <td>873.60</td>\n",
+       "      <td>19592</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3923</th>\n",
+       "      <td>1598044208</td>\n",
+       "      <td>Scooter Chair Repair Georgia, Llc</td>\n",
+       "      <td>16495.684706</td>\n",
+       "      <td>46.102762</td>\n",
+       "      <td>-17.790577</td>\n",
+       "      <td>-18.348840</td>\n",
+       "      <td>4126.412013</td>\n",
+       "      <td>1.078492e+07</td>\n",
+       "      <td>4825597.72</td>\n",
+       "      <td>219.60</td>\n",
+       "      <td>3544</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2865</th>\n",
+       "      <td>1437108214</td>\n",
+       "      <td>Christian Home Health Services, Inc</td>\n",
+       "      <td>16471.811385</td>\n",
+       "      <td>-19.951771</td>\n",
+       "      <td>10.856242</td>\n",
+       "      <td>4.742256</td>\n",
+       "      <td>4116.864528</td>\n",
+       "      <td>1.948006e+06</td>\n",
+       "      <td>573413.07</td>\n",
+       "      <td>391.75</td>\n",
+       "      <td>7344</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "       Suplr_NPI                        Suplr_Prvdr_Last_Name_Org   growth_2019  growth_2020   growth_2021   growth_2022    avg_growth  Suplr_Sbmtd_Chrgs  Suplr_Mdcr_Pymt_Amt  Tot_Suplr_Benes  Tot_Suplr_Clms\n",
+       "500   1063967768                    P-Cares Medical Supplies, Llc    216.762990   -18.279120     -1.438596  59704.308031  14975.338326       2.996986e+07          15894839.27          3029.60           21002\n",
+       "5958  1891275590                                      Lincare Inc  43427.677155    40.042523      9.862999      1.397273  10869.744987       1.351100e+08          19498239.26          8006.60          250598\n",
+       "3078  1457837080   Respiratory Services Of Western New York, Inc.  24521.584413   489.458451     59.869097     46.621208   6279.383292       1.742520e+06            636034.24           442.00            8743\n",
+       "5538  1821424789  Vohra Post Acute Care Physicians Of Texas, Pllc  24557.093269   105.801915     24.589830      1.304009   6172.197256       1.954372e+07           7851147.07          1268.25           21386\n",
+       "3389  1508938127                Aahi St Joseph Mercy Hospital Inc    -73.766346   -96.932529  23208.805119    139.646393   5794.438159       1.052284e+06            477838.88              NaN             421\n",
+       "1197  1174553804    Care One Medical Equipment And Supplies, Inc.  19205.127235    50.732207     -6.816210     34.941358   4820.996148       3.241600e+06           1038009.02           529.25           11495\n",
+       "3365  1508826199           The Home Health Store Of Tomball, Inc.     70.638944    23.053685     53.075010  17908.424045   4513.797921       2.914811e+07          15253804.77          5578.40           56914\n",
+       "5022  1750391751                  Amerihealth Medical Group, Inc.  18039.357850   -46.804741      6.478163    -16.942326   4495.522236       2.947875e+06           1106496.20           873.60           19592\n",
+       "3923  1598044208                Scooter Chair Repair Georgia, Llc  16495.684706    46.102762    -17.790577    -18.348840   4126.412013       1.078492e+07           4825597.72           219.60            3544\n",
+       "2865  1437108214              Christian Home Health Services, Inc  16471.811385   -19.951771     10.856242      4.742256   4116.864528       1.948006e+06            573413.07           391.75            7344"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "if not combined_df.empty:\n",
+    "    # 5.1 Group by (Supplier, year), then sum relevant metrics\n",
+    "    supplier_yearly = combined_df.groupby([\n",
+    "        'Suplr_NPI',\n",
+    "        'Suplr_Prvdr_Last_Name_Org',\n",
+    "        'year'\n",
+    "    ], as_index=False).agg({\n",
+    "        'Suplr_Sbmtd_Chrgs': 'sum',\n",
+    "        'Suplr_Mdcr_Pymt_Amt': 'sum',\n",
+    "        'Tot_Suplr_Benes': 'mean',  # average across rows\n",
+    "        'Tot_Suplr_Clms': 'sum'\n",
+    "    })\n",
+    "\n",
+    "    # Create a pivot where columns are years, values are 'Suplr_Mdcr_Pymt_Amt'\n",
+    "    pivot_charges = supplier_yearly.pivot_table(\n",
+    "        index=['Suplr_NPI', 'Suplr_Prvdr_Last_Name_Org'],\n",
+    "        columns='year',\n",
+    "        values='Suplr_Mdcr_Pymt_Amt',\n",
+    "        fill_value=0\n",
+    "    )\n",
+    "\n",
+    "    # We'll calculate YOY growth for (2019 vs 2018), (2020 vs 2019), etc.\n",
+    "    growth_rates = pd.DataFrame(index=pivot_charges.index)\n",
+    "    for year_pair in [(2019, 2018), (2020, 2019), (2021, 2020), (2022, 2021)]:\n",
+    "        current, previous = year_pair\n",
+    "        growth_column = f'growth_{current}'\n",
+    "        growth_rates[growth_column] = (\n",
+    "            (pivot_charges[current] - pivot_charges[previous]) /\n",
+    "            pivot_charges[previous].replace(0, np.nan)\n",
+    "        ) * 100\n",
+    "\n",
+    "    growth_cols = [col for col in growth_rates.columns if col.startswith('growth_')]\n",
+    "    growth_rates['avg_growth'] = growth_rates[growth_cols].mean(axis=1)\n",
+    "\n",
+    "    # Filter: Supplier must have >0 in all years, and >=100k in 2022\n",
+    "    filter_mask = (\n",
+    "        (pivot_charges[2018] > 0) &\n",
+    "        (pivot_charges[2019] > 0) &\n",
+    "        (pivot_charges[2020] > 0) &\n",
+    "        (pivot_charges[2021] > 0) &\n",
+    "        (pivot_charges[2022] >= 100000)\n",
+    "    )\n",
+    "\n",
+    "    valid_suppliers = pivot_charges[filter_mask]\n",
+    "    valid_growth = growth_rates.loc[valid_suppliers.index].reset_index()\n",
+    "\n",
+    "    # Merge with aggregated totals (all years combined) just for more reporting info\n",
+    "    supplier_totals = supplier_yearly.groupby([\n",
+    "        'Suplr_NPI',\n",
+    "        'Suplr_Prvdr_Last_Name_Org'\n",
+    "    ], as_index=False).agg({\n",
+    "        'Suplr_Sbmtd_Chrgs': 'sum',\n",
+    "        'Suplr_Mdcr_Pymt_Amt': 'sum',\n",
+    "        'Tot_Suplr_Benes': 'mean',\n",
+    "        'Tot_Suplr_Clms': 'sum'\n",
+    "    })\n",
+    "\n",
+    "    growth_merged = pd.merge(\n",
+    "        valid_growth,\n",
+    "        supplier_totals,\n",
+    "        on=['Suplr_NPI', 'Suplr_Prvdr_Last_Name_Org'],\n",
+    "        how='left'\n",
+    "    )\n",
+    "\n",
+    "    # Sort by average growth descending\n",
+    "    top_growth = growth_merged.sort_values('avg_growth', ascending=False).head(10)\n",
+    "    \n",
+    "    print(\"\\nTop 10 Suppliers by Average Year-over-Year Growth (2018–2022), \\n\\n\",\n",
+    "          \"Filtered to those with >= $100K in 2022 payments:\")\n",
+    "    display(top_growth)\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "1af5e0cf",
    "metadata": {},
+   "source": [
+    "### Display Year-by-Year Payment Patterns for Top 10\n",
+    "For each of the top 10 suppliers, we'll show payments by year, the largest year-over-year jump, and beneficiary growth (when available)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "25fc60d9",
+   "metadata": {
+    "tags": []
+   },
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Hello World\n"
+      "\n",
+      "Detailed Patterns for Top 10 Growth Suppliers:\n",
+      "\n",
+      "1. P-Cares Medical Supplies, Llc (NPI: 1063967768)\n",
+      "   - Average Growth: 14975.34%\n",
+      "   - Total Medicare Payments (2018–2022): $15.9M\n",
+      "   - Year-by-year Payments: 2018: $10.4K, 2019: $32.8K, 2020: $26.8K, 2021: $26.4K, 2022: $15.8M\n",
+      "   - Largest Jump: 2021.0 to 2022.0 (+59704.31%)\n",
+      "   - Beneficiary Growth: 52782.1% \n",
+      "\n",
+      "2. Lincare Inc (NPI: 1891275590)\n",
+      "   - Average Growth: 10869.74%\n",
+      "   - Total Medicare Payments (2018–2022): $19.5M\n",
+      "   - Year-by-year Payments: 2018: $8.1K, 2019: $3.5M, 2020: $5.0M, 2021: $5.5M, 2022: $5.5M\n",
+      "   - Largest Jump: 2018.0 to 2019.0 (+43427.68%)\n",
+      "   - Beneficiary Growth: 10389.0% \n",
+      "\n",
+      "3. Respiratory Services Of Western New York, Inc. (NPI: 1457837080)\n",
+      "   - Average Growth: 6279.38%\n",
+      "   - Total Medicare Payments (2018–2022): $636.0K\n",
+      "   - Year-by-year Payments: 2018: $86, 2019: $21.1K, 2020: $124.4K, 2021: $198.9K, 2022: $291.6K\n",
+      "   - Largest Jump: 2018.0 to 2019.0 (+24521.58%)\n",
+      "   - Beneficiary Growth: 480.9% \n",
+      "\n",
+      "4. Vohra Post Acute Care Physicians Of Texas, Pllc (NPI: 1821424789)\n",
+      "   - Average Growth: 6172.20%\n",
+      "   - Total Medicare Payments (2018–2022): $7.9M\n",
+      "   - Year-by-year Payments: 2018: $3.9K, 2019: $954.7K, 2020: $2.0M, 2021: $2.4M, 2022: $2.5M\n",
+      "   - Largest Jump: 2018.0 to 2019.0 (+24557.09%)\n",
+      "   - Beneficiary Growth: 134.7% \n",
+      "\n",
+      "5. Aahi St Joseph Mercy Hospital Inc (NPI: 1508938127)\n",
+      "   - Average Growth: 5794.44%\n",
+      "   - Total Medicare Payments (2018–2022): $477.8K\n",
+      "   - Year-by-year Payments: 2018: $62.5K, 2019: $16.4K, 2020: $503, 2021: $117.3K, 2022: $281.1K\n",
+      "   - Largest Jump: 2020.0 to 2021.0 (+23208.81%)\n",
+      "\n",
+      "6. Care One Medical Equipment And Supplies, Inc. (NPI: 1174553804)\n",
+      "   - Average Growth: 4821.00%\n",
+      "   - Total Medicare Payments (2018–2022): $1.0M\n",
+      "   - Year-by-year Payments: 2018: $925, 2019: $178.6K, 2020: $269.2K, 2021: $250.8K, 2022: $338.5K\n",
+      "   - Largest Jump: 2018.0 to 2019.0 (+19205.13%)\n",
+      "   - Beneficiary Growth: 70.1% \n",
+      "\n",
+      "7. The Home Health Store Of Tomball, Inc. (NPI: 1508826199)\n",
+      "   - Average Growth: 4513.80%\n",
+      "   - Total Medicare Payments (2018–2022): $15.3M\n",
+      "   - Year-by-year Payments: 2018: $26.0K, 2019: $44.4K, 2020: $54.6K, 2021: $83.5K, 2022: $15.0M\n",
+      "   - Largest Jump: 2021.0 to 2022.0 (+17908.42%)\n",
+      "   - Beneficiary Growth: 25079.6% \n",
+      "\n",
+      "8. Amerihealth Medical Group, Inc. (NPI: 1750391751)\n",
+      "   - Average Growth: 4495.52%\n",
+      "   - Total Medicare Payments (2018–2022): $1.1M\n",
+      "   - Year-by-year Payments: 2018: $2.4K, 2019: $429.8K, 2020: $228.6K, 2021: $243.5K, 2022: $202.2K\n",
+      "   - Largest Jump: 2018.0 to 2019.0 (+18039.36%)\n",
+      "   - Beneficiary Growth: 2878.8% \n",
+      "\n",
+      "9. Scooter Chair Repair Georgia, Llc (NPI: 1598044208)\n",
+      "   - Average Growth: 4126.41%\n",
+      "   - Total Medicare Payments (2018–2022): $4.8M\n",
+      "   - Year-by-year Payments: 2018: $6.3K, 2019: $1.0M, 2020: $1.5M, 2021: $1.2M, 2022: $1.0M\n",
+      "   - Largest Jump: 2018.0 to 2019.0 (+16495.68%)\n",
+      "   - Beneficiary Growth: 131.0% \n",
+      "\n",
+      "10. Christian Home Health Services, Inc (NPI: 1437108214)\n",
+      "   - Average Growth: 4116.86%\n",
+      "   - Total Medicare Payments (2018–2022): $573.4K\n",
+      "   - Year-by-year Payments: 2018: $955, 2019: $158.3K, 2020: $126.7K, 2021: $140.4K, 2022: $147.1K\n",
+      "   - Largest Jump: 2018.0 to 2019.0 (+16471.81%)\n",
+      "   - Beneficiary Growth: -27.0% \n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/var/folders/g6/2s70_fq11hn4czzmpgd40ky80000gn/T/ipykernel_34747/4120139511.py:19: SettingWithCopyWarning: \n",
+      "A value is trying to be set on a copy of a slice from a DataFrame\n",
+      "\n",
+      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+      "  data.sort_values('year', inplace=True)\n",
+      "/var/folders/g6/2s70_fq11hn4czzmpgd40ky80000gn/T/ipykernel_34747/4120139511.py:19: SettingWithCopyWarning: \n",
+      "A value is trying to be set on a copy of a slice from a DataFrame\n",
+      "\n",
+      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+      "  data.sort_values('year', inplace=True)\n",
+      "/var/folders/g6/2s70_fq11hn4czzmpgd40ky80000gn/T/ipykernel_34747/4120139511.py:19: SettingWithCopyWarning: \n",
+      "A value is trying to be set on a copy of a slice from a DataFrame\n",
+      "\n",
+      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+      "  data.sort_values('year', inplace=True)\n",
+      "/var/folders/g6/2s70_fq11hn4czzmpgd40ky80000gn/T/ipykernel_34747/4120139511.py:19: SettingWithCopyWarning: \n",
+      "A value is trying to be set on a copy of a slice from a DataFrame\n",
+      "\n",
+      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+      "  data.sort_values('year', inplace=True)\n",
+      "/var/folders/g6/2s70_fq11hn4czzmpgd40ky80000gn/T/ipykernel_34747/4120139511.py:19: SettingWithCopyWarning: \n",
+      "A value is trying to be set on a copy of a slice from a DataFrame\n",
+      "\n",
+      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+      "  data.sort_values('year', inplace=True)\n",
+      "/var/folders/g6/2s70_fq11hn4czzmpgd40ky80000gn/T/ipykernel_34747/4120139511.py:19: SettingWithCopyWarning: \n",
+      "A value is trying to be set on a copy of a slice from a DataFrame\n",
+      "\n",
+      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+      "  data.sort_values('year', inplace=True)\n",
+      "/var/folders/g6/2s70_fq11hn4czzmpgd40ky80000gn/T/ipykernel_34747/4120139511.py:19: SettingWithCopyWarning: \n",
+      "A value is trying to be set on a copy of a slice from a DataFrame\n",
+      "\n",
+      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+      "  data.sort_values('year', inplace=True)\n",
+      "/var/folders/g6/2s70_fq11hn4czzmpgd40ky80000gn/T/ipykernel_34747/4120139511.py:19: SettingWithCopyWarning: \n",
+      "A value is trying to be set on a copy of a slice from a DataFrame\n",
+      "\n",
+      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+      "  data.sort_values('year', inplace=True)\n",
+      "/var/folders/g6/2s70_fq11hn4czzmpgd40ky80000gn/T/ipykernel_34747/4120139511.py:19: SettingWithCopyWarning: \n",
+      "A value is trying to be set on a copy of a slice from a DataFrame\n",
+      "\n",
+      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+      "  data.sort_values('year', inplace=True)\n",
+      "/var/folders/g6/2s70_fq11hn4czzmpgd40ky80000gn/T/ipykernel_34747/4120139511.py:19: SettingWithCopyWarning: \n",
+      "A value is trying to be set on a copy of a slice from a DataFrame\n",
+      "\n",
+      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+      "  data.sort_values('year', inplace=True)\n"
      ]
     }
    ],
    "source": [
-    "print(\"Hello World\")"
+    "if not combined_df.empty:\n",
+    "    # Create a function to display details for the top-10\n",
+    "    def show_top_10_growth_details(top_df, supplier_yearly_df):\n",
+    "        print(\"\\nDetailed Patterns for Top 10 Growth Suppliers:\\n\")\n",
+    "        top_npi = top_df['Suplr_NPI'].tolist()\n",
+    "\n",
+    "        # Filter original groupby results for just these suppliers\n",
+    "        subset = supplier_yearly_df[supplier_yearly_df['Suplr_NPI'].isin(top_npi)].copy()\n",
+    "        subset.sort_values(['Suplr_NPI', 'year'], inplace=True)\n",
+    "\n",
+    "        for i, row in enumerate(top_df.itertuples(), start=1):\n",
+    "            npi = row.Suplr_NPI\n",
+    "            name = row.Suplr_Prvdr_Last_Name_Org\n",
+    "            avg_growth = row.avg_growth\n",
+    "            total_pay = row.Suplr_Mdcr_Pymt_Amt\n",
+    "\n",
+    "            # Grab the subset for this supplier\n",
+    "            data = subset[subset['Suplr_NPI'] == npi]\n",
+    "            data.sort_values('year', inplace=True)\n",
+    "\n",
+    "            print(f\"{i}. {name} (NPI: {npi})\")\n",
+    "            print(f\"   - Average Growth: {avg_growth:.2f}%\")\n",
+    "            print(f\"   - Total Medicare Payments (2018–2022): {format_dollar_amount(total_pay)}\")\n",
+    "\n",
+    "            # Show year-by-year\n",
+    "            year_strs = []\n",
+    "            for y in range(2018, 2023):\n",
+    "                row_y = data[data['year'] == y]\n",
+    "                if not row_y.empty:\n",
+    "                    pay = row_y.iloc[0]['Suplr_Mdcr_Pymt_Amt']\n",
+    "                    year_strs.append(f\"{y}: {format_dollar_amount(pay)}\")\n",
+    "                else:\n",
+    "                    year_strs.append(f\"{y}: $0\")\n",
+    "            print(\"   - Year-by-year Payments: \" + \", \".join(year_strs))\n",
+    "\n",
+    "            # Identify the largest yoy jump\n",
+    "            data_list = data[['year', 'Suplr_Mdcr_Pymt_Amt']].sort_values('year').values.tolist()\n",
+    "            max_jump = 0\n",
+    "            jump_year = None\n",
+    "            for idx in range(1, len(data_list)):\n",
+    "                prev_amt = data_list[idx-1][1]\n",
+    "                curr_amt = data_list[idx][1]\n",
+    "                if prev_amt > 0:\n",
+    "                    yoy_pct = (curr_amt - prev_amt) / prev_amt * 100\n",
+    "                    if yoy_pct > max_jump:\n",
+    "                        max_jump = yoy_pct\n",
+    "                        jump_year = (data_list[idx-1][0], data_list[idx][0])\n",
+    "\n",
+    "            if jump_year:\n",
+    "                print(f\"   - Largest Jump: {jump_year[0]} to {jump_year[1]} (+{max_jump:.2f}%)\")\n",
+    "\n",
+    "            # Check beneficiary growth\n",
+    "            benes = data[['year', 'Tot_Suplr_Benes']].dropna()\n",
+    "            if len(benes) > 1:\n",
+    "                benes.sort_values('year', inplace=True)\n",
+    "                first_benes = benes.iloc[0]['Tot_Suplr_Benes']\n",
+    "                last_benes = benes.iloc[-1]['Tot_Suplr_Benes']\n",
+    "                if first_benes > 0:\n",
+    "                    bene_growth = (last_benes - first_benes) / first_benes * 100\n",
+    "                    print(f\"   - Beneficiary Growth: {bene_growth:.1f}% \")\n",
+    "\n",
+    "            print(\"\")\n",
+    "\n",
+    "    show_top_10_growth_details(top_growth, supplier_yearly)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "bee5376d",
+   "metadata": {},
+   "source": [
+    "# 6. Analysis of High Submitted vs. Low Allowed/Paid Amounts\n",
+    "We compare each supplier's total submitted charges with its total allowed and paid amounts across **all** years, ranking only suppliers with at least 100K dollars in submitted charges to reduce noise."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "2201637e",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Top 10 Suppliers: Highest Submitted Charges vs. Allowed Amount Ratio\n",
+      "\n",
+      "- Flatbush Rx Corp (NPI: 1669839536)\n",
+      "  Submitted: $252.8K, Allowed: $1.1K, Paid: $616\n",
+      "  Submitted : Allowed = 221.97x\n",
+      "\n",
+      "- Arooba Corp (NPI: 1649225152)\n",
+      "  Submitted: $312.0K, Allowed: $1.7K, Paid: $1.1K\n",
+      "  Submitted : Allowed = 182.41x\n",
+      "\n",
+      "- Mingocare Inc (NPI: 1003228156)\n",
+      "  Submitted: $702.7K, Allowed: $4.0K, Paid: $2.4K\n",
+      "  Submitted : Allowed = 177.83x\n",
+      "\n",
+      "- Nile City Pharmacy Inc (NPI: 1578076212)\n",
+      "  Submitted: $106.4K, Allowed: $702, Paid: $524\n",
+      "  Submitted : Allowed = 151.50x\n",
+      "\n",
+      "- Farmacia Julia Discount #2 Llc (NPI: 1457430274)\n",
+      "  Submitted: $410.4K, Allowed: $3.4K, Paid: $2.1K\n",
+      "  Submitted : Allowed = 122.31x\n",
+      "\n",
+      "- Gamer Pharmacy Inc (NPI: 1588697692)\n",
+      "  Submitted: $9.3M, Allowed: $76.9K, Paid: $56.8K\n",
+      "  Submitted : Allowed = 120.95x\n",
+      "\n",
+      "- Madina Pharmacy Inc (NPI: 1538525316)\n",
+      "  Submitted: $427.3K, Allowed: $4.3K, Paid: $2.7K\n",
+      "  Submitted : Allowed = 99.99x\n",
+      "\n",
+      "- Colonial Pharmacy Inc (NPI: 1255438198)\n",
+      "  Submitted: $407.8K, Allowed: $4.1K, Paid: $2.5K\n",
+      "  Submitted : Allowed = 98.28x\n",
+      "\n",
+      "- Blue Ridge Pharmacy Inc (NPI: 1538564596)\n",
+      "  Submitted: $2.0M, Allowed: $21.0K, Paid: $15.6K\n",
+      "  Submitted : Allowed = 92.84x\n",
+      "\n",
+      "- Welch Pharmacy Inc (NPI: 1336326792)\n",
+      "  Submitted: $1.2M, Allowed: $13.4K, Paid: $8.3K\n",
+      "  Submitted : Allowed = 92.37x\n",
+      "\n",
+      "\n",
+      "Top 10 Suppliers: Highest Submitted Charges vs. Paid Amount Ratio\n",
+      "\n",
+      "- Flatbush Rx Corp (NPI: 1669839536)\n",
+      "  Submitted: $252.8K, Allowed: $1.1K, Paid: $616\n",
+      "  Submitted : Paid = 410.09x\n",
+      "\n",
+      "- Mingocare Inc (NPI: 1003228156)\n",
+      "  Submitted: $702.7K, Allowed: $4.0K, Paid: $2.4K\n",
+      "  Submitted : Paid = 292.70x\n",
+      "\n",
+      "- Arooba Corp (NPI: 1649225152)\n",
+      "  Submitted: $312.0K, Allowed: $1.7K, Paid: $1.1K\n",
+      "  Submitted : Paid = 285.22x\n",
+      "\n",
+      "- Nile City Pharmacy Inc (NPI: 1578076212)\n",
+      "  Submitted: $106.4K, Allowed: $702, Paid: $524\n",
+      "  Submitted : Paid = 202.93x\n",
+      "\n",
+      "- Farmacia Julia Discount #2 Llc (NPI: 1457430274)\n",
+      "  Submitted: $410.4K, Allowed: $3.4K, Paid: $2.1K\n",
+      "  Submitted : Paid = 196.85x\n",
+      "\n",
+      "- Colonial Pharmacy Inc (NPI: 1255438198)\n",
+      "  Submitted: $407.8K, Allowed: $4.1K, Paid: $2.5K\n",
+      "  Submitted : Paid = 165.16x\n",
+      "\n",
+      "- Gamer Pharmacy Inc (NPI: 1588697692)\n",
+      "  Submitted: $9.3M, Allowed: $76.9K, Paid: $56.8K\n",
+      "  Submitted : Paid = 163.71x\n",
+      "\n",
+      "- Madina Pharmacy Inc (NPI: 1538525316)\n",
+      "  Submitted: $427.3K, Allowed: $4.3K, Paid: $2.7K\n",
+      "  Submitted : Paid = 155.57x\n",
+      "\n",
+      "- Welch Pharmacy Inc (NPI: 1336326792)\n",
+      "  Submitted: $1.2M, Allowed: $13.4K, Paid: $8.3K\n",
+      "  Submitted : Paid = 148.70x\n",
+      "\n",
+      "- Crystal Drugs Inc (NPI: 1124049184)\n",
+      "  Submitted: $720.3K, Allowed: $8.2K, Paid: $5.2K\n",
+      "  Submitted : Paid = 137.98x\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "if not combined_df.empty:\n",
+    "    supplier_totals_ap = combined_df.groupby([\n",
+    "        'Suplr_NPI',\n",
+    "        'Suplr_Prvdr_Last_Name_Org'\n",
+    "    ], as_index=False).agg({\n",
+    "        'Suplr_Sbmtd_Chrgs': 'sum',\n",
+    "        'Suplr_Mdcr_Alowd_Amt': 'sum',\n",
+    "        'Suplr_Mdcr_Pymt_Amt': 'sum',\n",
+    "        'Tot_Suplr_Benes': 'mean',\n",
+    "        'Tot_Suplr_Clms': 'sum'\n",
+    "    })\n",
+    "\n",
+    "    supplier_totals_ap['submitted_allowed_ratio'] = (\n",
+    "        supplier_totals_ap['Suplr_Sbmtd_Chrgs'] / (supplier_totals_ap['Suplr_Mdcr_Alowd_Amt'] + 1e-9)\n",
+    "    )\n",
+    "    supplier_totals_ap['submitted_paid_ratio'] = (\n",
+    "        supplier_totals_ap['Suplr_Sbmtd_Chrgs'] / (supplier_totals_ap['Suplr_Mdcr_Pymt_Amt'] + 1e-9)\n",
+    "    )\n",
+    "\n",
+    "    # Focus on those with at least $100K submitted charges to reduce noise\n",
+    "    significant_ap = supplier_totals_ap[supplier_totals_ap['Suplr_Sbmtd_Chrgs'] >= 100000]\n",
+    "\n",
+    "    # Highest submitted-to-allowed ratio\n",
+    "    top_allowed = significant_ap.sort_values(\n",
+    "        'submitted_allowed_ratio', ascending=False\n",
+    "    ).head(10)\n",
+    "\n",
+    "    print(\"Top 10 Suppliers: Highest Submitted Charges vs. Allowed Amount Ratio\\n\")\n",
+    "    for i, row in top_allowed.iterrows():\n",
+    "        npi = row['Suplr_NPI']\n",
+    "        name = row['Suplr_Prvdr_Last_Name_Org']\n",
+    "        submitted = row['Suplr_Sbmtd_Chrgs']\n",
+    "        allowed = row['Suplr_Mdcr_Alowd_Amt']\n",
+    "        paid = row['Suplr_Mdcr_Pymt_Amt']\n",
+    "        ratio = row['submitted_allowed_ratio']\n",
+    "\n",
+    "        print(f\"- {name} (NPI: {npi})\")\n",
+    "        print(f\"  Submitted: {format_dollar_amount(submitted)}, Allowed: {format_dollar_amount(allowed)}, Paid: {format_dollar_amount(paid)}\")\n",
+    "        print(f\"  Submitted : Allowed = {ratio:.2f}x\\n\")\n",
+    "\n",
+    "    # Highest submitted-to-paid ratio\n",
+    "    top_paid = significant_ap.sort_values(\n",
+    "        'submitted_paid_ratio', ascending=False\n",
+    "    ).head(10)\n",
+    "\n",
+    "    print(\"\\nTop 10 Suppliers: Highest Submitted Charges vs. Paid Amount Ratio\\n\")\n",
+    "    for i, row in top_paid.iterrows():\n",
+    "        npi = row['Suplr_NPI']\n",
+    "        name = row['Suplr_Prvdr_Last_Name_Org']\n",
+    "        submitted = row['Suplr_Sbmtd_Chrgs']\n",
+    "        allowed = row['Suplr_Mdcr_Alowd_Amt']\n",
+    "        paid = row['Suplr_Mdcr_Pymt_Amt']\n",
+    "        ratio = row['submitted_paid_ratio']\n",
+    "\n",
+    "        print(f\"- {name} (NPI: {npi})\")\n",
+    "        print(f\"  Submitted: {format_dollar_amount(submitted)}, Allowed: {format_dollar_amount(allowed)}, Paid: {format_dollar_amount(paid)}\")\n",
+    "        print(f\"  Submitted : Paid = {ratio:.2f}x\\n\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "ebfae2d2",
+   "metadata": {},
+   "source": [
+    "# 7. Peer Group Analysis\n",
+    "Analyze suppliers in the context of their **specialty**, **state**, or combined specialty–state peer group (only groups with at least 5 suppliers are considered).\n",
+    "A supplier is flagged as an outlier when it exceeds 3× its peer group's median in at least two of these three metrics:\n",
+    "- Total Claims\n",
+    "- Total Submitted Charges\n",
+    "- Total Payments"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "id": "8f0e0b17",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "## Peer Group Analysis by Specialty\n",
+      "\n",
+      "Significant Specialty Outliers (exceeding 3× median in >=2 metrics):\n",
+      "- Accredo Health Group Inc (NPI: 1417915653)\n",
+      "  Specialty: Pharmacy | State: PA\n",
+      "  Claims: 209,938, Charges: $3464.6M, Payments: $1267.1M\n",
+      "\n",
+      "- North Coast Medical Supply, Llc (NPI: 1245259282)\n",
+      "  Specialty: Pharmacy | State: CA\n",
+      "  Claims: 1,236,598, Charges: $3458.1M, Payments: $245.3M\n",
+      "\n",
+      "- Lincare Pharmacy Services Inc. (NPI: 1780748939)\n",
+      "  Specialty: Pharmacy | State: FL\n",
+      "  Claims: 2,533,531, Charges: $2178.0M, Payments: $644.5M\n",
+      "\n",
+      "- Zoll Services Llc (NPI: 1164535274)\n",
+      "  Specialty: Other Medical Supply Company | State: PA\n",
+      "  Claims: 345,064, Charges: $1365.1M, Payments: $738.0M\n",
+      "\n",
+      "- Degc Enterprises (U.S.), Inc. (NPI: 1295827780)\n",
+      "  Specialty: Pharmacy | State: FL\n",
+      "  Claims: 1,329,923, Charges: $1291.7M, Payments: $325.9M\n",
+      "\n",
+      "- United States Medical Supply, Llc (NPI: 1700889227)\n",
+      "  Specialty: Other Medical Supply Company | State: FL\n",
+      "  Claims: 3,296,437, Charges: $1103.4M, Payments: $297.8M\n",
+      "\n",
+      "- 180 Medical Inc (NPI: 1639160708)\n",
+      "  Specialty: Other Medical Supply Company | State: OK\n",
+      "  Claims: 1,224,531, Charges: $1036.1M, Payments: $420.4M\n",
+      "\n",
+      "- Rgh Enterprises, Llc (NPI: 1609858729)\n",
+      "  Specialty: All Other Suppliers | State: OH\n",
+      "  Claims: 1,242,822, Charges: $965.4M, Payments: $271.6M\n",
+      "\n",
+      "- Lincare Pharmacy Services Inc. (NPI: 1003970260)\n",
+      "  Specialty: Pharmacy | State: CA\n",
+      "  Claims: 953,473, Charges: $817.3M, Payments: $240.2M\n",
+      "\n",
+      "- Coram Alternate Site Services Inc (NPI: 1386674067)\n",
+      "  Specialty: All Other Suppliers | State: MN\n",
+      "  Claims: 26,085, Charges: $786.8M, Payments: $53.1M\n",
+      "\n",
+      "\n",
+      "## Peer Group Analysis by State\n",
+      "\n",
+      "Significant State Outliers (>= 3× median in >=2 metrics):\n",
+      "- Accredo Health Group Inc (NPI: 1417915653)\n",
+      "  State: PA | Specialty: Pharmacy\n",
+      "  Claims: 209,938, Charges: $3464.6M, Payments: $1267.1M\n",
+      "\n",
+      "- North Coast Medical Supply, Llc (NPI: 1245259282)\n",
+      "  State: CA | Specialty: Pharmacy\n",
+      "  Claims: 1,236,598, Charges: $3458.1M, Payments: $245.3M\n",
+      "\n",
+      "- Lincare Pharmacy Services Inc. (NPI: 1780748939)\n",
+      "  State: FL | Specialty: Pharmacy\n",
+      "  Claims: 2,533,531, Charges: $2178.0M, Payments: $644.5M\n",
+      "\n",
+      "- Zoll Services Llc (NPI: 1164535274)\n",
+      "  State: PA | Specialty: Other Medical Supply Company\n",
+      "  Claims: 345,064, Charges: $1365.1M, Payments: $738.0M\n",
+      "\n",
+      "- Degc Enterprises (U.S.), Inc. (NPI: 1295827780)\n",
+      "  State: FL | Specialty: Pharmacy\n",
+      "  Claims: 1,329,923, Charges: $1291.7M, Payments: $325.9M\n",
+      "\n",
+      "- United States Medical Supply, Llc (NPI: 1700889227)\n",
+      "  State: FL | Specialty: Other Medical Supply Company\n",
+      "  Claims: 3,296,437, Charges: $1103.4M, Payments: $297.8M\n",
+      "\n",
+      "- 180 Medical Inc (NPI: 1639160708)\n",
+      "  State: OK | Specialty: Other Medical Supply Company\n",
+      "  Claims: 1,224,531, Charges: $1036.1M, Payments: $420.4M\n",
+      "\n",
+      "- Rgh Enterprises, Llc (NPI: 1609858729)\n",
+      "  State: OH | Specialty: All Other Suppliers\n",
+      "  Claims: 1,242,822, Charges: $965.4M, Payments: $271.6M\n",
+      "\n",
+      "- Lincare Pharmacy Services Inc. (NPI: 1003970260)\n",
+      "  State: CA | Specialty: Pharmacy\n",
+      "  Claims: 953,473, Charges: $817.3M, Payments: $240.2M\n",
+      "\n",
+      "- Coram Alternate Site Services Inc (NPI: 1386674067)\n",
+      "  State: MN | Specialty: All Other Suppliers\n",
+      "  Claims: 26,085, Charges: $786.8M, Payments: $53.1M\n",
+      "\n",
+      "\n",
+      "## Peer Group Analysis by Combined Specialty–State\n",
+      "\n",
+      "Significant Combined Specialty–State Outliers (>= 3× median in >=2 metrics):\n",
+      "- Accredo Health Group Inc (NPI: 1417915653)\n",
+      "  Specialty: Pharmacy | State: PA\n",
+      "  Claims: 209,938, Charges: $3464.6M, Payments: $1267.1M\n",
+      "\n",
+      "- North Coast Medical Supply, Llc (NPI: 1245259282)\n",
+      "  Specialty: Pharmacy | State: CA\n",
+      "  Claims: 1,236,598, Charges: $3458.1M, Payments: $245.3M\n",
+      "\n",
+      "- Lincare Pharmacy Services Inc. (NPI: 1780748939)\n",
+      "  Specialty: Pharmacy | State: FL\n",
+      "  Claims: 2,533,531, Charges: $2178.0M, Payments: $644.5M\n",
+      "\n",
+      "- Zoll Services Llc (NPI: 1164535274)\n",
+      "  Specialty: Other Medical Supply Company | State: PA\n",
+      "  Claims: 345,064, Charges: $1365.1M, Payments: $738.0M\n",
+      "\n",
+      "- Degc Enterprises (U.S.), Inc. (NPI: 1295827780)\n",
+      "  Specialty: Pharmacy | State: FL\n",
+      "  Claims: 1,329,923, Charges: $1291.7M, Payments: $325.9M\n",
+      "\n",
+      "- United States Medical Supply, Llc (NPI: 1700889227)\n",
+      "  Specialty: Other Medical Supply Company | State: FL\n",
+      "  Claims: 3,296,437, Charges: $1103.4M, Payments: $297.8M\n",
+      "\n",
+      "- 180 Medical Inc (NPI: 1639160708)\n",
+      "  Specialty: Other Medical Supply Company | State: OK\n",
+      "  Claims: 1,224,531, Charges: $1036.1M, Payments: $420.4M\n",
+      "\n",
+      "- Lincare Pharmacy Services Inc. (NPI: 1003970260)\n",
+      "  Specialty: Pharmacy | State: CA\n",
+      "  Claims: 953,473, Charges: $817.3M, Payments: $240.2M\n",
+      "\n",
+      "- Coram Alternate Site Services Inc (NPI: 1386674067)\n",
+      "  Specialty: All Other Suppliers | State: MN\n",
+      "  Claims: 26,085, Charges: $786.8M, Payments: $53.1M\n",
+      "\n",
+      "- Caremark, L.L.C. (NPI: 1134100134)\n",
+      "  Specialty: All Other Suppliers | State: IL\n",
+      "  Claims: 61,520, Charges: $737.9M, Payments: $244.4M\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "if not combined_df.empty:\n",
+    "    # Ensure we have columns needed for specialty/state analysis\n",
+    "    required_cols = [\n",
+    "        'Suplr_NPI', 'Suplr_Prvdr_Last_Name_Org',\n",
+    "        'Suplr_Prvdr_Spclty_Desc', 'Suplr_Prvdr_State_Abrvtn',\n",
+    "        'Suplr_Sbmtd_Chrgs', 'Suplr_Mdcr_Pymt_Amt',\n",
+    "        'Tot_Suplr_Clms', 'Tot_Suplr_Srvcs'\n",
+    "    ]\n",
+    "    missing_cols = [c for c in required_cols if c not in combined_df.columns]\n",
+    "    if missing_cols:\n",
+    "        print(f\"Missing columns for Peer Group Analysis: {missing_cols}\")\n",
+    "    else:\n",
+    "        supplier_metrics = combined_df.groupby([\n",
+    "            'Suplr_NPI', 'Suplr_Prvdr_Last_Name_Org',\n",
+    "            'Suplr_Prvdr_Spclty_Desc', 'Suplr_Prvdr_State_Abrvtn'\n",
+    "        ], as_index=False).agg({\n",
+    "            'Suplr_Sbmtd_Chrgs': 'sum',\n",
+    "            'Suplr_Mdcr_Pymt_Amt': 'sum',\n",
+    "            'Tot_Suplr_Clms': 'sum',\n",
+    "            'Tot_Suplr_Srvcs': 'sum'\n",
+    "        })\n",
+    "\n",
+    "        # Add derived metrics\n",
+    "        supplier_metrics['Avg_Chrg_Per_Clm'] = supplier_metrics['Suplr_Sbmtd_Chrgs'] / supplier_metrics['Tot_Suplr_Clms'].replace(0, np.nan)\n",
+    "        supplier_metrics['Avg_Pymt_Per_Clm'] = supplier_metrics['Suplr_Mdcr_Pymt_Amt'] / supplier_metrics['Tot_Suplr_Clms'].replace(0, np.nan)\n",
+    "        supplier_metrics['Avg_Srvcs_Per_Clm'] = supplier_metrics['Tot_Suplr_Srvcs'] / supplier_metrics['Tot_Suplr_Clms'].replace(0, np.nan)\n",
+    "\n",
+    "        print(\"\\n## Peer Group Analysis by Specialty\\n\")\n",
+    "        specialty_counts = supplier_metrics['Suplr_Prvdr_Spclty_Desc'].value_counts()\n",
+    "        valid_specialties = specialty_counts[specialty_counts >= 5].index  # at least 5 suppliers\n",
+    "\n",
+    "        if len(valid_specialties) > 0:\n",
+    "            peer_specialty_metrics = supplier_metrics[supplier_metrics['Suplr_Prvdr_Spclty_Desc'].isin(valid_specialties)].groupby('Suplr_Prvdr_Spclty_Desc').agg({\n",
+    "                'Suplr_Sbmtd_Chrgs': ['median'],\n",
+    "                'Suplr_Mdcr_Pymt_Amt': ['median'],\n",
+    "                'Tot_Suplr_Clms': ['median'],\n",
+    "                'Tot_Suplr_Srvcs': ['median']\n",
+    "            })\n",
+    "            peer_specialty_metrics.columns = [\"_\".join(col) for col in peer_specialty_metrics.columns]\n",
+    "\n",
+    "            outliers_by_specialty = []\n",
+    "\n",
+    "            for specialty in valid_specialties:\n",
+    "                group = supplier_metrics[supplier_metrics['Suplr_Prvdr_Spclty_Desc'] == specialty]\n",
+    "                med_clms = peer_specialty_metrics.loc[specialty, 'Tot_Suplr_Clms_median']\n",
+    "                med_chrg = peer_specialty_metrics.loc[specialty, 'Suplr_Sbmtd_Chrgs_median']\n",
+    "                med_pay = peer_specialty_metrics.loc[specialty, 'Suplr_Mdcr_Pymt_Amt_median']\n",
+    "\n",
+    "                # Compare each supplier to 3x median\n",
+    "                claim_outliers = group[group['Tot_Suplr_Clms'] > 3 * med_clms]\n",
+    "                charge_outliers = group[group['Suplr_Sbmtd_Chrgs'] > 3 * med_chrg]\n",
+    "                payment_outliers = group[group['Suplr_Mdcr_Pymt_Amt'] > 3 * med_pay]\n",
+    "\n",
+    "                # Combine\n",
+    "                all_out = pd.concat([\n",
+    "                    claim_outliers[['Suplr_NPI']].assign(flag='claims'),\n",
+    "                    charge_outliers[['Suplr_NPI']].assign(flag='charges'),\n",
+    "                    payment_outliers[['Suplr_NPI']].assign(flag='payments')\n",
+    "                ], ignore_index=True)\n",
+    "                # We want suppliers that appear at least in 2 out of 3 categories\n",
+    "                outlier_counts = all_out.groupby('Suplr_NPI').size()\n",
+    "                multi_flags = outlier_counts[outlier_counts >= 2].index\n",
+    "                multi_outliers = group[group['Suplr_NPI'].isin(multi_flags)]\n",
+    "\n",
+    "                for idx, row in multi_outliers.iterrows():\n",
+    "                    outliers_by_specialty.append({\n",
+    "                        'NPI': row['Suplr_NPI'],\n",
+    "                        'Name': row['Suplr_Prvdr_Last_Name_Org'],\n",
+    "                        'Specialty': row['Suplr_Prvdr_Spclty_Desc'],\n",
+    "                        'State': row['Suplr_Prvdr_State_Abrvtn'],\n",
+    "                        'Total_Claims': row['Tot_Suplr_Clms'],\n",
+    "                        'Total_Charges': row['Suplr_Sbmtd_Chrgs'],\n",
+    "                        'Total_Payments': row['Suplr_Mdcr_Pymt_Amt']\n",
+    "                    })\n",
+    "\n",
+    "            if len(outliers_by_specialty) > 0:\n",
+    "                # Just show top 10 by total charges\n",
+    "                outliers_by_specialty = sorted(\n",
+    "                    outliers_by_specialty,\n",
+    "                    key=lambda x: x['Total_Charges'],\n",
+    "                    reverse=True\n",
+    "                )\n",
+    "\n",
+    "                print(\"Significant Specialty Outliers (exceeding 3× median in >=2 metrics):\")\n",
+    "                for outlier in outliers_by_specialty[:10]:\n",
+    "                    print(f\"- {outlier['Name']} (NPI: {outlier['NPI']})\")\n",
+    "                    print(f\"  Specialty: {outlier['Specialty']} | State: {outlier['State']}\")\n",
+    "                    print(f\"  Claims: {outlier['Total_Claims']:,}, Charges: {format_dollar_amount(outlier['Total_Charges'])}, Payments: {format_dollar_amount(outlier['Total_Payments'])}\\n\")\n",
+    "            else:\n",
+    "                print(\"No multi-metric outliers by specialty.\")\n",
+    "        else:\n",
+    "            print(\"No specialty with >=5 suppliers.\")\n",
+    "\n",
+    "        print(\"\\n## Peer Group Analysis by State\\n\")\n",
+    "        state_counts = supplier_metrics['Suplr_Prvdr_State_Abrvtn'].value_counts()\n",
+    "        valid_states = state_counts[state_counts >= 5].index\n",
+    "\n",
+    "        if len(valid_states) > 0:\n",
+    "            peer_state_metrics = supplier_metrics[supplier_metrics['Suplr_Prvdr_State_Abrvtn'].isin(valid_states)].groupby('Suplr_Prvdr_State_Abrvtn').agg({\n",
+    "                'Suplr_Sbmtd_Chrgs': ['median'],\n",
+    "                'Suplr_Mdcr_Pymt_Amt': ['median'],\n",
+    "                'Tot_Suplr_Clms': ['median'],\n",
+    "                'Tot_Suplr_Srvcs': ['median']\n",
+    "            })\n",
+    "            peer_state_metrics.columns = [\"_\".join(col) for col in peer_state_metrics.columns]\n",
+    "\n",
+    "            outliers_by_state = []\n",
+    "\n",
+    "            for st in valid_states:\n",
+    "                group = supplier_metrics[supplier_metrics['Suplr_Prvdr_State_Abrvtn'] == st]\n",
+    "                med_clms = peer_state_metrics.loc[st, 'Tot_Suplr_Clms_median']\n",
+    "                med_chrg = peer_state_metrics.loc[st, 'Suplr_Sbmtd_Chrgs_median']\n",
+    "                med_pay = peer_state_metrics.loc[st, 'Suplr_Mdcr_Pymt_Amt_median']\n",
+    "\n",
+    "                # Compare to 3x\n",
+    "                claim_outliers = group[group['Tot_Suplr_Clms'] > 3 * med_clms]\n",
+    "                charge_outliers = group[group['Suplr_Sbmtd_Chrgs'] > 3 * med_chrg]\n",
+    "                payment_outliers = group[group['Suplr_Mdcr_Pymt_Amt'] > 3 * med_pay]\n",
+    "\n",
+    "                all_out = pd.concat([\n",
+    "                    claim_outliers[['Suplr_NPI']].assign(flag='claims'),\n",
+    "                    charge_outliers[['Suplr_NPI']].assign(flag='charges'),\n",
+    "                    payment_outliers[['Suplr_NPI']].assign(flag='payments')\n",
+    "                ], ignore_index=True)\n",
+    "                outlier_counts = all_out.groupby('Suplr_NPI').size()\n",
+    "                multi_flags = outlier_counts[outlier_counts >= 2].index\n",
+    "                multi_outliers = group[group['Suplr_NPI'].isin(multi_flags)]\n",
+    "\n",
+    "                for idx, row in multi_outliers.iterrows():\n",
+    "                    outliers_by_state.append({\n",
+    "                        'NPI': row['Suplr_NPI'],\n",
+    "                        'Name': row['Suplr_Prvdr_Last_Name_Org'],\n",
+    "                        'Specialty': row['Suplr_Prvdr_Spclty_Desc'],\n",
+    "                        'State': st,\n",
+    "                        'Total_Claims': row['Tot_Suplr_Clms'],\n",
+    "                        'Total_Charges': row['Suplr_Sbmtd_Chrgs'],\n",
+    "                        'Total_Payments': row['Suplr_Mdcr_Pymt_Amt']\n",
+    "                    })\n",
+    "\n",
+    "            if len(outliers_by_state) > 0:\n",
+    "                outliers_by_state = sorted(\n",
+    "                    outliers_by_state,\n",
+    "                    key=lambda x: x['Total_Charges'],\n",
+    "                    reverse=True\n",
+    "                )\n",
+    "                print(\"Significant State Outliers (>= 3× median in >=2 metrics):\")\n",
+    "                for outlier in outliers_by_state[:10]:\n",
+    "                    print(f\"- {outlier['Name']} (NPI: {outlier['NPI']})\")\n",
+    "                    print(f\"  State: {outlier['State']} | Specialty: {outlier['Specialty']}\")\n",
+    "                    print(f\"  Claims: {outlier['Total_Claims']:,}, Charges: {format_dollar_amount(outlier['Total_Charges'])}, Payments: {format_dollar_amount(outlier['Total_Payments'])}\\n\")\n",
+    "            else:\n",
+    "                print(\"No multi-metric outliers by state.\")\n",
+    "        else:\n",
+    "            print(\"No states with >=5 suppliers.\")\n",
+    "\n",
+    "        print(\"\\n## Peer Group Analysis by Combined Specialty–State\\n\")\n",
+    "        supplier_metrics['SpecState'] = supplier_metrics['Suplr_Prvdr_Spclty_Desc'].astype(str) + ' - ' + supplier_metrics['Suplr_Prvdr_State_Abrvtn'].astype(str)\n",
+    "        combo_counts = supplier_metrics['SpecState'].value_counts()\n",
+    "        valid_specstates = combo_counts[combo_counts >= 5].index\n",
+    "\n",
+    "        if len(valid_specstates) > 0:\n",
+    "            # Calculate medians for each group\n",
+    "            combo_medians = supplier_metrics[supplier_metrics['SpecState'].isin(valid_specstates)].groupby('SpecState').agg({\n",
+    "                'Suplr_Sbmtd_Chrgs': 'median',\n",
+    "                'Suplr_Mdcr_Pymt_Amt': 'median',\n",
+    "                'Tot_Suplr_Clms': 'median',\n",
+    "                'Tot_Suplr_Srvcs': 'median'\n",
+    "            })\n",
+    "            outliers_combined = []\n",
+    "            \n",
+    "            for cs in valid_specstates:\n",
+    "                group = supplier_metrics[supplier_metrics['SpecState'] == cs]\n",
+    "                med_clms = combo_medians.loc[cs, 'Tot_Suplr_Clms']\n",
+    "                med_chrg = combo_medians.loc[cs, 'Suplr_Sbmtd_Chrgs']\n",
+    "                med_pay = combo_medians.loc[cs, 'Suplr_Mdcr_Pymt_Amt']\n",
+    "\n",
+    "                claim_outliers = group[group['Tot_Suplr_Clms'] > 3 * med_clms]\n",
+    "                charge_outliers = group[group['Suplr_Sbmtd_Chrgs'] > 3 * med_chrg]\n",
+    "                payment_outliers = group[group['Suplr_Mdcr_Pymt_Amt'] > 3 * med_pay]\n",
+    "\n",
+    "                all_out = pd.concat([\n",
+    "                    claim_outliers[['Suplr_NPI']].assign(flag='claims'),\n",
+    "                    charge_outliers[['Suplr_NPI']].assign(flag='charges'),\n",
+    "                    payment_outliers[['Suplr_NPI']].assign(flag='payments')\n",
+    "                ], ignore_index=True)\n",
+    "                outlier_counts = all_out.groupby('Suplr_NPI').size()\n",
+    "                multi_flags = outlier_counts[outlier_counts >= 2].index\n",
+    "\n",
+    "                multi_outliers = group[group['Suplr_NPI'].isin(multi_flags)]\n",
+    "                for idx, row in multi_outliers.iterrows():\n",
+    "                    outliers_combined.append({\n",
+    "                        'NPI': row['Suplr_NPI'],\n",
+    "                        'Name': row['Suplr_Prvdr_Last_Name_Org'],\n",
+    "                        'SpecState': cs,\n",
+    "                        'Specialty': row['Suplr_Prvdr_Spclty_Desc'],\n",
+    "                        'State': row['Suplr_Prvdr_State_Abrvtn'],\n",
+    "                        'Total_Claims': row['Tot_Suplr_Clms'],\n",
+    "                        'Total_Charges': row['Suplr_Sbmtd_Chrgs'],\n",
+    "                        'Total_Payments': row['Suplr_Mdcr_Pymt_Amt']\n",
+    "                    })\n",
+    "            \n",
+    "            if outliers_combined:\n",
+    "                # Sort by total charges just as a quick way to highlight big outliers\n",
+    "                outliers_combined = sorted(\n",
+    "                    outliers_combined,\n",
+    "                    key=lambda x: x['Total_Charges'],\n",
+    "                    reverse=True\n",
+    "                )\n",
+    "                print(\"Significant Combined Specialty–State Outliers (>= 3× median in >=2 metrics):\")\n",
+    "                for outlier in outliers_combined[:10]:\n",
+    "                    print(f\"- {outlier['Name']} (NPI: {outlier['NPI']})\")\n",
+    "                    print(f\"  Specialty: {outlier['Specialty']} | State: {outlier['State']}\")\n",
+    "                    print(f\"  Claims: {outlier['Total_Claims']:,}, Charges: {format_dollar_amount(outlier['Total_Charges'])}, Payments: {format_dollar_amount(outlier['Total_Payments'])}\\n\")\n",
+    "            else:\n",
+    "                print(\"No multi-metric outliers at the combined specialty–state level.\")\n",
+    "        else:\n",
+    "            print(\"No combined specialty–state groups with >=5 suppliers.\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "9b851377",
+   "metadata": {},
+   "source": [
+    "# 8. Conclusions & Next Steps\n",
+    "We've combined multi-year DME data, identified year-over-year outliers, analyzed high submitted vs. allowed/paid ratios, and performed peer-group checks.\n",
+    "\n",
+    "### Potential Enhancements\n",
+    "1. **Additional Metrics**: Incorporate DME-specific categories (e.g., prosthetics vs. drug/nutrition) and investigate outliers in each.\n",
+    "2. **Machine Learning**: Replace threshold-based outlier detection with algorithms (Isolation Forest, DBSCAN, etc.).\n",
+    "3. **Visualization**: Plot distributions, boxplots, or time-series charts for top suspicious suppliers.\n",
+    "4. **Interactive Dashboards**: Provide an interface for users to adjust thresholds and instantly see flagged suppliers.\n"
    ]
   }
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
+   "display_name": "PY311LLM",
    "language": "python",
    "name": "python3"
   },
diff --git a/dme_data_analysis.py b/dme_data_analysis.py
deleted file mode 100644
index c2b29aa..0000000
--- a/dme_data_analysis.py
+++ /dev/null
@@ -1,952 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-
-"""
-DME Data Analysis Script
-This script imports and analyzes the DME data files by year.
-"""
-
-import pandas as pd
-import numpy as np
-import os
-from pprint import pprint
-from collections import defaultdict, Counter
-import matplotlib.pyplot as plt
-import seaborn as sns
-import sys
-
-
-def import_dme_data(file_path):
-    """
-    Import and preprocess DME data from a CSV file.
-
-    Parameters:
-    -----------
-    file_path : str
-        Path to the CSV file containing DME data
-
-    Returns:
-    --------
-    df : DataFrame
-        Processed DataFrame containing DME data
-    """
-    print(f"Importing data from {file_path}...")
-
-    try:
-        # Import data with appropriate dtypes to handle monetary values correctly
-        df = pd.read_csv(file_path, low_memory=False)
-
-        # Convert monetary columns to numeric
-        money_columns = [
-            col for col in df.columns if 'Pymt' in col or 'Amt' in col]
-        for col in money_columns:
-            if col in df.columns:
-                df[col] = pd.to_numeric(df[col], errors='coerce')
-
-        print(f"Successfully imported data with shape: {df.shape}")
-        return df
-
-    except Exception as e:
-        print(f"Error importing data: {str(e)}")
-        return None
-
-
-# Data dictionary mapping variable names to their descriptions
-DATA_DICTIONARY = {
-    # Supplier Information
-    "Suplr_NPI": "Supplier NPI - NPI for the Supplier on the DMEPOS claim",
-    "Suplr_Prvdr_Last_Name_Org": "Supplier Last Name/Organization Name - When registered as individual, the Supplier's last name. When registered as organization, this is the organization name",
-    "Suplr_Prvdr_First_Name": "Supplier First Name - When registered as individual, the Supplier's first name",
-    "Suplr_Prvdr_MI": "Supplier Middle Initial - When registered as individual, the Supplier's middle initial",
-    "Suplr_Prvdr_Crdntls": "Supplier Credentials - When registered as individual, these are the Supplier's credentials",
-    "Suplr_Prvdr_Gndr": "Supplier Gender - When registered as individual, this is the Supplier's gender",
-    "Suplr_Prvdr_Ent_Cd": "Supplier Entity Code - 'I' identifies Suppliers registered as individuals, 'O' identifies Suppliers registered as organizations",
-    "Suplr_Prvdr_St1": "Supplier Street 1 - First line of the Supplier's street address",
-    "Suplr_Prvdr_St2": "Supplier Street 2 - Second line of the Supplier's street address",
-    "Suplr_Prvdr_City": "Supplier City - The city where the Supplier is located",
-    "Suplr_Prvdr_State_Abrvtn": "Supplier State - State postal abbreviation where the Supplier is located",
-    "Suplr_Prvdr_State_FIPS": "Supplier State FIPS Code - FIPS code for Supplier's state",
-    "Suplr_Prvdr_Zip5": "Supplier ZIP - The Supplier's ZIP code",
-    "Suplr_Prvdr_RUCA": "Supplier RUCA - Rural-Urban Commuting Area Code for the Supplier ZIP code",
-    "Suplr_Prvdr_RUCA_Desc": "Supplier RUCA Description - Description of Rural-Urban Commuting Area (RUCA) Code",
-    "Suplr_Prvdr_Cntry": "Supplier Country - Country where the Supplier is located",
-    "Suplr_Prvdr_Spclty_Desc": "Supplier Provider Specialty Description - Derived from Medicare provider/supplier specialty code",
-    "Suplr_Prvdr_Spclty_Srce": "Supplier Provider Specialty Source - Source of the Supplier Specialty (claims-specialty or NPPES-specialty)",
-
-    # Total Supplier Claims/Services
-    "Tot_Suplr_HCPCS_Cds": "Number of Supplier HCPCS - Total unique DMEPOS product/service HCPCS codes",
-    "Tot_Suplr_Benes": "Number of Supplier Beneficiaries - Total unique beneficiaries (<11 are suppressed)",
-    "Tot_Suplr_Clms": "Number of Supplier Claims - Total DMEPOS claims submitted",
-    "Tot_Suplr_Srvcs": "Number of Supplier Services - Total DMEPOS products/services rendered",
-    "Suplr_Sbmtd_Chrgs": "Supplier Submitted Charges - Total charges submitted for DMEPOS products/services",
-    "Suplr_Mdcr_Alowd_Amt": "Supplier Medicare Allowed Amount - Total Medicare allowed amount",
-    "Suplr_Mdcr_Pymt_Amt": "Supplier Medicare Payment Amount - Amount Medicare paid after deductible/coinsurance",
-    "Suplr_Mdcr_Stdzd_Pymt_Amt": "Supplier Medicare Standard Payment Amount - Standardized Medicare payments",
-
-    # DME-specific Fields
-    "DME_Sprsn_Ind": "Durable Medical Equipment Suppression Indicator - '*'=suppressed (1-10 claims), '#'=counter-suppressed",
-    "DME_Tot_Suplr_HCPCS_Cds": "Number of DME HCPCS - Total unique DME HCPCS codes",
-    "DME_Tot_Suplr_Benes": "Number of DME Beneficiaries - Total unique beneficiaries with DME claims (<11 are suppressed)",
-    "DME_Tot_Suplr_Clms": "Number of DME Claims - Total DME claims submitted",
-    "DME_Tot_Suplr_Srvcs": "Number of DME Services - Total DME products/services rendered",
-    "DME_Suplr_Sbmtd_Chrgs": "DME Submitted Charges - Total charges submitted for DME products/services",
-    "DME_Suplr_Mdcr_Alowd_Amt": "DME Medicare Allowed Amount - Total Medicare allowed amount for DME",
-    "DME_Suplr_Mdcr_Pymt_Amt": "DME Medicare Payment Amount - Amount Medicare paid for DME after deductible/coinsurance",
-    "DME_Suplr_Mdcr_Stdzd_Pymt_Amt": "DME Medicare Standard Payment Amount - Standardized Medicare payments for DME",
-
-    # Prosthetic and Orthotic Fields
-    "POS_Sprsn_Ind": "Prosthetic and Orthotic Suppression Indicator - '*'=suppressed (1-10 claims), '#'=counter-suppressed",
-    "POS_Tot_Suplr_HCPCS_Cds": "Number of Prosthetic/Orthotic HCPCS - Total unique prosthetic/orthotic HCPCS codes",
-    "POS_Tot_Suplr_Benes": "Number of Prosthetic/Orthotic Beneficiaries - Total unique beneficiaries",
-    "POS_Tot_Suplr_Clms": "Number of Prosthetic/Orthotic Claims - Total prosthetic/orthotic claims submitted",
-    "POS_Tot_Suplr_Srvcs": "Number of Prosthetic/Orthotic Services - Total prosthetic/orthotic products/services",
-    "POS_Suplr_Sbmtd_Chrgs": "Prosthetic/Orthotic Submitted Charges - Total charges submitted for prosthetic/orthotic",
-    "POS_Suplr_Mdcr_Alowd_Amt": "Prosthetic/Orthotic Medicare Allowed Amount - Total Medicare allowed amount",
-    "POS_Suplr_Mdcr_Pymt_Amt": "Prosthetic/Orthotic Medicare Payment Amount - Amount Medicare paid after deductible/coinsurance",
-    "POS_Suplr_Mdcr_Stdzd_Pymt_Amt": "Prosthetic/Orthotic Medicare Standard Payment Amount - Standardized Medicare payments",
-
-    # Drug and Nutritional Fields
-    "Drug_Sprsn_Ind": "Drug and Nutritional Suppression Indicator - '*'=suppressed (1-10 claims), '#'=counter-suppressed",
-    "Drug_Tot_Suplr_HCPCS_Cds": "Number of Drug/Nutritional HCPCS - Total unique drug/nutritional HCPCS codes",
-    "Drug_Tot_Suplr_Benes": "Number of Drug/Nutritional Beneficiaries - Total unique beneficiaries",
-    "Drug_Tot_Suplr_Clms": "Number of Drug/Nutritional Claims - Total drug/nutritional claims submitted",
-    "Drug_Tot_Suplr_Srvcs": "Number of Drug/Nutritional Services - Total drug/nutritional products/services",
-    "Drug_Suplr_Sbmtd_Chrgs": "Drug/Nutritional Submitted Charges - Total charges submitted for drug/nutritional",
-    "Drug_Suplr_Mdcr_Alowd_Amt": "Drug/Nutritional Medicare Allowed Amount - Total Medicare allowed amount",
-    "Drug_Suplr_Mdcr_Pymt_Amt": "Drug/Nutritional Medicare Payment Amount - Amount Medicare paid after deductible/coinsurance",
-    "Drug_Suplr_Mdcr_Stdzd_Pymt_Amt": "Drug/Nutritional Medicare Standard Payment Amount - Standardized Medicare payments",
-
-    # Beneficiary Demographics
-    "Bene_Avg_Age": "Average Age of Beneficiaries - Average age at end of calendar year or time of death",
-    "Bene_Age_LT_65_Cnt": "Number of Beneficiaries <65 - Count of beneficiaries under 65 years old",
-    "Bene_Age_65_74_Cnt": "Number of Beneficiaries 65-74 - Count of beneficiaries between 65-74 years old",
-    "Bene_Age_75_84_Cnt": "Number of Beneficiaries 75-84 - Count of beneficiaries between 75-84 years old",
-    "Bene_Age_GT_84_Cnt": "Number of Beneficiaries >84 - Count of beneficiaries over 84 years old",
-    "Bene_Feml_Cnt": "Number of Female Beneficiaries - Count of female beneficiaries",
-    "Bene_Male_Cnt": "Number of Male Beneficiaries - Count of male beneficiaries",
-    "Bene_Race_Wht_Cnt": "Number of White Beneficiaries - Count of non-Hispanic white beneficiaries",
-    "Bene_Race_Black_Cnt": "Number of Black Beneficiaries - Count of non-Hispanic Black/African American beneficiaries",
-    "Bene_Race_Api_Cnt": "Number of Asian/PI Beneficiaries - Count of Asian Pacific Islander beneficiaries",
-    "Bene_Race_Hspnc_Cnt": "Number of Hispanic Beneficiaries - Count of Hispanic beneficiaries",
-    "Bene_Race_Natind_Cnt": "Number of Native American/Alaska Native Beneficiaries - Count of American Indian/Alaska Native beneficiaries",
-    "Bene_Race_Othr_Cnt": "Number of Other Race Beneficiaries - Count of beneficiaries with race not elsewhere classified",
-    "Bene_Ndual_Cnt": "Number of Medicare & Medicaid Beneficiaries - Count of dual-eligible beneficiaries",
-    "Bene_Dual_Cnt": "Number of Medicare-Only Beneficiaries - Count of Medicare-only beneficiaries",
-
-    # Beneficiary Health Conditions (Mental/Behavioral Health)
-    "Bene_CC_BH_ADHD_OthCD_V1_Pct": "Percent with ADHD and Other Conduct Disorders",
-    "Bene_CC_BH_Alcohol_Drug_V1_Pct": "Percent with Alcohol and Drug Use Disorders",
-    "Bene_CC_BH_Tobacco_V1_Pct": "Percent with Tobacco Use Disorders",
-    "Bene_CC_BH_Alz_NonAlzdem_V2_Pct": "Percent with Alzheimer's and Non-Alzheimer's Dementia",
-    "Bene_CC_BH_Anxiety_V1_Pct": "Percent with Anxiety Disorders",
-    "Bene_CC_BH_Bipolar_V1_Pct": "Percent with Bipolar Disorder",
-    "Bene_CC_BH_Mood_V2_Pct": "Percent with Depression, Bipolar or Other Mood Disorders",
-    "Bene_CC_BH_Depress_V1_Pct": "Percent with Major Depressive Affective Disorder",
-    "Bene_CC_BH_PD_V1_Pct": "Percent with Personality Disorders",
-    "Bene_CC_BH_PTSD_V1_Pct": "Percent with Post-Traumatic Stress Disorder",
-    "Bene_CC_BH_Schizo_OthPsy_V1_Pct": "Percent with Schizophrenia and Other Psychotic Disorders",
-
-    # Beneficiary Health Conditions (Physical Health)
-    "Bene_CC_PH_Asthma_V2_Pct": "Percent with Asthma",
-    "Bene_CC_PH_Afib_V2_Pct": "Percent with Atrial Fibrillation and Flutter",
-    "Bene_CC_PH_Cancer6_V2_Pct": "Percent with Cancer (combined 6 cancer indicators)",
-    "Bene_CC_PH_CKD_V2_Pct": "Percent with Chronic Kidney Disease",
-    "Bene_CC_PH_COPD_V2_Pct": "Percent with Chronic Obstructive Pulmonary Disease",
-    "Bene_CC_PH_Diabetes_V2_Pct": "Percent with Diabetes",
-    "Bene_CC_PH_HF_NonIHD_V2_Pct": "Percent with Heart Failure and Non-Ischemic Heart Disease",
-    "Bene_CC_PH_Hyperlipidemia_V2_Pct": "Percent with Hyperlipidemia",
-    "Bene_CC_PH_Hypertension_V2_Pct": "Percent with Hypertension",
-    "Bene_CC_PH_IschemicHeart_V2_Pct": "Percent with Ischemic Heart Disease",
-    "Bene_CC_PH_Osteoporosis_V2_Pct": "Percent with Osteoporosis",
-    "Bene_CC_PH_Parkinson_V2_Pct": "Percent with Parkinson's Disease",
-    "Bene_CC_PH_Arthritis_V2_Pct": "Percent with Rheumatoid Arthritis/Osteoarthritis",
-    "Bene_CC_PH_Stroke_TIA_V2_Pct": "Percent with Stroke/Transient Ischemic Attack",
-
-    # Risk Score
-    "Bene_Avg_Risk_Scre": "Average HCC Risk Score of Beneficiaries"
-}
-
-
-def get_column_category(column_name):
-    """Return the category for a given column name based on prefix."""
-    if column_name.startswith('Suplr_'):
-        return "Supplier Information"
-    elif column_name.startswith('DME_'):
-        return "Durable Medical Equipment"
-    elif column_name.startswith('POS_'):
-        return "Prosthetics and Orthotics"
-    elif column_name.startswith('Drug_'):
-        return "Drug and Nutritional Products"
-    elif column_name.startswith('Bene_CC_BH_'):
-        return "Beneficiary Behavioral Health Conditions"
-    elif column_name.startswith('Bene_CC_PH_'):
-        return "Beneficiary Physical Health Conditions"
-    elif column_name.startswith('Bene_'):
-        return "Beneficiary Demographics"
-    else:
-        return "Other"
-
-
-def get_top_suppliers(df, top_n=10):
-    """Return the top suppliers by number of beneficiaries."""
-    top_suppliers = df.sort_values(
-        'DME_Tot_Suplr_Benes', ascending=False).head(top_n)
-
-    # Format results for better readability
-    results = []
-    for _, row in top_suppliers.iterrows():
-        supplier_name = row['Suplr_Prvdr_Last_Name_Org']
-        beneficiaries = row['DME_Tot_Suplr_Benes']
-        claims = row['DME_Tot_Suplr_Clms']
-        payments = row['DME_Suplr_Mdcr_Pymt_Amt']
-
-        results.append({
-            'Supplier': supplier_name,
-            'Beneficiaries': beneficiaries,
-            'Claims': claims,
-            'Medicare Payments': f"${payments:,.2f}"
-        })
-
-    return pd.DataFrame(results)
-
-
-def get_beneficiary_demographics(df):
-    """Analyze beneficiary demographics from the data."""
-    # Extract age distribution
-    age_cols = ['Bene_Age_LT_65_Cnt', 'Bene_Age_65_74_Cnt',
-                'Bene_Age_75_84_Cnt', 'Bene_Age_GT_84_Cnt']
-    age_totals = df[age_cols].sum()
-    total_benes = age_totals.sum()
-    age_pcts = (age_totals / total_benes * 100).round(2)
-
-    # Extract gender distribution
-    gender_cols = ['Bene_Feml_Cnt', 'Bene_Male_Cnt']
-    gender_totals = df[gender_cols].sum()
-    gender_pcts = (gender_totals / gender_totals.sum() * 100).round(2)
-
-    # Extract race distribution
-    race_cols = ['Bene_Race_Wht_Cnt', 'Bene_Race_Black_Cnt', 'Bene_Race_Api_Cnt',
-                 'Bene_Race_Hspnc_Cnt', 'Bene_Race_Natind_Cnt', 'Bene_Race_Othr_Cnt']
-    race_totals = df[race_cols].sum()
-    race_pcts = (race_totals / race_totals.sum() * 100).round(2)
-
-    # Format results with readable labels from data dictionary
-    age_results = {DATA_DICTIONARY[col].split(
-        ' - ')[0]: pct for col, pct in zip(age_cols, age_pcts)}
-    gender_results = {DATA_DICTIONARY[col].split(
-        ' - ')[0]: pct for col, pct in zip(gender_cols, gender_pcts)}
-    race_results = {DATA_DICTIONARY[col].split(
-        ' - ')[0]: pct for col, pct in zip(race_cols, race_pcts)}
-
-    return {
-        'Age Distribution': age_results,
-        'Gender Distribution': gender_results,
-        'Race Distribution': race_results
-    }
-
-
-def get_common_health_conditions(df):
-    """Extract the most common health conditions among beneficiaries."""
-    # Physical health conditions
-    ph_cols = [col for col in df.columns if col.startswith(
-        'Bene_CC_PH_') and col.endswith('_Pct')]
-    ph_values = []
-
-    for col in ph_cols:
-        # Calculate weighted average (weighted by number of beneficiaries)
-        weighted_avg = (df[col] * df['DME_Tot_Suplr_Benes']
-                        ).sum() / df['DME_Tot_Suplr_Benes'].sum()
-        ph_values.append((DATA_DICTIONARY[col], weighted_avg))
-
-    # Behavioral health conditions
-    bh_cols = [col for col in df.columns if col.startswith(
-        'Bene_CC_BH_') and col.endswith('_Pct')]
-    bh_values = []
-
-    for col in bh_cols:
-        # Calculate weighted average (weighted by number of beneficiaries)
-        weighted_avg = (df[col] * df['DME_Tot_Suplr_Benes']
-                        ).sum() / df['DME_Tot_Suplr_Benes'].sum()
-        bh_values.append((DATA_DICTIONARY[col], weighted_avg))
-
-    # Sort by prevalence
-    ph_values.sort(key=lambda x: x[1], reverse=True)
-    bh_values.sort(key=lambda x: x[1], reverse=True)
-
-    return {
-        'Physical Health Conditions': ph_values,
-        'Behavioral Health Conditions': bh_values
-    }
-
-
-def analyze_spending_patterns(df_by_year):
-    """Analyze spending patterns across years."""
-    year_data = []
-
-    for year, df in df_by_year.items():
-        # Calculate total beneficiaries and spending
-        total_benes = df['DME_Tot_Suplr_Benes'].sum()
-        total_spend = df['DME_Suplr_Mdcr_Pymt_Amt'].sum()
-
-        # Calculate spending per beneficiary
-        spend_per_bene = total_spend / total_benes if total_benes > 0 else 0
-
-        # Calculate distribution of spending by DME, POS, and Drug categories
-        dme_spend = df['DME_Suplr_Mdcr_Pymt_Amt'].sum()
-        pos_spend = df['POS_Suplr_Mdcr_Pymt_Amt'].sum()
-        drug_spend = df['Drug_Suplr_Mdcr_Pymt_Amt'].sum()
-
-        # Add to results
-        year_data.append({
-            'Year': year,
-            'Total Beneficiaries': total_benes,
-            'Total Spending': total_spend,
-            'Spending Per Beneficiary': spend_per_bene,
-            'DME Spending': dme_spend,
-            'Prosthetic/Orthotic Spending': pos_spend,
-            'Drug Spending': drug_spend
-        })
-
-    return pd.DataFrame(year_data)
-
-
-# -------------------- Visualization Functions --------------------
-
-def plot_spending_trends(spend_df):
-    """
-    Create visualizations for spending trends over time.
-
-    Parameters:
-    -----------
-    spend_df : DataFrame
-        DataFrame with yearly spending data, as returned by analyze_spending_patterns
-
-    Returns:
-    --------
-    fig : matplotlib Figure
-        The figure containing the visualizations
-    """
-    # Set the style
-    sns.set_style('whitegrid')
-
-    # Create a figure with 2x2 subplots
-    fig, axes = plt.subplots(2, 2, figsize=(16, 14))
-
-    # Total beneficiaries by year
-    sns.lineplot(x='Year', y='Total Beneficiaries', data=spend_df,
-                 marker='o', linewidth=3, markersize=10, ax=axes[0, 0], color='#1f77b4')
-    axes[0, 0].set_title('Total Beneficiaries by Year', fontsize=16)
-    axes[0, 0].ticklabel_format(style='plain', axis='y')
-    axes[0, 0].grid(True)
-
-    # Total spending by year
-    sns.lineplot(x='Year', y='Total Spending', data=spend_df,
-                 marker='o', linewidth=3, markersize=10, ax=axes[0, 1], color='#ff7f0e')
-    axes[0, 1].set_title('Total Medicare DME Spending by Year', fontsize=16)
-    axes[0, 1].ticklabel_format(style='plain', axis='y')
-    axes[0, 1].yaxis.set_major_formatter(
-        plt.FuncFormatter(lambda x, pos: f'${x/1e9:.1f}B'))
-    axes[0, 1].grid(True)
-
-    # Spending per beneficiary by year
-    sns.lineplot(x='Year', y='Spending Per Beneficiary', data=spend_df,
-                 marker='o', linewidth=3, markersize=10, ax=axes[1, 0], color='#2ca02c')
-    axes[1, 0].set_title('Average Spending Per Beneficiary', fontsize=16)
-    axes[1, 0].yaxis.set_major_formatter(
-        plt.FuncFormatter(lambda x, pos: f'${x:.0f}'))
-    axes[1, 0].grid(True)
-
-    # Spending by category stacked area chart
-    category_data = spend_df[['Year', 'DME Spending',
-                              'Prosthetic/Orthotic Spending', 'Drug Spending']]
-    category_data_stacked = category_data.set_index('Year')
-
-    # Convert to billions for better readability
-    category_data_stacked = category_data_stacked / 1e9
-
-    # Plot stacked area chart
-    category_data_stacked.plot.area(stacked=True, ax=axes[1, 1],
-                                    color=['#1f77b4', '#ff7f0e', '#2ca02c'],
-                                    alpha=0.7)
-    axes[1, 1].set_title('Spending by Category', fontsize=16)
-    axes[1, 1].set_ylabel('Spending (Billions $)')
-    axes[1, 1].yaxis.set_major_formatter(
-        plt.FuncFormatter(lambda x, pos: f'${x:.1f}B'))
-    axes[1, 1].legend(loc='upper left')
-    axes[1, 1].grid(True)
-
-    plt.tight_layout()
-    return fig
-
-
-def plot_demographics(df, year=None):
-    """
-    Create visualizations for beneficiary demographics.
-
-    Parameters:
-    -----------
-    df : DataFrame or dict
-        Either a DataFrame for a specific year or the df_by_year dictionary
-    year : int, optional
-        If df is a dictionary, specify which year to visualize
-
-    Returns:
-    --------
-    fig : matplotlib Figure
-        The figure containing the visualizations
-    """
-    # If we have multiple years, extract the specified year
-    if isinstance(df, dict) and year is not None:
-        if year in df:
-            df = df[year]
-        else:
-            raise ValueError(f"Year {year} not found in data")
-
-    # Get demographics data
-    demo_results = get_beneficiary_demographics(df)
-
-    # Create a figure with 3 subplots for age, gender, and race
-    fig, axes = plt.subplots(1, 3, figsize=(18, 6))
-
-    # Age distribution
-    age_data = demo_results['Age Distribution']
-    age_labels = list(age_data.keys())
-    age_values = list(age_data.values())
-
-    axes[0].pie(age_values, labels=age_labels, autopct='%1.1f%%',
-                startangle=90, colors=sns.color_palette("Blues", len(age_labels)))
-    axes[0].set_title('Age Distribution', fontsize=16)
-
-    # Gender distribution
-    gender_data = demo_results['Gender Distribution']
-    gender_labels = list(gender_data.keys())
-    gender_values = list(gender_data.values())
-
-    axes[1].pie(gender_values, labels=gender_labels, autopct='%1.1f%%',
-                startangle=90, colors=sns.color_palette("Set2", len(gender_labels)))
-    axes[1].set_title('Gender Distribution', fontsize=16)
-
-    # Race distribution
-    race_data = demo_results['Race Distribution']
-    race_labels = list(race_data.keys())
-    race_values = list(race_data.values())
-
-    # Sort by percentage (descending)
-    sorted_race = sorted(zip(race_labels, race_values),
-                         key=lambda x: x[1], reverse=True)
-    race_labels, race_values = zip(*sorted_race)
-
-    axes[2].pie(race_values, labels=race_labels, autopct='%1.1f%%',
-                startangle=90, colors=sns.color_palette("Set3", len(race_labels)))
-    axes[2].set_title('Race Distribution', fontsize=16)
-
-    plt.tight_layout()
-    return fig
-
-
-def plot_health_conditions(df, year=None, top_n=10):
-    """
-    Create visualizations for health conditions prevalence.
-
-    Parameters:
-    -----------
-    df : DataFrame or dict
-        Either a DataFrame for a specific year or the df_by_year dictionary
-    year : int, optional
-        If df is a dictionary, specify which year to visualize
-    top_n : int, optional
-        Number of top conditions to display (default: 10)
-
-    Returns:
-    --------
-    fig : matplotlib Figure
-        The figure containing the visualizations
-    """
-    # If we have multiple years, extract the specified year
-    if isinstance(df, dict) and year is not None:
-        if year in df:
-            df = df[year]
-        else:
-            raise ValueError(f"Year {year} not found in data")
-
-    # Get health conditions data
-    conditions = get_common_health_conditions(df)
-
-    # Create a figure with 2 subplots for physical and behavioral health
-    fig, axes = plt.subplots(1, 2, figsize=(20, 10))
-
-    # Physical health conditions
-    ph_data = conditions['Physical Health Conditions'][:top_n]
-    ph_labels = [cond for cond, _ in ph_data]
-    ph_values = [val for _, val in ph_data]
-
-    # Horizontal bar chart for physical health
-    sns.barplot(x=ph_values, y=ph_labels, palette="Blues_d", ax=axes[0])
-    axes[0].set_title('Top Physical Health Conditions', fontsize=16)
-    axes[0].set_xlabel('Percentage of Beneficiaries', fontsize=12)
-    axes[0].xaxis.set_major_formatter(
-        plt.FuncFormatter(lambda x, pos: f'{x:.2f}%'))
-    axes[0].grid(axis='x')
-
-    # Behavioral health conditions
-    bh_data = conditions['Behavioral Health Conditions'][:top_n]
-    bh_labels = [cond for cond, _ in bh_data]
-    bh_values = [val for _, val in bh_data]
-
-    # Horizontal bar chart for behavioral health
-    sns.barplot(x=bh_values, y=bh_labels, palette="Oranges_d", ax=axes[1])
-    axes[1].set_title('Top Behavioral Health Conditions', fontsize=16)
-    axes[1].set_xlabel('Percentage of Beneficiaries', fontsize=12)
-    axes[1].xaxis.set_major_formatter(
-        plt.FuncFormatter(lambda x, pos: f'{x:.2f}%'))
-    axes[1].grid(axis='x')
-
-    plt.tight_layout()
-    return fig
-
-
-def plot_top_suppliers(df, year=None, top_n=10):
-    """
-    Create visualizations for top suppliers.
-
-    Parameters:
-    -----------
-    df : DataFrame or dict
-        Either a DataFrame for a specific year or the df_by_year dictionary
-    year : int, optional
-        If df is a dictionary, specify which year to visualize
-    top_n : int, optional
-        Number of top suppliers to display (default: 10)
-
-    Returns:
-    --------
-    fig : matplotlib Figure
-        The figure containing the visualizations
-    """
-    # If we have multiple years, extract the specified year
-    if isinstance(df, dict) and year is not None:
-        if year in df:
-            df = df[year]
-        else:
-            raise ValueError(f"Year {year} not found in data")
-
-    # Get top suppliers data
-    top_suppliers_df = get_top_suppliers(df, top_n=top_n)
-
-    # Convert payments string to numeric values
-    top_suppliers_df['Medicare Payments (Numeric)'] = top_suppliers_df['Medicare Payments'].str.replace(
-        '$', '').str.replace(',', '').astype(float)
-
-    # Sort by payment amount
-    top_suppliers_df = top_suppliers_df.sort_values(
-        'Medicare Payments (Numeric)', ascending=True)
-
-    # Create a figure with 2 subplots
-    fig, axes = plt.subplots(1, 2, figsize=(20, 10))
-
-    # Payments bar chart
-    sns.barplot(x='Medicare Payments (Numeric)', y='Supplier', data=top_suppliers_df,
-                palette="viridis", ax=axes[0])
-    axes[0].set_title(
-        f'Top {top_n} Suppliers by Medicare Payments', fontsize=16)
-    axes[0].set_xlabel('Medicare Payments ($)', fontsize=12)
-    axes[0].xaxis.set_major_formatter(
-        plt.FuncFormatter(lambda x, pos: f'${x/1e6:.1f}M'))
-    axes[0].grid(axis='x')
-
-    # Beneficiaries bar chart
-    top_suppliers_df = top_suppliers_df.sort_values(
-        'Beneficiaries', ascending=True)
-    sns.barplot(x='Beneficiaries', y='Supplier', data=top_suppliers_df,
-                palette="plasma", ax=axes[1])
-    axes[1].set_title(
-        f'Top {top_n} Suppliers by Number of Beneficiaries', fontsize=16)
-    axes[1].set_xlabel('Number of Beneficiaries', fontsize=12)
-    axes[1].xaxis.set_major_formatter(
-        plt.FuncFormatter(lambda x, pos: f'{x:.0f}'))
-    axes[1].grid(axis='x')
-
-    plt.tight_layout()
-    return fig
-
-
-def plot_geographical_distribution(df, year=None):
-    """
-    Create visualizations for the geographical distribution of suppliers.
-
-    Parameters:
-    -----------
-    df : DataFrame or dict
-        Either a DataFrame for a specific year or the df_by_year dictionary
-    year : int, optional
-        If df is a dictionary, specify which year to visualize
-
-    Returns:
-    --------
-    fig : matplotlib Figure
-        The figure containing the visualizations
-    """
-    # If we have multiple years, extract the specified year
-    if isinstance(df, dict) and year is not None:
-        if year in df:
-            df = df[year]
-        else:
-            raise ValueError(f"Year {year} not found in data")
-
-    # Create a figure with 2 subplots
-    fig, axes = plt.subplots(1, 2, figsize=(20, 8))
-
-    # State distribution
-    state_counts = df['Suplr_Prvdr_State_Abrvtn'].value_counts().reset_index()
-    state_counts.columns = ['State', 'Suppliers']
-
-    # Sort by count (descending) and get top 15
-    state_counts = state_counts.sort_values(
-        'Suppliers', ascending=False).head(15)
-
-    sns.barplot(x='Suppliers', y='State', data=state_counts,
-                palette="viridis", ax=axes[0])
-    axes[0].set_title('Top 15 States by Number of Suppliers', fontsize=16)
-    axes[0].set_xlabel('Number of Suppliers', fontsize=12)
-    axes[0].grid(axis='x')
-
-    # Rural vs Urban distribution
-    if 'Suplr_Prvdr_RUCA_Desc' in df.columns:
-        ruca_counts = df['Suplr_Prvdr_RUCA_Desc'].value_counts().reset_index()
-        ruca_counts.columns = ['RUCA Description', 'Suppliers']
-
-        explode = [0.1] * len(ruca_counts)  # Explode all slices
-
-        # Plot pie chart for RUCA distribution
-        axes[1].pie(ruca_counts['Suppliers'], labels=ruca_counts['RUCA Description'],
-                    autopct='%1.1f%%', startangle=90,
-                    colors=sns.color_palette("Set2", len(ruca_counts)),
-                    explode=explode)
-        axes[1].set_title(
-            'Supplier Distribution by Rural-Urban Classification', fontsize=16)
-    else:
-        axes[1].text(0.5, 0.5, 'RUCA Description not available',
-                     ha='center', va='center', fontsize=14)
-        axes[1].set_title(
-            'Rural-Urban Distribution (Not Available)', fontsize=16)
-
-    plt.tight_layout()
-    return fig
-
-
-def create_notebook_visualizations(df_by_year):
-    """
-    Create all visualizations for a Jupyter notebook.
-
-    This is a convenience function that calls all visualization functions
-    and returns them for display in a Jupyter notebook.
-
-    Parameters:
-    -----------
-    df_by_year : dict
-        Dictionary with yearly dataframes, as created in main()
-
-    Returns:
-    --------
-    visualizations : dict
-        Dictionary with all visualizations
-    """
-    import matplotlib.pyplot as plt
-
-    # Most recent year
-    recent_year = max(df_by_year.keys())
-
-    # Create spending trend visualizations
-    spend_df = analyze_spending_patterns(df_by_year)
-    spending_fig = plot_spending_trends(spend_df)
-
-    # Create demographics visualizations for most recent year
-    demographics_fig = plot_demographics(df_by_year[recent_year])
-
-    # Create health conditions visualizations for most recent year
-    health_conditions_fig = plot_health_conditions(df_by_year[recent_year])
-
-    # Create top suppliers visualizations for most recent year
-    suppliers_fig = plot_top_suppliers(df_by_year[recent_year])
-
-    # Create geographical distribution visualizations for most recent year
-    geo_fig = plot_geographical_distribution(df_by_year[recent_year])
-
-    # Return all visualizations
-    return {
-        'spending_trends': spending_fig,
-        'demographics': demographics_fig,
-        'health_conditions': health_conditions_fig,
-        'top_suppliers': suppliers_fig,
-        'geographical_distribution': geo_fig
-    }
-
-
-def main():
-    """Main function to import and analyze DME data files."""
-    print("DME Data Analysis")
-    print("================\n")
-
-    # Dictionary to store dataframes by year
-    df_by_year = {}
-
-    # Import data for years 2017-2022
-    for year in range(2017, 2023):
-        csv_path = f"data/{year}/mup_dme_ry24_p05_v10_dy{str(year)[-2:]}_supr.csv"
-        if os.path.exists(csv_path):
-            print(f"Importing data for {year}...")
-            df_by_year[year] = pd.read_csv(csv_path, low_memory=False)
-            print(
-                f"✓ Data for {year} imported successfully. Shape: {df_by_year[year].shape}")
-        else:
-            print(f"Warning: No data file found for {year}")
-
-    print("\nAll available data files have been imported.")
-
-    # Data Overview
-    print("\n1. Data Overview")
-    print("---------------\n")
-
-    # Create a summary table
-    summary_data = {
-        'Year': [],
-        'Suppliers': [],
-        'Total Beneficiaries': [],
-        'Total Claims': [],
-        'Total Payments ($)': []
-    }
-
-    for year, df in df_by_year.items():
-        summary_data['Year'].append(year)
-        summary_data['Suppliers'].append(df.shape[0])
-        summary_data['Total Beneficiaries'].append(
-            df['DME_Tot_Suplr_Benes'].sum())
-        summary_data['Total Claims'].append(df['DME_Tot_Suplr_Clms'].sum())
-        summary_data['Total Payments ($)'].append(
-            df['DME_Suplr_Mdcr_Pymt_Amt'].sum())
-
-    summary_df = pd.DataFrame(summary_data)
-    print("Summary statistics across years:")
-    print(summary_df.to_string(index=False,
-          float_format=lambda x: f"{x:,.0f}" if isinstance(x, (int, float)) else x))
-
-    # Calculate year-over-year changes
-    if len(summary_df) > 1:
-        yoy_data = {
-            'Metric': ['Suppliers', 'Beneficiaries', 'Claims', 'Payments'],
-            'Change 2021-2022 (%)': [0, 0, 0, 0]
-        }
-
-        # Calculate year-over-year changes for the most recent years
-        if 2021 in df_by_year and 2022 in df_by_year:
-            suppliers_2021 = summary_df[summary_df['Year']
-                                        == 2021]['Suppliers'].values[0]
-            suppliers_2022 = summary_df[summary_df['Year']
-                                        == 2022]['Suppliers'].values[0]
-            bene_2021 = summary_df[summary_df['Year'] ==
-                                   2021]['Total Beneficiaries'].values[0]
-            bene_2022 = summary_df[summary_df['Year'] ==
-                                   2022]['Total Beneficiaries'].values[0]
-            claims_2021 = summary_df[summary_df['Year']
-                                     == 2021]['Total Claims'].values[0]
-            claims_2022 = summary_df[summary_df['Year']
-                                     == 2022]['Total Claims'].values[0]
-            payments_2021 = summary_df[summary_df['Year']
-                                       == 2021]['Total Payments ($)'].values[0]
-            payments_2022 = summary_df[summary_df['Year']
-                                       == 2022]['Total Payments ($)'].values[0]
-
-            # Calculate percentage changes
-            yoy_data['Change 2021-2022 (%)'][0] = (
-                (suppliers_2022 - suppliers_2021) / suppliers_2021) * 100
-            yoy_data['Change 2021-2022 (%)'][1] = (
-                (bene_2022 - bene_2021) / bene_2021) * 100
-            yoy_data['Change 2021-2022 (%)'][2] = (
-                (claims_2022 - claims_2021) / claims_2021) * 100
-            yoy_data['Change 2021-2022 (%)'][3] = (
-                (payments_2022 - payments_2021) / payments_2021) * 100
-
-            yoy_df = pd.DataFrame(yoy_data)
-            print("\nYear-over-year changes (2021-2022):")
-            print(yoy_df.to_string(
-                index=False, float_format=lambda x: f"{x:.2f}%"))
-
-    # Column categories
-    print("\nColumn Categories:")
-    recent_year = max(df_by_year.keys())
-    df = df_by_year[recent_year]
-
-    categories = set()
-    for col in df.columns:
-        categories.add(get_column_category(col))
-
-    for category in sorted(categories):
-        # Print a few example columns for each category
-        example_cols = [
-            col for col in df.columns if get_column_category(col) == category][:3]
-        print(
-            f"  - {category}: {len([col for col in df.columns if get_column_category(col) == category])} columns")
-        print(f"    Examples: {', '.join(example_cols)}")
-        for col in example_cols:
-            if col in DATA_DICTIONARY:
-                print(f"      {col}: {DATA_DICTIONARY[col]}")
-
-    # Top Suppliers
-    print("\n2. Top Suppliers")
-    print("--------------\n")
-    recent_year = max(df_by_year.keys())
-    top_suppliers = get_top_suppliers(df_by_year[recent_year])
-    print(f"Top suppliers for {recent_year}:")
-    print(top_suppliers.to_string(index=False))
-
-    # Beneficiary Demographics
-    print("\n3. Beneficiary Demographics")
-    print("--------------------------\n")
-    demographics = get_beneficiary_demographics(df_by_year[recent_year])
-    print(f"Demographics for {recent_year}:")
-
-    # Print age distribution
-    print("\nAge Distribution:")
-    for age_group, percentage in demographics['Age Distribution'].items():
-        print(f"  - {age_group}: {percentage:.2f}%")
-
-    # Print gender distribution
-    print("\nGender Distribution:")
-    for gender, percentage in demographics['Gender Distribution'].items():
-        print(f"  - {gender}: {percentage:.2f}%")
-
-    # Print race distribution
-    print("\nRace Distribution:")
-    for race, percentage in demographics['Race Distribution'].items():
-        print(f"  - {race}: {percentage:.2f}%")
-
-    # Health Conditions
-    print("\n4. Common Health Conditions")
-    print("-------------------------\n")
-    conditions = get_common_health_conditions(df_by_year[recent_year])
-    print(f"Health conditions for {recent_year}:")
-
-    # Print physical health conditions
-    print("\nPhysical Health Conditions:")
-    for condition, percentage in conditions['Physical Health Conditions'][:10]:
-        print(f"  - {condition}: {percentage:.2f}%")
-
-    # Print behavioral health conditions
-    print("\nBehavioral Health Conditions:")
-    for condition, percentage in conditions['Behavioral Health Conditions'][:10]:
-        print(f"  - {condition}: {percentage:.2f}%")
-
-    # Spending Patterns
-    print("\n5. Medicare Spending Patterns")
-    print("---------------------------\n")
-    spending_df = analyze_spending_patterns(df_by_year)
-
-    # Format the DataFrame for display with appropriate formatting
-    formatted_spending_df = spending_df.copy()
-
-    # Format monetary columns with dollar signs
-    monetary_cols = ['Total Spending', 'Spending Per Beneficiary', 'DME Spending',
-                     'Prosthetic/Orthotic Spending', 'Drug Spending']
-    for col in monetary_cols:
-        if col in formatted_spending_df.columns:
-            formatted_spending_df[col] = formatted_spending_df[col].apply(
-                lambda x: f"${x:,.2f}")
-
-    # Format count columns with commas
-    count_cols = ['Year', 'Total Beneficiaries']
-    for col in count_cols:
-        if col in formatted_spending_df.columns:
-            formatted_spending_df[col] = formatted_spending_df[col].apply(
-                lambda x: f"{x:,.0f}")
-
-    print("Medicare spending patterns across years:")
-    print(formatted_spending_df.to_string(index=False))
-
-    # ----- VISUALIZATIONS -----
-    print("\n\n6. Generating Visualizations")
-    print("---------------------------\n")
-
-    # Setting plot style
-    sns.set_style('whitegrid')
-    plt.rcParams['figure.figsize'] = [14, 9]
-
-    # Generate all visualizations
-    visualizations = {}
-
-    # 1. Spending Trends
-    print("Generating spending trends visualization...")
-    spending_trends_fig = plot_spending_trends(spending_df)
-    visualizations['spending_trends'] = spending_trends_fig
-
-    # 2. Demographics
-    print("Generating demographics visualization...")
-    demographics_fig = plot_demographics(df_by_year[recent_year])
-    visualizations['demographics'] = demographics_fig
-
-    # 3. Health Conditions
-    print("Generating health conditions visualization...")
-    health_conditions_fig = plot_health_conditions(df_by_year[recent_year])
-    visualizations['health_conditions'] = health_conditions_fig
-
-    # 4. Top Suppliers
-    print("Generating top suppliers visualization...")
-    suppliers_fig = plot_top_suppliers(df_by_year[recent_year])
-    visualizations['top_suppliers'] = suppliers_fig
-
-    # 5. Geographical Distribution
-    print("Generating geographical distribution visualization...")
-    geo_fig = plot_geographical_distribution(df_by_year[recent_year])
-    visualizations['geographical_distribution'] = geo_fig
-
-    # 6. Custom visualization: YoY percentage changes
-    print("Generating year-over-year changes visualization...")
-
-    # Calculate YoY percentage changes
-    spending_df['Beneficiaries % Change'] = spending_df['Total Beneficiaries'].pct_change() * \
-        100
-    spending_df['Spending % Change'] = spending_df['Total Spending'].pct_change() * \
-        100
-    spending_df['Per Beneficiary % Change'] = spending_df['Spending Per Beneficiary'].pct_change() * \
-        100
-
-    # Create plot
-    yoy_fig, ax = plt.subplots(figsize=(14, 8))
-    metrics = ['Beneficiaries % Change',
-               'Spending % Change', 'Per Beneficiary % Change']
-    colors = ['#1f77b4', '#ff7f0e', '#2ca02c']
-
-    for i, metric in enumerate(metrics):
-        ax.plot(spending_df['Year'][1:], spending_df[metric][1:],
-                marker='o', linewidth=3, markersize=10,
-                label=metric.replace(' % Change', ''),
-                color=colors[i])
-
-    ax.axhline(y=0, color='r', linestyle='--', alpha=0.5)
-    ax.set_title(
-        'Year-over-Year Percentage Changes in Key Metrics', fontsize=16)
-    ax.legend(fontsize=12)
-    ax.grid(True)
-    ax.set_xlabel('Year', fontsize=14)
-    ax.set_ylabel('Percentage Change (%)', fontsize=14)
-    visualizations['yoy_changes'] = yoy_fig
-
-    # Save visualizations to files if not in a notebook environment
-    try:
-        # Check if we're in a notebook environment
-        if 'ipykernel' not in sys.modules:
-            print("\nSaving visualizations to files...")
-            os.makedirs('visualizations', exist_ok=True)
-            for name, fig in visualizations.items():
-                fig.savefig(
-                    f'visualizations/{name}.png', dpi=300, bbox_inches='tight')
-                print(f"Saved: visualizations/{name}.png")
-    except:
-        print("Note: Visualizations will be displayed if run in a Jupyter notebook")
-
-    # When run in Jupyter, the figures will be displayed inline
-    return df_by_year, visualizations
-
-
-if __name__ == "__main__":
-    import sys
-    main()
diff --git a/dme_dictionary.py b/dme_dictionary.py
new file mode 100644
index 0000000..f5cd696
--- /dev/null
+++ b/dme_dictionary.py
@@ -0,0 +1,116 @@
+DATA_DICTIONARY = {
+    # Supplier Information
+    "Suplr_NPI": "Supplier NPI - NPI for the Supplier on the DMEPOS claim",
+    "Suplr_Prvdr_Last_Name_Org": "Supplier Last Name/Organization Name - When registered as individual, the Supplier's last name. When registered as organization, this is the organization name",
+    "Suplr_Prvdr_First_Name": "Supplier First Name - When registered as individual, the Supplier's first name",
+    "Suplr_Prvdr_MI": "Supplier Middle Initial - When registered as individual, the Supplier's middle initial",
+    "Suplr_Prvdr_Crdntls": "Supplier Credentials - When registered as individual, these are the Supplier's credentials",
+    "Suplr_Prvdr_Gndr": "Supplier Gender - When registered as individual, this is the Supplier's gender",
+    "Suplr_Prvdr_Ent_Cd": "Supplier Entity Code - 'I' identifies Suppliers registered as individuals, 'O' identifies Suppliers registered as organizations",
+    "Suplr_Prvdr_St1": "Supplier Street 1 - First line of the Supplier's street address",
+    "Suplr_Prvdr_St2": "Supplier Street 2 - Second line of the Supplier's street address",
+    "Suplr_Prvdr_City": "Supplier City - The city where the Supplier is located",
+    "Suplr_Prvdr_State_Abrvtn": "Supplier State - State postal abbreviation where the Supplier is located",
+    "Suplr_Prvdr_State_FIPS": "Supplier State FIPS Code - FIPS code for Supplier's state",
+    "Suplr_Prvdr_Zip5": "Supplier ZIP - The Supplier's ZIP code",
+    "Suplr_Prvdr_RUCA": "Supplier RUCA - Rural-Urban Commuting Area Code for the Supplier ZIP code",
+    "Suplr_Prvdr_RUCA_Desc": "Supplier RUCA Description - Description of Rural-Urban Commuting Area (RUCA) Code",
+    "Suplr_Prvdr_Cntry": "Supplier Country - Country where the Supplier is located",
+    "Suplr_Prvdr_Spclty_Desc": "Supplier Provider Specialty Description - Derived from Medicare provider/supplier specialty code",
+    "Suplr_Prvdr_Spclty_Srce": "Supplier Provider Specialty Source - Source of the Supplier Specialty (claims-specialty or NPPES-specialty)",
+
+    # Total Supplier Claims/Services
+    "Tot_Suplr_HCPCS_Cds": "Number of Supplier HCPCS - Total unique DMEPOS product/service HCPCS codes",
+    "Tot_Suplr_Benes": "Number of Supplier Beneficiaries - Total unique beneficiaries (<11 are suppressed)",
+    "Tot_Suplr_Clms": "Number of Supplier Claims - Total DMEPOS claims submitted",
+    "Tot_Suplr_Srvcs": "Number of Supplier Services - Total DMEPOS products/services rendered",
+    "Suplr_Sbmtd_Chrgs": "Supplier Submitted Charges - Total charges submitted for DMEPOS products/services",
+    "Suplr_Mdcr_Alowd_Amt": "Supplier Medicare Allowed Amount - Total Medicare allowed amount",
+    "Suplr_Mdcr_Pymt_Amt": "Supplier Medicare Payment Amount - Amount Medicare paid after deductible/coinsurance",
+    "Suplr_Mdcr_Stdzd_Pymt_Amt": "Supplier Medicare Standard Payment Amount - Standardized Medicare payments",
+
+    # DME-specific Fields
+    "DME_Sprsn_Ind": "Durable Medical Equipment Suppression Indicator - '*'=suppressed (1-10 claims), '#'=counter-suppressed",
+    "DME_Tot_Suplr_HCPCS_Cds": "Number of DME HCPCS - Total unique DME HCPCS codes",
+    "DME_Tot_Suplr_Benes": "Number of DME Beneficiaries - Total unique beneficiaries with DME claims (<11 are suppressed)",
+    "DME_Tot_Suplr_Clms": "Number of DME Claims - Total DME claims submitted",
+    "DME_Tot_Suplr_Srvcs": "Number of DME Services - Total DME products/services rendered",
+    "DME_Suplr_Sbmtd_Chrgs": "DME Submitted Charges - Total charges submitted for DME products/services",
+    "DME_Suplr_Mdcr_Alowd_Amt": "DME Medicare Allowed Amount - Total Medicare allowed amount for DME",
+    "DME_Suplr_Mdcr_Pymt_Amt": "DME Medicare Payment Amount - Amount Medicare paid for DME after deductible/coinsurance",
+    "DME_Suplr_Mdcr_Stdzd_Pymt_Amt": "DME Medicare Standard Payment Amount - Standardized Medicare payments for DME",
+
+    # Prosthetic and Orthotic Fields
+    "POS_Sprsn_Ind": "Prosthetic and Orthotic Suppression Indicator - '*'=suppressed (1-10 claims), '#'=counter-suppressed",
+    "POS_Tot_Suplr_HCPCS_Cds": "Number of Prosthetic/Orthotic HCPCS - Total unique prosthetic/orthotic HCPCS codes",
+    "POS_Tot_Suplr_Benes": "Number of Prosthetic/Orthotic Beneficiaries - Total unique beneficiaries",
+    "POS_Tot_Suplr_Clms": "Number of Prosthetic/Orthotic Claims - Total prosthetic/orthotic claims submitted",
+    "POS_Tot_Suplr_Srvcs": "Number of Prosthetic/Orthotic Services - Total prosthetic/orthotic products/services",
+    "POS_Suplr_Sbmtd_Chrgs": "Prosthetic/Orthotic Submitted Charges - Total charges submitted for prosthetic/orthotic",
+    "POS_Suplr_Mdcr_Alowd_Amt": "Prosthetic/Orthotic Medicare Allowed Amount - Total Medicare allowed amount",
+    "POS_Suplr_Mdcr_Pymt_Amt": "Prosthetic/Orthotic Medicare Payment Amount - Amount Medicare paid after deductible/coinsurance",
+    "POS_Suplr_Mdcr_Stdzd_Pymt_Amt": "Prosthetic/Orthotic Medicare Standard Payment Amount - Standardized Medicare payments",
+
+    # Drug and Nutritional Fields
+    "Drug_Sprsn_Ind": "Drug and Nutritional Suppression Indicator - '*'=suppressed (1-10 claims), '#'=counter-suppressed",
+    "Drug_Tot_Suplr_HCPCS_Cds": "Number of Drug/Nutritional HCPCS - Total unique drug/nutritional HCPCS codes",
+    "Drug_Tot_Suplr_Benes": "Number of Drug/Nutritional Beneficiaries - Total unique beneficiaries",
+    "Drug_Tot_Suplr_Clms": "Number of Drug/Nutritional Claims - Total drug/nutritional claims submitted",
+    "Drug_Tot_Suplr_Srvcs": "Number of Drug/Nutritional Services - Total drug/nutritional products/services",
+    "Drug_Suplr_Sbmtd_Chrgs": "Drug/Nutritional Submitted Charges - Total charges submitted for drug/nutritional",
+    "Drug_Suplr_Mdcr_Alowd_Amt": "Drug/Nutritional Medicare Allowed Amount - Total Medicare allowed amount",
+    "Drug_Suplr_Mdcr_Pymt_Amt": "Drug/Nutritional Medicare Payment Amount - Amount Medicare paid after deductible/coinsurance",
+    "Drug_Suplr_Mdcr_Stdzd_Pymt_Amt": "Drug/Nutritional Medicare Standard Payment Amount - Standardized Medicare payments",
+
+    # Beneficiary Demographics
+    "Bene_Avg_Age": "Average Age of Beneficiaries - Average age at end of calendar year or time of death",
+    "Bene_Age_LT_65_Cnt": "Number of Beneficiaries <65 - Count of beneficiaries under 65 years old",
+    "Bene_Age_65_74_Cnt": "Number of Beneficiaries 65-74 - Count of beneficiaries between 65-74 years old",
+    "Bene_Age_75_84_Cnt": "Number of Beneficiaries 75-84 - Count of beneficiaries between 75-84 years old",
+    "Bene_Age_GT_84_Cnt": "Number of Beneficiaries >84 - Count of beneficiaries over 84 years old",
+    "Bene_Feml_Cnt": "Number of Female Beneficiaries - Count of female beneficiaries",
+    "Bene_Male_Cnt": "Number of Male Beneficiaries - Count of male beneficiaries",
+    "Bene_Race_Wht_Cnt": "Number of White Beneficiaries - Count of non-Hispanic white beneficiaries",
+    "Bene_Race_Black_Cnt": "Number of Black Beneficiaries - Count of non-Hispanic Black/African American beneficiaries",
+    "Bene_Race_Api_Cnt": "Number of Asian/PI Beneficiaries - Count of Asian Pacific Islander beneficiaries",
+    "Bene_Race_Hspnc_Cnt": "Number of Hispanic Beneficiaries - Count of Hispanic beneficiaries",
+    "Bene_Race_Natind_Cnt": "Number of Native American/Alaska Native Beneficiaries - Count of American Indian/Alaska Native beneficiaries",
+    "Bene_Race_Othr_Cnt": "Number of Other Race Beneficiaries - Count of beneficiaries with race not elsewhere classified",
+    "Bene_Ndual_Cnt": "Number of Medicare-Only Beneficiaries - Count of Medicare-only beneficiaries",
+    "Bene_Dual_Cnt": "Number of Medicare & Medicaid Beneficiaries - Count of dual-eligible beneficiaries",
+
+    # Beneficiary Health Conditions (Mental/Behavioral Health)
+    "Bene_CC_BH_ADHD_OthCD_V1_Pct": "Percent with ADHD and Other Conduct Disorders",
+    "Bene_CC_BH_Alcohol_Drug_V1_Pct": "Percent with Alcohol and Drug Use Disorders",
+    "Bene_CC_BH_Tobacco_V1_Pct": "Percent with Tobacco Use Disorders",
+    "Bene_CC_BH_Alz_NonAlzdem_V2_Pct": "Percent with Alzheimer's and Non-Alzheimer's Dementia",
+    "Bene_CC_BH_Anxiety_V1_Pct": "Percent with Anxiety Disorders",
+    "Bene_CC_BH_Bipolar_V1_Pct": "Percent with Bipolar Disorder",
+    "Bene_CC_BH_Mood_V2_Pct": "Percent with Depression, Bipolar or Other Mood Disorders",
+    "Bene_CC_BH_Depress_V1_Pct": "Percent with Major Depressive Affective Disorder",
+    "Bene_CC_BH_PD_V1_Pct": "Percent with Personality Disorders",
+    "Bene_CC_BH_PTSD_V1_Pct": "Percent with Post-Traumatic Stress Disorder",
+    "Bene_CC_BH_Schizo_OthPsy_V1_Pct": "Percent with Schizophrenia and Other Psychotic Disorders",
+
+    # Beneficiary Health Conditions (Physical Health)
+    "Bene_CC_PH_Asthma_V2_Pct": "Percent with Asthma",
+    "Bene_CC_PH_Afib_V2_Pct": "Percent with Atrial Fibrillation and Flutter",
+    "Bene_CC_PH_Cancer6_V2_Pct": "Percent with Cancer (combined 6 cancer indicators)",
+    "Bene_CC_PH_CKD_V2_Pct": "Percent with Chronic Kidney Disease",
+    "Bene_CC_PH_COPD_V2_Pct": "Percent with Chronic Obstructive Pulmonary Disease",
+    "Bene_CC_PH_Diabetes_V2_Pct": "Percent with Diabetes",
+    "Bene_CC_PH_HF_NonIHD_V2_Pct": "Percent with Heart Failure and Non-Ischemic Heart Disease",
+    "Bene_CC_PH_Hyperlipidemia_V2_Pct": "Percent with Hyperlipidemia",
+    "Bene_CC_PH_Hypertension_V2_Pct": "Percent with Hypertension",
+    "Bene_CC_PH_IschemicHeart_V2_Pct": "Percent with Ischemic Heart Disease",
+    "Bene_CC_PH_Osteoporosis_V2_Pct": "Percent with Osteoporosis",
+    "Bene_CC_PH_Parkinson_V2_Pct": "Percent with Parkinson's Disease",
+    "Bene_CC_PH_Arthritis_V2_Pct": "Percent with Rheumatoid Arthritis/Osteoarthritis",
+    "Bene_CC_PH_Stroke_TIA_V2_Pct": "Percent with Stroke/Transient Ischemic Attack",
+
+    # Risk Score
+    "Bene_Avg_Risk_Scre": "Average HCC Risk Score of Beneficiaries",
+
+    # Year column (added by our script)
+    "year": "Year of the data"
+}
diff --git a/dme_notebook_example.ipynb b/dme_notebook_example.ipynb
deleted file mode 100644
index 0519ecb..0000000
--- a/dme_notebook_example.ipynb
+++ /dev/null
@@ -1 +0,0 @@
- 
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
deleted file mode 100644
index f40278b..0000000
--- a/requirements.txt
+++ /dev/null
@@ -1,7 +0,0 @@
-jupyter==1.0.0
-notebook==7.3.2
-pandas==2.2.3
-numpy==1.26.0
-matplotlib==3.9.2
-seaborn==0.13.2
-scikit-learn==1.6.1 
\ No newline at end of file