8 changes: 4 additions & 4 deletions morpc/census/geos.py
@@ -57,9 +57,9 @@
"for": f"county:{','.join([morpc.CONST_COUNTY_NAME_TO_ID[x][2:6] for x in morpc.CONST_REGIONS['CEDS Region']])}"
}
},
{"regionmsa": {
{"regioncbsa": {
"in": "state:39",
"for": f"county:{','.join([morpc.CONST_COUNTY_NAME_TO_ID[x][2:6] for x in morpc.CONST_REGIONS['REGIONMSA']])}"
"for": f"county:{','.join([morpc.CONST_COUNTY_NAME_TO_ID[x][2:6] for x in morpc.CONST_REGIONS['CBSA']])}"
}
}
]
@@ -68,8 +68,8 @@
"us": {
'for': 'us:1'
},
"columbusmsa": {
"for": f"metropolitan statistical area/micropolitan statistical area:{morpc.CONST_COLUMBUS_MSA_ID}"
"columbuscbsa": {
"for": f"metropolitan statistical area/micropolitan statistical area:{morpc.CONST_COLUMBUS_CBSA_ID}"
}
}

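The dict entries above are geography clauses for the Census Bureau data API. As a rough sketch of how such a clause might be combined into a request (the endpoint, dataset year, variable, and county codes here are illustrative assumptions, not values taken from this change):

    # Hypothetical sketch: the endpoint, variable, and county codes below are
    # assumptions for illustration; only the "for"/"in" clause shape comes from
    # the dicts above.
    import requests

    geography = {
        "in": "state:39",             # Ohio
        "for": "county:041,045,049",  # example three-digit county codes
    }

    params = {"get": "NAME,B01001_001E", **geography}
    resp = requests.get("https://api.census.gov/data/2022/acs/acs5", params=params)
    resp.raise_for_status()
    print(resp.json()[:3])  # header row plus the first data rows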
18 changes: 16 additions & 2 deletions morpc/logs.py
@@ -19,12 +19,23 @@
"critical": 50
}

-def config_logs(filename, level, mode = 'w'):
+def config_logs(filename=None, level='info', mode = 'w'):
"""
Set up logs within a notebook to store log outputs in filename, and display in output.
"""
import logging
import sys
+import os
+import ipykernel
+import json

+fileSpecified = True
+if(filename is None):
+    fileSpecified = False
+    connectionInfo = json.loads(ipykernel.get_connection_info())
+    scriptFilename = os.path.basename(os.path.normpath(connectionInfo["jupyter_session"]))
+    filename = f"{scriptFilename}.log"

logging.basicConfig(
level=LEVEL_MAP[level],
force=True,
@@ -36,4 +47,7 @@ def config_logs(filename, level, mode = 'w'):
)
logging.getLogger(__name__).setLevel(LEVEL_MAP[level])

-logger.info(f'Set up logging save to file {filename}')
+if(fileSpecified == False):
+    logger.info('Log filename not specified. Using default filename.')

+logger.info(f'Set up logging to file "{filename}", log level "{level}"')
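The updated signature supports both the old and new call patterns. A short sketch, assuming the function is importable as morpc.logs.config_logs:

    from morpc.logs import config_logs

    # Explicit filename and level, as before; mode='w' truncates any existing log.
    config_logs(filename="process-data.log", level="info", mode="w")

    # New with this change: omit the filename and a default is derived from the
    # running Jupyter session; the level now defaults to "info" as well.
    config_logs()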
162 changes: 154 additions & 8 deletions morpc/morpc.py
@@ -24,7 +24,7 @@

# Commonly used geographic identifiers
# The following are assigned by the U.S. Census Bureau
-CONST_COLUMBUS_MSA_ID = '18140'
+CONST_COLUMBUS_CBSA_ID = '18140'
CONST_OHIO_STATE_ID = '39'
CONST_OHIO_REGION_ID = '2' # Midwest
CONST_OHIO_DIVISION_ID = '3' # East North Central
@@ -124,7 +124,7 @@ def get_state_ids():
CONST_REGIONS["OneColumbus Region"] = CONST_REGIONS["REGIONONECBUS"]
CONST_REGIONS["REGIONCEDS"] = CONST_REGIONS["REGION10"] + ["Logan"]
CONST_REGIONS["CEDS Region"] = CONST_REGIONS["REGIONCEDS"]
CONST_REGIONS["REGIONMSA"] = CONST_REGIONS["REGION7"] + ["Hocking","Morrow","Perry"]
CONST_REGIONS["CBSA"] = CONST_REGIONS["REGION7"] + ["Hocking","Morrow","Perry"]

# Region identifiers
# Note that the Columbus MSA already has a GEOID that is defined by the Census Bureau. See CONST_COLUMBUS_MSA_ID above.
@@ -138,11 +138,11 @@
CONST_REGIONS_GEOID["REGIONONECBUS"] = "001"
CONST_REGIONS_GEOID["REGIONMPO"] = "001"
CONST_REGIONS_GEOID["REGIONTDM"] = "001"
CONST_REGIONS_GEOID["REGIONMSA"] = CONST_COLUMBUS_MSA_ID
CONST_REGIONS_GEOID["CBSA"] = CONST_COLUMBUS_CBSA_ID

# The following regions are comprised of collections of whole counties. Not all region definitions are county-based,
# for example the MPO region.
-CONST_REGIONS_COUNTYBASED = ["REGION15","REGION10","REGION7","REGIONCEDS","REGIONCORPO","REGIONONECBUS","REGIONMSA"]
+CONST_REGIONS_COUNTYBASED = ["REGION15","REGION10","REGION7","REGIONCEDS","REGIONCORPO","REGIONONECBUS","CBSA"]

# County name abbreviations
## CONST_COUNTY_ABBREV maps the full county name to its three-letter abbreviation
@@ -2807,7 +2807,7 @@ def write_table(df, path, format=None, index=None):
print("morpc.write_table | ERROR | This function does not currently support format {}. Add export arguments for this format in morpc.PANDAS_EXPORT_ARGS_OVERRIDE or use the native pandas export functions.".format(format))
raise RuntimeError

-def reapportion_by_area(targetGeos, sourceGeos, apportionColumns=None, summaryType="sum", roundPreserveSum=None, partialCoverageStrategy="error", zeroCoverageStrategy="error", sourceShareTolerance=6, targetShareTolerance=6):
+def reapportion_by_area(targetGeos, sourceGeos, apportionColumns=None, summaryType="sum", roundPreserveSum=None, partialCoverageStrategy="error", zeroCoverageStrategy="error", sourceShareTolerance=6, targetShareTolerance=6, returnIntersectData=False):
"""
Given you have some variable(s) summarized at one geography level, reapportion those variables to other geographies in proportion
to the area of overlap of the target geographies with the source geographies. This is accomplished by intersecting the target
@@ -2855,11 +2855,17 @@ def reapportion_by_area(targetGeos, sourceGeos, apportionColumns=None, summaryTy
geographies do not sum to 1. Round the sums to the specified decimal place prior to evaluation. Sum greater than 1 may indicate
that there are overlapping polygons in the target geos or source geos. Sum less than 1 may indicate that portions of the target geos
do not overlap the source geos. Set to None to allow no tolerance (warning will be generated if shares do not sum to exactly 1).
+returnIntersectData : bool
+    Optional. If False (default), return only the reapportioned data. If True, return a second output (GeoDataFrame)
+    consisting of the intersection geometries and their attributes.

Returns
-------
targetGeosUpdated : geopandas.geodataframe.GeoDataFrame with polygon geometry type
An updated version of targetGeos that includes the reapportioned variables.
+intersectGeosOutput : geopandas.geodataframe.GeoDataFrame with polygon geometry type
+    Returned only if returnIntersectData is True. The results of intersecting the source geos and target geos, including
+    areas and shares for both, plus geometry and area for each intersection polygon.
"""

import pandas as pd
@@ -2922,6 +2928,11 @@
# Compute the share of the target geo that each intersection polygon represents
intersectGeos["TARGET_SHARE"] = intersectGeos["INTERSECT_GEOS_AREA"] / intersectGeos["TARGET_GEOS_AREA"]

+# Create a copy of the intersection data that we can output if the user requested it. Put the columns in a sensible order.
+intersectGeosOutput = intersectGeos.copy()
+intersectGeosOutput = intersectGeosOutput.filter(items=["sourceIndex","targetIndex","SOURCE_GEOS_AREA","TARGET_GEOS_AREA","INTERSECT_GEOS_AREA",
+    "SOURCE_SHARE","TARGET_SHARE","geometry"], axis="columns")

# Make a list of the source geo IDs that appeared in the original source data but do not appear in the intersection data.
# These are source geos that had zero overlap with the target geos. If there are entries in the list, throw an error if appropriate.
if(summaryType == "sum"):
@@ -3043,8 +3054,11 @@ def reapportion_by_area(targetGeos, sourceGeos, apportionColumns=None, summaryTy
# Reorder the target geos columns as they were originally and append the reapportioned variables
# to the end.
targetGeosUpdated = targetGeosUpdated.filter(items=list(targetGeos.columns)+apportionColumns, axis="columns")

-return targetGeosUpdated

+if(returnIntersectData == True):
+    return (targetGeosUpdated, intersectGeosOutput)
+else:
+    return targetGeosUpdated

def hist_scaled(series, logy="auto", yRatioThreshold=100, xClassify=False, xRatioThreshold=100, scheme="NaturalBreaks", bins=10, retBinsCounts=False, figsize=None):
"""
@@ -3340,4 +3354,136 @@ def ages_in_year(self, years, generations=None):
youngest_age = max(youngest_age, 0)
agesInYear[generation][year] = list(range(youngest_age, oldest_age+1))

-return agesInYear
+return agesInYear

def updateExistingTable(newData, schema, existingData=None, sortColumns=None, overwrite=False):
"""
This function takes a data table in a well-defined form (as captured in a Frictionless schema) and attempts to update
an existing table in the same form, if one exists; otherwise it returns the new data as-is. If records with identical primary
key values exist in both tables, the new data overwrites the existing data for those records (provided overwrite is set to
True; otherwise an error is raised). Records in the new data which are not already present in the existing data will be
appended. The resulting dataframe is transformed to be compliant with the provided schema and, optionally, sorted by a
user-specified set of columns.

Parameters
----------
newData : pandas.dataframe.DataFrame
A Pandas DataFrame containing the new data to update the existing table if it exists
schema : frictionless.schema.schema.Schema
A Frictionless schema object that applies to both newData and existingData. Often created using morpc.frictionless.load_schema().
existingData : pandas.dataframe.DataFrame
If provided, a Pandas DataFrame containing existing data to which the updated data in newData will be applied. It may be the case
that no data exists yet (the first time a script is run, for example). In that case, simply omit existingData or set it explicitly
to None and the function will return newData in a form that is compliant with the schema and (optionally) sorted.
sortColumns : str or list
Optional. A specification of the columns to use to sort the updated table. If set to None, no sorting will occur. If set to
"primary_key", the columns identified in schema.primary_key will be used. Otherwise, provide a list of strings representing the
names of columns to be used.
overwrite : bool
Optional. If True, records that already exist in existingData will be overwritten by equivalent records in newData (as identified
by identical primary key values in both dataframes). If False, an error will be raised if this case occurs.

Returns
-------
outputData : pandas.dataframe.DataFrame
A dataframe that consists of the merged contents of existingData (if provided) and newData.
"""

import morpc
import pandas as pd
import logging

logger = logging.getLogger(__name__)

myNewData = newData.copy()
# Verify that the primary key exists in the new data and that the index is in a known state
if(set(schema.primary_key).issubset(set(myNewData.columns))):
pass
elif(myNewData.index.names == schema.primary_key):
myNewData = myNewData.reset_index()
else:
logger.error("New data does not seem to contain the primary key, either as columns or as an index.")
raise RuntimeError

logger.info("Extracting required fields in new data and reordering them as specified in the schema.")
myNewData = myNewData.filter(items=schema.field_names, axis="columns")

logger.info("Casting new data to data types specified in schema.")
myNewData = morpc.frictionless.cast_field_types(myNewData, schema)

if(existingData is None):
myExistingData = None
else:
myExistingData = existingData.copy()
# Verify that the primary key exists in the existing data and that the index is in a known state
if(set(schema.primary_key).issubset(set(myExistingData.columns))):
pass
elif(myExistingData.index.names == schema.primary_key):
myExistingData = myExistingData.reset_index()
else:
logger.error("Existing data does not seem to contain the primary key, either as columns or as an index.")
raise RuntimeError

logger.info("Confirming existing data includes only the required fields and that they are in the order specified in the schema.")
myExistingData = myExistingData.filter(items=schema.field_names, axis="columns")

logger.info("Confirming existing data is cast as data types specified in schema.")
myExistingData = morpc.frictionless.cast_field_types(myExistingData, schema)


if(myExistingData is None):
logger.info("No existing data was found. Creating output data from scratch.")
outputData = myNewData.copy()
else:
logger.info("Existing output data was found. Merging new data with existing data.")

outputData = myExistingData.copy()

logger.info("Setting new data index to primary key specified in schema.")
myNewData = myNewData.set_index(schema.primary_key)

logger.info("Setting existing data index to primary key specified in schema.")
outputData = outputData.set_index(schema.primary_key)

logger.info("Analyzing differences between new and existing data.")
recordsToUpdate = myNewData.index.intersection(outputData.index)
logger.info(f"--> Found {len(recordsToUpdate)} records which are present in existing data and will be updated.")
recordsToAppend = myNewData.index.difference(outputData.index)
logger.info(f"--> Found {len(recordsToAppend)} records which are not present in existing data and will be appended.")

# Update entries that were already present in output table
if(not myNewData.loc[recordsToUpdate].empty):
try:
outputData.update(myNewData.loc[recordsToUpdate], overwrite=True, errors=("ignore" if overwrite == True else "raise"))
except Exception as e:
logger.error(f"Failed to update existing data with new data: {e}")
logger.info("To force update when data overlaps, set overwrite parameter to True.")
raise

# Append new entries that were not present in output table
if(not myNewData.loc[recordsToAppend].empty):
outputData = pd.concat([outputData, myNewData.loc[recordsToAppend]], axis="index")

logger.info("Resetting indices of all datasets.")
myNewData = myNewData.reset_index()
outputData = outputData.reset_index()

logger.info("Casting merged data to data types specified in schema.")
outputData = morpc.frictionless.cast_field_types(outputData, schema)

if(sortColumns == "primary_key"):
mySortColumns = schema.primary_key
logger.info(f"Sorting merged data by columns specified in primary key: {mySortColumns}")
elif(sortColumns is None):
logger.info(f"Column sort order is unspecified. Not sorting columns.")
elif(isinstance(sortColumns, list)):
mySortColumns = sortColumns
logger.info(f'Sorting merged data by user-specified columns: {",".join(mySortColumns)}')
else:
logger.error('User-specified value for sortColumns parameter is not supported.')
raise RuntimeError

if(sortColumns is not None):
outputData = outputData.sort_values(mySortColumns)

return outputData
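
A sketch of a typical update cycle for this new helper; the file paths are placeholders, and it is assumed that updateExistingTable and morpc.frictionless.load_schema() (referenced in the docstring above) are importable from the morpc package:

    import pandas as pd
    import morpc

    schema = morpc.frictionless.load_schema("./output_data/mytable-schema.yaml")  # placeholder path

    newData = pd.read_csv("./input_data/latest.csv")  # placeholder path
    try:
        existingData = pd.read_csv("./output_data/mytable.csv")  # placeholder path
    except FileNotFoundError:
        existingData = None  # first run: the new data is returned, schema-compliant

    # Records sharing a primary key are overwritten (overwrite=True), new records
    # are appended, and the result is sorted by the schema's primary key columns.
    outputData = morpc.updateExistingTable(
        newData, schema, existingData=existingData,
        sortColumns="primary_key", overwrite=True,
    )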