Source code for milton_maps.process_assessor_db

"""Usage: milton_maps process_assessor_db INPUT LAYER OUTPUT

Arguments:
  INPUT       path to GDB file containing assessor DB in layer LAYER
  LAYER       layer within GDB file containing assessor DB
  OUTPUT      output path to write processed Assessor DB as a pickled dataframe
"""
import json
import sys

import click
import geopandas as gpd
import joblib
from fiona.errors import DriverError

import milton_maps as mm


def process_assessor_db(
    input_path, layer, output_path, town_ids_path="data/processed/town_ids.json"
):
    """Read an assessor DB layer, clean and enrich it, and pickle the result.

    The layer is loaded with ``geopandas.read_file``. The resulting frame is
    re-indexed, its ``SITE_ADDR`` whitespace is normalised, the columns
    ``USE_DESCRIPTION``, ``IS_RESIDENTIAL`` and ``TOWN`` are added, and the
    frame is written to ``output_path`` with ``joblib.dump``.

    Args:
        input_path: Path handed to ``geopandas.read_file``.
        layer: Name of the layer to read from ``input_path``.
        output_path: Destination passed to ``joblib.dump``.
        town_ids_path: JSON file mapping stringified ``TOWN_ID`` values to
            town names. The default is relative to the current working
            directory.

    Raises:
        ValueError: If ``geopandas.read_file`` raises ``ValueError`` (reported
            as a missing layer) or fiona raises ``DriverError`` (reported as
            a missing input file).
        AssertionError: If any ``TOWN_ID`` has no entry in the town-ID map.
    """
    try:
        assessor_df = gpd.read_file(input_path, layer=layer)
    except ValueError as err:
        raise ValueError(
            f"{input_path} does not contain the layer {layer}"
        ) from err
    except DriverError as err:
        raise ValueError(f"{input_path} does not exist") from err

    # LOC_ID is globally unique, but is sometimes missing. TOWN_ID + PROP_ID is
    # guaranteed to be unique, so we replace LOC_ID with the combination of
    # those two fields when missing.
    assessor_df.index = assessor_df.LOC_ID.fillna(
        assessor_df.TOWN_ID.astype(str) + assessor_df.PROP_ID
    )

    # Clean up multiple spaces in site address field
    assessor_df["SITE_ADDR"] = assessor_df["SITE_ADDR"].str.split().str.join(" ")

    # Append human-readable fields
    assessor_df["USE_DESCRIPTION"] = mm.transform_use_codes(assessor_df.USE_CODE)
    assessor_df["IS_RESIDENTIAL"] = (
        assessor_df["USE_CODE"].str[:3].isin(mm.RESIDENTIAL_USE_CODES)
    )

    with open(town_ids_path, "r", encoding="utf-8") as f:
        town_ids_map = json.load(f)
    assessor_df["TOWN"] = assessor_df["TOWN_ID"].astype(str).map(town_ids_map)

    # Check all IDs were mapped. This is raised explicitly rather than with an
    # `assert` statement so the check still runs under `python -O`; the
    # exception type is kept as AssertionError for existing callers.
    # A plain boolean array is used as the mask so that row selection is
    # positional and unaffected by the (possibly non-unique) index set above.
    unmapped = assessor_df["TOWN"].isnull().values
    if unmapped.any():
        missing_ids = sorted(set(assessor_df.loc[unmapped, "TOWN_ID"].astype(str)))
        raise AssertionError(
            f"TOWN_ID values missing from {town_ids_path}: {missing_ids}"
        )

    joblib.dump(assessor_df, output_path)
@click.command()
@click.argument("input_path")
@click.argument("layer")
@click.argument("output_path")
def main(input_path, layer, output_path):
    """Console script for processing assessor DB"""
    # NOTE: click uses the docstring above as the command's --help text, so
    # parameter documentation lives in these comments instead.
    #
    # input_path:  must end in "gdb" (case-insensitive), otherwise ValueError.
    # layer:       forwarded unchanged to process_assessor_db.
    # output_path: ".pkl" is appended unless the path already ends in "pkl"
    #              (case-insensitive).
    if not input_path.lower().endswith("gdb"):
        raise ValueError(f"Input file must be a GDB file, got {input_path}")
    if not output_path.lower().endswith("pkl"):
        output_path += ".pkl"
    process_assessor_db(input_path, layer, output_path)


if __name__ == "__main__":
    # Call the click command with no arguments so click reads sys.argv[1:]
    # itself. Passing the full sys.argv would hand click the script path as
    # an extra positional argument and abort with a usage error.
    sys.exit(main())  # pragma: no cover