################################################################################
## SCRIPT INITIALIZATION / LOADING LIBRARIES ###################################
################################################################################
## Load required libraries
library(dplyr)  # General tools for working with data frames.
library(sf)     # Simple Figures, a library for working with vector objects.
library(tigris) # Tools for loading Census geographies.
options(tigris_use_cache = TRUE)

## Create string object for working path.
path_str <- ""
################################################################################


################################################################################
## LOAD CLIMATE DATA ###########################################################
################################################################################
setwd(paste0(path_str,"/Data"))

# Load the dataset for the SPP 3-7.0 (middle path) climate projections at the
# county level.  Data downloaded on 20 May 2025 from:
# https://resilience.climate.gov/maps/bf2e80557cc3414da658248264979ff8/about
climate_df <- 
  read.csv("LOCA2_Ensemble_SSP245_Hot_Days_1950_2100_6510504288252866765.csv",
           colClasses = c("character","character","character","character",
                          "character","character","character","character",
                          "numeric","numeric","numeric","numeric",
                          "numeric","numeric","numeric","numeric"))

# For this project, we're going to look at estimated days with a maximum
# temperature of 90F or higher in the 2020's and again estimated for the
# 2050's.  Thus, we can break the dataset into data frames estimating for
# each of those two dates:
climate_20_df <- 
  climate_df[climate_df$Begin.Date == "1/1/2020 12:00:00 AM",]
climate_50_df <-
  climate_df[climate_df$Begin.Date == "1/1/2050 12:00:00 AM",]

# We can then simplify the dataframes to only have columns we need, and to
# have better variable names:
climate_20_df <-
  transmute(climate_20_df,
            CNTY_FIPS = County.Geographic.Identifier,
            HOTDAYS_20 = Annual.number.of.days.with.a.maximum.temperature.greater.than.or.equal.to.90.F)
climate_50_df <-
  transmute(climate_50_df,
            CNTY_FIPS = County.Geographic.Identifier,
            HOTDAYS_50 = Annual.number.of.days.with.a.maximum.temperature.greater.than.or.equal.to.90.F)

# And we can now merge the two into a single dataframe again.
climate_df <- left_join(climate_20_df,
                        climate_50_df,
                        by = c("CNTY_FIPS" = "CNTY_FIPS"))

# Now we can drop the decade-specific data frames.
rm(climate_20_df,climate_50_df)

# Note to self: on visual insepction of CNTY_FIPS field, the dataset is using
# the new Connecticut county-equivalents (councils of governments).  This can
# be confirmed because nine county-equivalents are present, all with FIPS codes
# starting "091xx" rather than "090xx".

# On the other hand, no data is present for Hawaii and Alaska.  According to
# the data source, data for Alaska and Hawaii will be available in the future.
################################################################################


################################################################################
## LOAD AIR CONDITIONING DATA ##################################################
################################################################################
setwd(paste0(path_str,"/Data/Local Air Conditioning Estimates 2023"))

# Load the dataset for the Census Bureau's experimental 2023 Local Air
# Conditioning Estimates.  Data downloaded on 20 May 2025 from:
# https://www.census.gov/data/experimental-data-products/lace.html
#
# Users should keep in mind that this data is generated by modeling on a number
# of data sources, including current wet-bulb temperatures.  Unfortunately,
# survey data still doesn't exist at the county level for this.
# 
# Detailed write-ups on this data can be found at:
# https://www2.census.gov/programs-surveys/demo/technical-documentation/lace/2023-LACE-Quick-Guide.pdf
# and
# https://www2.census.gov/library/working-papers/2025/demo/sehsd-wp2025-05.pdf
ac_df <- 
  read.csv("LACE_23_County.csv",
           colClasses = c("character","character","character","character",
                          "numeric","numeric","numeric","numeric","numeric",
                          "numeric","numeric","numeric","numeric","numeric"))

# We can reorganize teh data to have a format more useful to us, and limit
# fields to county FIPS code, county name, and the estimated fraction and 90%
# margin of error for the share of households with air conditioning.
ac_df <- transmute(ac_df,
                   CNTY_FIPS  = paste0(STATE,COUNTY),
                   AC_FRAC    = AC_PE,
                   AC_FRAC_ER = AC_PM)
################################################################################


################################################################################
## DOWNLOAD CENSUS GEOGRAPHIES FOR COUNTIES ####################################
################################################################################
# To map this data, we'll need an sf object of Census geographies.  We will use
# Census Bureau "cartographic boundary" geographies, which are lower resolution,
# but sufficient for our purposes and with the added benefit of having water
# areas removed.

# Download the 2024 cartographic boundary geography for counties.
counties_sf <- counties(cb = TRUE, year = 2024)

# Reorganize the sf object with the only information we actually need, county
# FIPS codes and names.
counties_sf <- transmute(counties_sf,
                         CNTY_FIPS = GEOID,
                         CNTY_NAME = paste0(NAMELSAD,", ",STUSPS))

# Order counties by FIPS code and replace row numbers with consecutive integers.
counties_sf <- 
  counties_sf[order(paste0(counties_sf$CSA_FIPS,
                           counties_sf$CBSA_FIPS,
                           counties_sf$CNTY_FIPS)),]
row.names(counties_sf) <- 1:length(counties_sf$CNTY_FIPS)

# Update old-style CRS.
st_crs(counties_sf) <- st_crs(counties_sf)

# Drop counties in the US territories, which we don't have data for.
counties_sf <- counties_sf[!(substr(counties_sf$CNTY_FIPS,1,2) %in%
                               c("60","66","69","72","78")),]

# It bothers me more than is probably justifiable that the county names for
# independent cities fail to capitalize "city", so I am fixing this.
for(i in 1:length(counties_sf$CNTY_FIPS)){
  if(grepl(" city,",counties_sf$CNTY_NAME[i])){
    counties_sf$CNTY_NAME[i] <-
      paste0(substr(counties_sf$CNTY_NAME[i],
                    1,
                    nchar(counties_sf$CNTY_NAME[i])-9),
             " City,",
             substr(counties_sf$CNTY_NAME[i],
                    nchar(counties_sf$CNTY_NAME[i])-2,
                    nchar(counties_sf$CNTY_NAME[i])))
  }
}
################################################################################


################################################################################
## COMBINE DATA INTO SINGLE SHAPEFILE AND EXPORT ###############################
################################################################################
# We can join all the data into a single object and export it as a shapefile.
counties_sf <- left_join(counties_sf,
                         ac_df,
                         by = c("CNTY_FIPS" = "CNTY_FIPS"))
counties_sf <- left_join(counties_sf,
                         climate_df,
                         by = c("CNTY_FIPS" = "CNTY_FIPS"))
rm(ac_df,climate_df)

setwd(path_str)
st_write(counties_sf,"County Climate and AC Data.shp")
################################################################################