Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Mp branchone #8

Open
wants to merge 21 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@

##
.DS_Store

Expand Down Expand Up @@ -47,4 +48,8 @@ vignettes/*.pdf
energy/*.DS_Store

# diagnostic figures (meas)
energy/figures-and-results/compare-diagnostic-scenarios.R
energy/figures-and-results/compare-diagnostic-scenarios.R

# adding data folder
data/

35 changes: 19 additions & 16 deletions energy/data-processing-prep/create_ccs_scens.R
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
## Tracey Mangin
## October 22, 2021
## add infinity price to ccs
# revised: feb 16 2024 by Haejin
# Updated: 2/18/24 by Maxwell

library(tidyverse)
library(data.table)

# paths -----
scen_path = '/Volumes/GoogleDrive/Shared drives/emlab/projects/current-projects/calepa-cn/project-materials/scenario-inputs'
scen_path = '/capstone/freshcair/meds-freshcair-capstone/data/inputs/scenarios'
file_path = '/capstone/freshcair/meds-freshcair-capstone/data/processed' # added file path, b/c read and store at the same place

## files
ccs_ext_file = 'ccs_extraction_scenarios.csv'
Expand All @@ -25,24 +28,24 @@ setorder(ccs_infin, "year", "ccs_scenario", "ccs_price", "units")
## bind
ccs_ext_revised <- rbind(ccs_scens_ext, ccs_infin)

fwrite(ccs_ext_revised, file.path(scen_path, "ccs_extraction_scenarios_revised.csv"))
fwrite(ccs_ext_revised, file.path(file_path , "ccs_extraction_scenarios_revised.csv")) # revised file path


## refining - load ccs scenarios
ccs_scens_ref = fread(file.path(scen_path, ccs_ref_file), header = T)


## ccs infinity
ccs_infin_r <- unique(ccs_scens_ref[, .(year, units)])
ccs_infin_r[, ccs_scenario := "no ccs"]
ccs_infin_r[, ccs_price := Inf]

setorder(ccs_infin_r, "year", "ccs_scenario", "ccs_price", "units")

## bind
ccs_ref_revised <- rbind(ccs_scens_ref, ccs_infin_r)

fwrite(ccs_ref_revised, file.path(scen_path, "ccs_refining_scenarios_revised.csv"))
# ccs_scens_ref = fread(file.path(scen_path, ccs_ref_file), header = T)
#
#
# ## ccs infinity
# ccs_infin_r <- unique(ccs_scens_ref[, .(year, units)])
# ccs_infin_r[, ccs_scenario := "no ccs"]
# ccs_infin_r[, ccs_price := Inf]
#
# setorder(ccs_infin_r, "year", "ccs_scenario", "ccs_price", "units")
#
# ## bind
# ccs_ref_revised <- rbind(ccs_scens_ref, ccs_infin_r)
#
# fwrite(ccs_ref_revised, file.path(file_path, "ccs_refining_scenarios_revised.csv")) # revised the file path



120 changes: 71 additions & 49 deletions energy/data-processing-prep/extraction/clean_doc_prod.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,25 @@
## April 21, 2020
## Data cleaning -- oil production and injection data
## Data from DOC
# updated: 02/09/2024 by Maxwell

# updated: Feb 14 2024 by Haejin

## libraries
library(tidyverse)
library(readr)
library(lubridate)
library(rebus)
library(readtext)
#library(readtext) # update -haejin
library(readxl)
library(here)
library(dplyr) # update -haejin

## set directory
data_directory <- "/Volumes/GoogleDrive/Shared\ drives/emlab/projects/current-projects/calepa-cn/data/stocks-flows/"
setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
setwd('/capstone/freshcair/meds-freshcair-capstone') # Sets directory based on Taylor structure
getwd()


## read in data
# -------------------------------------
Expand All @@ -25,33 +33,37 @@ data_directory <- "/Volumes/GoogleDrive/Shared\ drives/emlab/projects/current-pr
## WellTypeCode -- The code for the Completion type.
## OilorCondensateProduced

## all wells
all_wells <- read_xlsx(paste0(data_directory, "raw/All_wells_20200417.xlsx"))
# UPDATED - MP
all_wells <- read_xlsx("data/inputs/extraction/All_wells_20200417.xlsx")

# UPDATE - not all of the data is in these folders yet; some files apparently were never uploaded.
# The paths below will be correct once the data has been added.
## well production
prod_7785 <- read_csv(paste0(data_directory, "raw/hist_well/CSV_1977_1985/CaliforniaOilAndGasWellMonthlyProduction.csv"))
prod_8689 <- read_csv(paste0(data_directory, "raw/hist_well/CSV_1986_1989/CaliforniaOilAndGasWellMonthlyProduction.csv"))
prod_9094 <- read_csv(paste0(data_directory, "raw/hist_well/CSV_1990_1994/CaliforniaOilAndGasWellMonthlyProduction.csv"))
prod_9599 <- read_csv(paste0(data_directory, "raw/hist_well/CSV_1995_1999/CaliforniaOilAndGasWellMonthlyProduction.csv"))
prod_0004 <- read_csv(paste0(data_directory, "raw/hist_well/CSV_2000_2004/CaliforniaOilAndGasWellMonthlyProduction.csv"))
prod_0509 <- read_csv(paste0(data_directory, "raw/hist_well/CSV_2005_2009/CaliforniaOilAndGasWellMonthlyProduction.csv"))
prod_1514 <- read_csv(paste0(data_directory, "raw/hist_well/CSV_2010_2014/CaliforniaOilAndGasWellMonthlyProduction.csv"))
prod_15 <- read_csv(paste0(data_directory, "raw/hist_well/CSV_2015/CaliforniaOilAndGasWellMonthlyProduction.csv"))
prod_16 <- read_csv(paste0(data_directory, "raw/hist_well/CSV_2016/CaliforniaOilAndGasWellMonthlyProduction.csv"))
prod_17 <- read_csv(paste0(data_directory, "raw/hist_well/CSV_2017/CaliforniaOilAndGasWellMonthlyProduction.csv"))
prod_18 <- read_csv(paste0(data_directory, "raw/hist_well/CSV_2018/CaliforniaOilAndGasWellMonthlyProduction.csv"))
prod_19 <- read_csv(paste0(data_directory, "raw/hist_well/CSV_2019/CaliforniaOilAndGasWellMonthlyProduction.csv"))
prod_7785 <- read_csv("data/inputs/extraction/monthly-prod-inj-wells/CSV_1977_1985/CaliforniaOilAndGasWellMonthlyProduction.csv")
prod_8689 <- read_csv("data/inputs/extraction/monthly-prod-inj-wells/CSV_1986_1989/CaliforniaOilAndGasWellMonthlyProduction.csv")
prod_9094 <- read_csv("data/inputs/extraction/monthly-prod-inj-wells/CSV_1990_1994/CaliforniaOilAndGasWellMonthlyProduction.csv")
prod_9599 <- read_csv("data/inputs/extraction/monthly-prod-inj-wells/CSV_1995_1999/CaliforniaOilAndGasWellMonthlyProduction.csv")
prod_0004 <- read_csv("data/inputs/extraction/monthly-prod-inj-wells/CSV_2000_2004/CaliforniaOilAndGasWellMonthlyProduction.csv")
prod_0509 <- read_csv("data/inputs/extraction/monthly-prod-inj-wells/CSV_2005_2009/CaliforniaOilAndGasWellMonthlyProduction.csv")
prod_1514 <- read_csv("data/inputs/extraction/monthly-prod-inj-wells/CSV_2010_2014/CaliforniaOilAndGasWellMonthlyProduction.csv")
prod_15 <- read_csv("data/inputs/extraction/monthly-prod-inj-wells/CSV_2015/CaliforniaOilAndGasWellMonthlyProduction.csv")
prod_16 <- read_csv("data/inputs/extraction/monthly-prod-inj-wells/CSV_2016/CaliforniaOilAndGasWellMonthlyProduction.csv")
prod_17 <- read_csv("data/inputs/extraction/monthly-prod-inj-wells/CSV_2017/CaliforniaOilAndGasWellMonthlyProduction.csv")
prod_18 <- read_csv("data/inputs/extraction/monthly-prod-inj-wells/CSV_2018/CaliforniaOilAndGasWellMonthlyProduction.csv")
prod_19 <- read_csv("data/inputs/extraction/monthly-prod-inj-wells/CSV_2019/CaliforniaOilAndGasWellMonthlyProduction.csv")


## bind rows
monthly_prod <- rbind(prod_7785, prod_8689, prod_9094, prod_9599, prod_0004, prod_0509, prod_1514, prod_15, prod_16, prod_17,
prod_18, prod_19)

## county codes
ccodes <- read_csv(paste0(data_directory, "raw/prod/county_codes.csv")) %>%
## county codes - UPDATED - MP
ccodes <- read_csv("data/inputs/extraction/county_codes.csv") %>%
rename(county_name = county,
county = number) %>%
select(county_name, county)
as.data.frame() %>% # add this
dplyr::select(county_name, county) # add dplyr::


## well type code
welltype_df <- tibble(WellTypeCode = c("AI", "DG", "GD", "GS",
Expand All @@ -72,22 +84,27 @@ all_prod <- monthly_prod %>%
left_join(welltype_df) %>%
mutate(well_type_name = ifelse(is.na(well_type_name), WellTypeCode, well_type_name))

saveRDS(all_prod, file = paste0(data_directory, "processed/well_prod_m.rds"))



# UPDATED - MP
saveRDS(all_prod, file = "data/processed/well_prod_m.rds")

## injection data
# UPDATE - should work once data is inputted into all the CSV folders -Done!(haejin)
## ------------------------------
inj_7785 <- read_csv(paste0(data_directory, "raw/hist_well/CSV_1977_1985/CaliforniaOilAndGasWellMonthlyInjection.csv"))
inj_8689 <- read_csv(paste0(data_directory, "raw/hist_well/CSV_1986_1989/CaliforniaOilAndGasWellMonthlyInjection.csv"))
inj_9094 <- read_csv(paste0(data_directory, "raw/hist_well/CSV_1990_1994/CaliforniaOilAndGasWellMonthlyInjection.csv"))
inj_9599 <- read_csv(paste0(data_directory, "raw/hist_well/CSV_1995_1999/CaliforniaOilAndGasWellMonthlyInjection.csv"))
inj_0004 <- read_csv(paste0(data_directory, "raw/hist_well/CSV_2000_2004/CaliforniaOilAndGasWellMonthlyInjection.csv"))
inj_0509 <- read_csv(paste0(data_directory, "raw/hist_well/CSV_2005_2009/CaliforniaOilAndGasWellMonthlyInjection.csv"))
inj_1514 <- read_csv(paste0(data_directory, "raw/hist_well/CSV_2010_2014/CaliforniaOilAndGasWellMonthlyInjection.csv"))
inj_15 <- read_csv(paste0(data_directory, "raw/hist_well/CSV_2015/CaliforniaOilAndGasWellMonthlyInjection.csv"))
inj_16 <- read_csv(paste0(data_directory, "raw/hist_well/CSV_2016/CaliforniaOilAndGasWellMonthlyInjection.csv"))
inj_17 <- read_csv(paste0(data_directory, "raw/hist_well/CSV_2017/CaliforniaOilAndGasWellMonthlyInjection.csv"))
inj_18 <- read_csv(paste0(data_directory, "raw/hist_well/CSV_2018/CaliforniaOilAndGasWellMonthlyInjection.csv"))
inj_19 <- read_csv(paste0(data_directory, "raw/hist_well/CSV_2019/CaliforniaOilAndGasWellMonthlyInjection.csv"))
inj_7785 <- read_csv("data/inputs/extraction/monthly-prod-inj-wells/CSV_1977_1985/CaliforniaOilAndGasWellMonthlyInjection.csv")
inj_8689 <- read_csv("data/inputs/extraction/monthly-prod-inj-wells/CSV_1986_1989/CaliforniaOilAndGasWellMonthlyInjection.csv")
inj_9094 <- read_csv("data/inputs/extraction/monthly-prod-inj-wells/CSV_1990_1994/CaliforniaOilAndGasWellMonthlyInjection.csv")
inj_9599 <- read_csv("data/inputs/extraction/monthly-prod-inj-wells/CSV_1995_1999/CaliforniaOilAndGasWellMonthlyInjection.csv")
inj_0004 <- read_csv("data/inputs/extraction/monthly-prod-inj-wells/CSV_2000_2004/CaliforniaOilAndGasWellMonthlyInjection.csv")
inj_0509 <- read_csv("data/inputs/extraction/monthly-prod-inj-wells/CSV_2005_2009/CaliforniaOilAndGasWellMonthlyInjection.csv")
inj_1514 <- read_csv("data/inputs/extraction/monthly-prod-inj-wells/CSV_2010_2014/CaliforniaOilAndGasWellMonthlyInjection.csv")
inj_15 <- read_csv("data/inputs/extraction/monthly-prod-inj-wells/CSV_2015/CaliforniaOilAndGasWellMonthlyInjection.csv")
inj_16 <- read_csv("data/inputs/extraction/monthly-prod-inj-wells/CSV_2016/CaliforniaOilAndGasWellMonthlyInjection.csv")
inj_17 <- read_csv("data/inputs/extraction/monthly-prod-inj-wells/CSV_2017/CaliforniaOilAndGasWellMonthlyInjection.csv")
inj_18 <- read_csv("data/inputs/extraction/monthly-prod-inj-wells/CSV_2018/CaliforniaOilAndGasWellMonthlyInjection.csv")
inj_19 <- read_csv("data/inputs/extraction/monthly-prod-inj-wells/CSV_2019/CaliforniaOilAndGasWellMonthlyInjection.csv")

## bind rows
monthly_inj <- rbind(inj_7785, inj_8689, inj_9094, inj_9599, inj_0004, inj_0509, inj_1514, inj_15, inj_16, inj_17,
Expand All @@ -98,26 +115,31 @@ all_inject <- monthly_inj %>%
mutate(county = as.numeric(str_sub(APINumber, 3, 5)),
year = year(InjectionDate),
month = month(InjectionDate)) %>%
left_join(ccodes) %>%
left_join(ccodes) %>%
left_join(welltype_df) %>%
mutate(well_type_name = ifelse(is.na(well_type_name), WellTypeCode, well_type_name))

saveRDS(all_inject, file = paste0(data_directory, "processed/well_inject_m.rds"))
## data_directory is missing at this point, so define it here -- Haejin
data_directory <- "/capstone/freshcair/meds-freshcair-capstone/data/"


saveRDS(all_inject, file = paste0(data_directory, "processed/well_inject_m.rds")) # have a error message : Error: Status code 401 returned by RStudio Server when executing 'console_input'

## well data
# UPDATE - should work once data is inputted into all the CSV folders
## -----------------------------
wells_7785 <- read_csv(paste0(data_directory, "raw/hist_well/CSV_1977_1985/CaliforniaOilAndGasWells.csv"))
wells_8689 <- read_csv(paste0(data_directory, "raw/hist_well/CSV_1986_1989/CaliforniaOilAndGasWells.csv"))
wells_9094 <- read_csv(paste0(data_directory, "raw/hist_well/CSV_1990_1994/CaliforniaOilAndGasWells.csv"))
wells_9599 <- read_csv(paste0(data_directory, "raw/hist_well/CSV_1995_1999/CaliforniaOilAndGasWells.csv"))
wells_0004 <- read_csv(paste0(data_directory, "raw/hist_well/CSV_2000_2004/CaliforniaOilAndGasWells.csv"))
wells_0509 <- read_csv(paste0(data_directory, "raw/hist_well/CSV_2005_2009/CaliforniaOilAndGasWells.csv"))
wells_1014 <- read_csv(paste0(data_directory, "raw/hist_well/CSV_2010_2014/CaliforniaOilAndGasWells.csv"))
wells_15 <- read_csv(paste0(data_directory, "raw/hist_well/CSV_2015/CaliforniaOilAndGasWells.csv"))
wells_16 <- read_csv(paste0(data_directory, "raw/hist_well/CSV_2016/CaliforniaOilAndGasWells.csv"))
wells_17 <- read_csv(paste0(data_directory, "raw/hist_well/CSV_2017/CaliforniaOilAndGasWells.csv"))
wells_18 <- read_csv(paste0(data_directory, "raw/hist_well/CSV_2018/CaliforniaOilAndGasWells.csv"))
wells_19 <- read_csv(paste0(data_directory, "raw/hist_well/CSV_2019/CaliforniaOilAndGasWells.csv"))
wells_7785 <- read_csv("data/inputs/extraction/monthly-prod-inj-wells/CSV_1977_1985/CaliforniaOilAndGasWells.csv")
wells_8689 <- read_csv("data/inputs/extraction/monthly-prod-inj-wells/CSV_1986_1989/CaliforniaOilAndGasWells.csv")
wells_9094 <- read_csv("data/inputs/extraction/monthly-prod-inj-wells/CSV_1990_1994/CaliforniaOilAndGasWells.csv")
wells_9599 <- read_csv("data/inputs/extraction/monthly-prod-inj-wells/CSV_1995_1999/CaliforniaOilAndGasWells.csv")
wells_0004 <- read_csv("data/inputs/extraction/monthly-prod-inj-wells/CSV_2000_2004/CaliforniaOilAndGasWells.csv")
wells_0509 <- read_csv("data/inputs/extraction/monthly-prod-inj-wells/CSV_2005_2009/CaliforniaOilAndGasWells.csv")
wells_1014 <- read_csv("data/inputs/extraction/monthly-prod-inj-wells/CSV_2010_2014/CaliforniaOilAndGasWells.csv")
wells_15 <- read_csv("data/inputs/extraction/monthly-prod-inj-wells/CSV_2015/CaliforniaOilAndGasWells.csv")
wells_16 <- read_csv("data/inputs/extraction/monthly-prod-inj-wells/CSV_2016/CaliforniaOilAndGasWells.csv")
wells_17 <- read_csv("data/inputs/extraction/monthly-prod-inj-wells/CSV_2017/CaliforniaOilAndGasWells.csv")
wells_18 <- read_csv("data/inputs/extraction/monthly-prod-inj-wells/CSV_2018/CaliforniaOilAndGasWells.csv")
wells_19 <- read_csv("data/inputs/extraction/monthly-prod-inj-wells/CSV_2019/CaliforniaOilAndGasWells.csv")


## figure this ish out
Expand Down Expand Up @@ -209,7 +231,7 @@ wells2 <- wells_19 %>%
# test2 <- str_replace_all(test2, pattern = "Sterling, East " %R% OPEN_PAREN %R% "ABD" %R% CLOSE_PAREN, "Sterling East ABD")
# test2 <- str_replace_all(test2, pattern = "Compton Landing, S., Gas " %R% OPEN_PAREN %R% "ABD" %R% CLOSE_PAREN, "Compton Landing S. Gas ABD")

fix_2019 <- readLines(paste0(data_directory, "raw/hist_well/CSV_2019/CaliforniaOilAndGasWells.csv"))
fix_2019 <- readLines(paste0(data_directory, "inputs/extraction/monthly-prod-inj-wells/CSV_2019/CaliforniaOilAndGasWells.csv")) # update by Haejin
fix_20192<- str_replace_all(fix_2019, pattern = "8-9B INT, Sec. 32", "8-9B INT Sec. 32")


Expand All @@ -220,8 +242,8 @@ fix_20192<- str_replace_all(fix_2019, pattern = "8-9B INT, Sec. 32", "8-9B INT S
#040212008000
#040112009400

writeLines(fix_20192, paste0(data_directory, "processed/wells_19.csv"))
writeLines(fix_20192, paste0(data_directory, "processed/wells_19.csv")) # update by Haejin

wells_2019 <- read_csv(paste0(data_directory, "processed/parseprobs/wells_19.csv"))
wells_2019 <- read_csv(paste0(data_directory, "processed/wells_19.csv"))


13 changes: 7 additions & 6 deletions energy/data-processing-prep/extraction/income_data.R
Original file line number Diff line number Diff line change
@@ -1,20 +1,21 @@
## Tracey Mangin
## October 19, 2021
## Census data
## revised 02/13/2024 - Haejin

library(censusapi)
library(tidycensus)
library(tidyverse)
library(data.table)

main_path <- '/Volumes/GoogleDrive/Shared drives/emlab/projects/current-projects/calepa-cn/'
main_path <- '/capstone/freshcair/meds-freshcair-capstone/'

mycensuskey <- "ae05491f7dfe185b0af5b9d56f1287b4c2c78eca"

# apis <- listCensusApis()
#apis <- listCensusApis()
# View(apis)
#
# availablevars <- listCensusMetadata(name="cps/asec/mar", vintage = 2021)
availablevars <- listCensusMetadata(name="cps/asec/mar", vintage = 2021)
# View(availablevars)
#
#
Expand Down Expand Up @@ -44,7 +45,7 @@ income <- get_acs(state = "CA", geography = "tract",
income <- income %>%
mutate(source = "2015-2019 5-year ACS, 2019 dollars")

fwrite(income, paste0(main_path, "data/Census/ca-median-house-income.csv"))
fwrite(income, paste0(main_path, "data/inputs/gis/census-tract/ca-median-house-income.csv"))

## repeat for county

Expand All @@ -54,7 +55,7 @@ county_income <- get_acs(state = "CA", geography = "county",
county_income <- county_income %>%
mutate(county = str_remove(NAME, " County, California"),
source = "2015-2019 5-year ACS, 2019 dollars") %>%
select(county, variable, estimate, moe, source)
dplyr::select(county, variable, estimate, moe, source) # add dplyr - haejin

fwrite(county_income, paste0(main_path, "data/Census/ca-median-house-income-county.csv"))
fwrite(county_income, paste0(main_path, "data/inputs/gis/census-tract/ca-median-house-income-county.csv"))

6 changes: 3 additions & 3 deletions energy/data-processing-prep/extraction/opgee-carb-results.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@

# ------------------------------------------- INPUTS -----------------------------------

data_dir = '/Volumes/GoogleDrive/Shared drives/emlab/projects/current-projects/calepa-cn/data/OPGEE/'
opgee_fil = 'OPGEE_v2.0_with-CARB-inputs.xlsm'
names_fil = 'opgee_field_names.csv'
data_dir = '/capstone/freshcair/meds-freshcair-capstone/data/inputs/'
opgee_fil = 'OPGEE_v2.0_with-CARB-inputs.xlsm' ## no file here - haejin
names_fil = 'opgee_field_names.csv' ## no file here - haejin

# ------------------------------------------- MAIN -----------------------------------

Expand Down
3 changes: 2 additions & 1 deletion energy/data-processing-prep/extraction/process-monthly-inj.R
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
## Tracey Mangin
## April 29, 2021
## process well injection data and save it for later use throughout
# revised: feb 14, 2024 by Haejin

# ------------------------------------------- INPUTS -----------------------------------

data_dir <- "/Volumes/GoogleDrive/Shared\ drives/emlab/projects/current-projects/calepa-cn/data/stocks-flows/processed/"
data_dir <- "/capstone/freshcair/meds-freshcair-capstone/data/processed/"
minj_fil <- "well_inject_m.rds"
wells_19_fil <- "wells_19.csv"

Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
## Tracey Mangin
## March 10, 2021
## process well production data and save it for later use throughout
## revised: feb 14, 2024 -haejin

# ------------------------------------------- INPUTS -----------------------------------

data_dir <- "/Volumes/GoogleDrive/Shared\ drives/emlab/projects/current-projects/calepa-cn/data/stocks-flows/processed/"
data_dir <- "/capstone/freshcair/meds-freshcair-capstone/data/processed/"
mprod_fil <- "well_prod_m.rds"
wells_19_fil <- "wells_19.csv"

Expand Down
Loading