Skip to content

Commit ca50388

Browse files
Merge pull request #49 from abigailsnyder/acs-fldgen-subsetting-output-fcn
acs-intermediate fix to saved fldgen memory bloat
2 parents b85eff4 + d28b269 commit ca50388

10 files changed

+269
-123
lines changed

NAMESPACE

+1-2
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ export(concatGrids)
77
export(concatGrids.general)
88
export(coord_array)
99
export(drop_NAs)
10+
export(emulator_reducer)
1011
export(eof_analyze)
1112
export(extract_box)
1213
export(file.pairer)
@@ -16,7 +17,6 @@ export(fldgen_object_TP)
1617
export(fldts2df)
1718
export(generate.TP.fullgrids)
1819
export(generate.TP.resids)
19-
export(loadmodel)
2020
export(mkcorrts)
2121
export(normalize.resids)
2222
export(phase_eqn_coef)
@@ -32,7 +32,6 @@ export(read.temperatures)
3232
export(read_globalAvg)
3333
export(readtgav)
3434
export(reconst_fields)
35-
export(savemodel)
3635
export(splitGrids)
3736
export(splitGrids.general)
3837
export(split_eof)

R/generateTPresids.R

+1-1
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
#' @export
2424
generate.TP.resids <- function(emulator, ngen, method = 1){
2525

26-
Ngrid <- ncol(emulator$meanfldT$r)
26+
Ngrid <- nrow(emulator$meanfldT$w)
2727

2828
newgrids <- lapply(1:ngen,
2929
function(x) {

R/readdata.R

-23
Original file line numberDiff line numberDiff line change
@@ -619,29 +619,6 @@ read.precipitations <- function(filename, len=NULL, tag=basename(filename), varn
619619
}
620620

621621

622-
#' @rdname saving_and_restoring
623-
#' @export
624-
loadmodel <- function(file, oldfmt=FALSE)
{
    ## Read a saved emulator from `file` and return it.
    ##
    ## file:   path to the saved model.
    ## oldfmt: if TRUE, `file` is read with load() and must contain an object
    ##         named `modeldata`; otherwise `file` is read with readRDS().
    ##
    ## Stops if no `modeldata` object is found (old format) or if the loaded
    ## object does not inherit from class 'fldgen'.
    if(oldfmt) {
        ## Load into a scratch environment so that objects stored in the file
        ## cannot overwrite this function's own variables (file, oldfmt).
        loaded <- new.env(parent = emptyenv())
        load(file, envir = loaded)
        if(!exists('modeldata', envir = loaded, inherits = FALSE)) {
            stop('No model data in file.')
        }
        modeldata <- get('modeldata', envir = loaded, inherits = FALSE)
    } else {
        modeldata <- readRDS(file)
    }

    if(!inherits(modeldata, 'fldgen')) {
        stop('Object loaded from file is not of type "fldgen".')
    }

    modeldata
}
643-
644-
645622
#' Read and format global mean temperature
646623
#'
647624
#' Read global mean temperature from an input netCDF file and format for use

R/writedata.R

+92-31
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,97 @@
11
#### Output functions
22

3+
#' Subset a trained emulator.
4+
#'
5+
#' A trained fldgen emulator features a large amount of data for both
6+
#' using the emulator and rigorously validating an emulator.
7+
#'
8+
#' If one is just interested in the use of an emulator for generating
9+
#' fields, this function can be called to reduce a trained emulator to
10+
#' the bare essential list entries, which can then be saved and called
11+
#' the same as an unreduced emulator by generate.TP.resids and
12+
#' generate.TP.fullgrids
13+
#'
14+
#' Note that with this reduced emulator, there is NO way to reconstruct
15+
#' the training data. A fully trained emulator contains a copy of the
16+
#' training data, in addition to the training regressor values (tgav),
17+
#' and the estimated linear model parameters and residuals
18+
#' (meanfldT$b, w, r), which together can also reconstruct the data.
19+
#'
20+
#' Even though the coordinate information stored in an emulator$griddataT
21+
#' is not needed directly to generate a new field of residuals or full data,
22+
#' it is often needed in downstream use of the fields. Therefore an entry
23+
#' reducedEmulator$griddataT$coord containing a matrix is saved in the
24+
#' reducedEmulator. Each is a matrix of coordinates for each grid cell, with
25+
#' cells in rows and latitude, longitude in the two columns. Keeping these
26+
#' coordinate matrices for T and P adds negligible size.
27+
#'
28+
#' Finally, the reduced emulator produced by this function is specifically
29+
#' meant for temperature and precipitation only, and is not robust to
30+
#' extension to other variables.
31+
#'
32+
#' Finally finally, if a user is interested in a different subset of
33+
#' list entries in a trained emulator, they are encouraged to subset and
34+
#' save themselves, as appropriate for their project.
35+
#'
36+
#' @param emulator A trained fldgen emulator, with all entries needed
37+
#' for generating new residuals and for rigorously validating the
38+
#' quality of the trained emulator
39+
#'
40+
#' @return reducedEmulator A trained fldgen emulator with only the list
41+
#' entries needed by generate.TP.resids and generate.TP.fullgrids for
42+
#' generating new fields:
43+
#' \describe{
44+
#' \item{griddataT}{Only the coordinate ids and set information.}
45+
#' \item{griddataP}{Only the coordinate ids and set information, and the
46+
#' function to convert from logP to P.}
47+
#' \item{tgav}{The Tgav data from training.}
48+
#' \item{meanfldT}{the slope (w) and intercept (b) terms from the mean field
49+
#' fit.}
50+
#' \item{meanfldP}{the slope (w) and intercept (b) terms from the mean field
51+
#' fit.}
52+
#'\item{tfuns}{The empirical quantile functions for temperature, mapping
53+
#'N(0,1) to the native distribution in each grid cell.}
54+
#'\item{pfuns}{The empirical quantile functions for logP, mapping
55+
#'N(0,1) to the native distribution in each grid cell.}
56+
#'\item{reof}{The EOFs.}
57+
#'\item{fx}{Time coefficients for each EOF from training data.}
58+
#'\item{infiles}{The names of the files used for training the emulator.}
59+
#' }
60+
#'
61+
#' @author ACS July 2020
62+
#' @export
63+
emulator_reducer <- function(emulator){

    # Top-level entries that are read below to build the reduced emulator.
    required <- c('griddataT', 'griddataP', 'tgav', 'meanfldT', 'meanfldP',
                  'tfuns', 'pfuns', 'reof', 'fx', 'infiles')

    if(length(names(emulator)) < 10){ # a full emulator has 10 list entries, check
                                      # to make sure that's showing up.
        stop('Your emulator is already reduced (missing at least one list entry)')
    }

    # A count alone cannot tell whether the *right* entries are present; a
    # missing entry would otherwise be copied silently as NULL.
    missing_entries <- setdiff(required, names(emulator))
    if(length(missing_entries) > 0){
        stop('Your emulator is missing required list entries: ',
             paste(missing_entries, collapse = ', '))
    }

    # This function reduces the size of the object while preserving the structure
    # expected by generate.TP.resids and generate.TP.fullgrids.
    list(griddataT = list(gridid_full = emulator$griddataT$gridid_full,
                          coord = emulator$griddataT$coord),
         griddataP = list(gridid_full = emulator$griddataP$gridid_full,
                          coord = emulator$griddataP$coord,
                          pvarconvert_fcn = emulator$griddataP$pvarconvert_fcn),
         tgav = emulator$tgav,
         # not reconstructing training data, don't need residuals in the
         # mean fields
         meanfldT = list(w = emulator$meanfldT$w,
                         b = emulator$meanfldT$b),
         meanfldP = list(w = emulator$meanfldP$w,
                         b = emulator$meanfldP$b),
         # only the `quant` element of tfuns / pfuns is carried over
         tfuns = list(quant = emulator$tfuns$quant),
         pfuns = list(quant = emulator$pfuns$quant),
         reof = emulator$reof,
         fx = emulator$fx,
         infiles = emulator$infiles)
}
93+
94+
395
#' Write a temperature field as a netcdf file.
496
#'
597
#' Format a field as a netcdf file and write it to the specified file. The lat,
@@ -56,34 +148,3 @@ write.temperature <- function(fld, file, griddata, varname='tas', varunit='K',
56148

57149
ncdf4::nc_close(ncout)
58150
}
59-
60-
#' Load and save emulator training data
61-
#'
62-
#' \code{savemodel} saves the results of training an emulator in a portable
63-
#' format. \code{loadmodel} loads a model from a file created this way and
64-
#' returns it as a \code{fldgen} object.
65-
#'
66-
#' @param modeldata A \code{fldgen} object returned by either
67-
#' \code{\link{train}} or \code{\link{fldgen_object}}.
68-
#' @param file Name of the file to write the data to.
69-
#' @param clobber Flag indicating whether it's ok to overwrite an existing file
70-
#' @param oldfmt Flag indicating that we should try to load the old (.rda) format from
71-
#' pre-2.1 versions of fldgen.
72-
#' @name saving_and_restoring
73-
NULL
74-
75-
#' @rdname saving_and_restoring
76-
#' @export
77-
savemodel <- function(modeldata, file, clobber=FALSE)
{
    ## Write a fldgen object to `file` with saveRDS(), using xz compression.
    ##
    ## modeldata: object to save; must inherit from class 'fldgen'.
    ## file:      path of the file to write.
    ## clobber:   if FALSE (the default), refuse to overwrite an existing file.
    if(!inherits(modeldata, 'fldgen')) {
        stop('modeldata must be a fldgen object.')
    }

    if(!clobber && file.exists(file)) {
        stop('File ', file, ' exists, and noclobber is set.')
    }

    saveRDS(modeldata, file=file, compress='xz')
}

inst/scripts/train-emulators.R

+29
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
library('fldgen')
2+
3+
## Train an emulator for each requested model and save it twice: once in full
## and once reduced with emulator_reducer().
##
## models:  character vector of model names; each is used as the `pattern`
##          for list.files() in `datadir` and in the output file names.
## tasvar:  passed to trainTP() as `tvarname`.
## prvar:   passed to trainTP() as `pvarname`.
## datadir: directory searched for training data files.
##
## Output files 'fldgen-<model>.rds' and 'fldgen-<model>_reducedEmulator.rds'
## are written to the current working directory.
train_models <- function(models, tasvar='tasAdjust', prvar='prAdjust',
                         datadir='./training-data') {

    ## The following would give you the complete set of models:
    ## models <- c('GFDL-ESM2M', 'HadGEM2-ES', 'IPSL-CM5A-LR', 'MIROC5')

    for (model in models) {
        datafiles <- list.files(path=datadir, pattern=model, full.names=TRUE)
        cat('Processing model ', model, ' datafiles:\n', paste(datafiles, collapse='\n'),'\n')
        emu <- trainTP(datafiles, tvarname=tasvar, pvarname=prvar)

        ## Drop the raw precipitation data before saving.
        emu$griddataP$vardata_raw <- NULL

        ## NOTE(review): hard-coded overwrite of row 67382 of the coordinate
        ## matrix -- presumably patches one bad grid cell in this particular
        ## training grid; confirm before using with other grids.
        coord <- emu$griddataT$coord
        coord[67382, ] <- c(-49.75, 178.75)
        emu$griddataT$coord <- coord
        emu$griddataP$coord <- coord

        outfilename <- paste0('fldgen-',model, '.rds')
        saveRDS(emu, outfilename)

        ## Save the reduced emulator as well. The reducer result must be bound
        ## to the same name that is passed to saveRDS().
        reducedEmulator <- emulator_reducer(emu)
        outfilename <- paste0('fldgen-',model, '_reducedEmulator.rds')
        saveRDS(reducedEmulator, outfilename)
    }
}
28+
29+

inst/scripts/train-emulators.zsh

+16
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
#!/bin/zsh

#SBATCH -p short
#SBATCH -t 180
#SBATCH -A IHESD

# Runs train_models() from train-emulators.R for the model name given as the
# first argument ($1). train-emulators.R is sourced by relative path, so the
# script must be started from the directory that contains it.

module purge
module load gcc/8.1.0
module load netcdf
module load R/3.4.3

## Log the exact command that is about to run. The whole string is quoted so
## that the parentheses and ';' are not interpreted by the shell and $1 is
## expanded in the logged text.
echo "Rscript -e \"source('train-emulators.R'); train_models('$1')\""

Rscript -e "source('train-emulators.R'); train_models('$1')"

man/emulator_reducer.Rd

+71
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/saving_and_restoring.Rd

-28
This file was deleted.

0 commit comments

Comments
 (0)