Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

speed up sample_polar #669

Merged
merged 5 commits into from
Jul 17, 2024
Merged

speed up sample_polar #669

merged 5 commits into from
Jul 17, 2024

Conversation

bart1
Copy link
Collaborator

@bart1 bart1 commented Jul 17, 2024

I noticed projecting to ppi of rasters became rather slow I guess this relates to the sp/sf changes. These issued extent do project_as_ppi and maybe integrate_to_ppi. This was mainly a problem for rasters. This pull should fix that. Here is a small comparisons of the old and new (note bench::mark also checks the results are equal). Now sample_polar is 4 times quicker for rasters. Profiling shows that now most time is spend in the st_transform function (about 80%) which we cant really get around.

require(bioRad)
#> Loading required package: bioRad
#> Welcome to bioRad version 0.7.3.9000
#> Attempting to load MistNet from: /home/bart/R/x86_64-pc-linux-gnu-library/4.4/vol2birdR/lib 
#> MistNet successfully initialized.
#> using vol2birdR version 1.0.2 (MistNet installed)
sp <- sp::SpatialPoints(expand.grid(seq(100, 30000, length.out = 360), seq(-5600, 5700, length.out = 180)),
  proj4string = sp::CRS("+proj=aeqd +lat_0=56.3675003051758 +lon_0=12.8516998291016 +x_0=0 +y_0=0 +datum=WGS84 +units=m +no_defs")
)

sp_rpj <- sp::SpatialPoints(expand.grid(seq(100, 30000, length.out = 360), seq(-5600, 5700, length.out = 180)),
                        proj4string = sp::CRS("+proj=aeqd +lat_0=56.4 +lon_0=12.6 +x_0=0 +y_0=0 +datum=WGS84 +units=m +no_defs")
)

ras <- raster::raster(
  nrows = 180, ncols = 360, xmn = 12, xmx = 13, ymn = 56, ymx = 57,
  crs = "+proj=longlat"
)
data(example_scan)

sample_polar_old <- function(param, grid_size, range_max, project, ylim, xlim, k = 4 / 3, re = 6378, rp = 6357) {
  # proj4string=CRS(paste("+proj=aeqd +lat_0=",attributes(param)$geo$lat," +lon_0=",attributes(param)$geo$lon," +ellps=WGS84 +datum=WGS84 +units=m +no_defs",sep=""))
  proj4string <- sp::CRS(paste("+proj=aeqd +lat_0=", attributes(param)$geo$lat,
    " +lon_0=", attributes(param)$geo$lon,
    " +units=m",
    sep = ""
  ))
  if (inherits(grid_size, c("RasterLayer", "SpatialPoints"))) {
    if (sp::proj4string(grid_size) != as.character(proj4string)) {
      gridTopo <- sp::spTransform(methods::as(methods::as(grid_size, "SpatialGrid"), "SpatialPoints"), proj4string)
    } else if (inherits(grid_size, "RasterLayer")) {
      gridTopo <- methods::as(methods::as(grid_size, "SpatialGrid"), "SpatialPoints")
    } else {
      gridTopo <- methods::as(grid_size, "SpatialPoints")
    }
  } else {
    bboxlatlon <- bioRad:::proj_to_wgs(
      c(-range_max, range_max),
      c(-range_max, range_max),
      proj4string
    )@bbox
    if (!missing(ylim) & !is.null(ylim)) {
      bboxlatlon["lat", ] <- ylim
    }
    if (!missing(xlim) & !is.null(xlim)) {
      bboxlatlon["lon", ] <- xlim
    }
    if (missing(ylim) & missing(xlim)) {
      cellcentre.offset <- -c(range_max, range_max)
      cells.dim <- ceiling(rep(2 * range_max / grid_size, 2))
    } else {
      bbox <- bioRad:::wgs_to_proj(bboxlatlon["lon", ], bboxlatlon["lat", ], proj4string)
      cellcentre.offset <- c(
        min(bbox@coords[, "x"]),
        min(bbox@coords[, "y"])
      )
      cells.dim <- c(
        ceiling((max(bbox@coords[, "x"]) -
          min(bbox@coords[, "x"])) / grid_size),
        ceiling((max(bbox@coords[, "y"]) -
          min(bbox@coords[, "y"])) / grid_size)
      )
    }
    # define cartesian grid
    gridTopo <- sp::GridTopology(cellcentre.offset, c(grid_size, grid_size), cells.dim)
  }
  # if projecting, account for elevation angle
  if (project) {
    elev <- attributes(param)$geo$elangle
  } else {
    elev <- 0
  }
  # get scan parameter indices, and extract data
  index <- bioRad:::polar_to_index(
    bioRad:::cartesian_to_polar(sp::coordinates(gridTopo), elev, k = k, lat = attributes(param)$geo$lat, re = re, rp = rp),
    rangebin = attributes(param)$geo$rscale,
    azimbin = attributes(param)$geo$ascale,
    azimstart <- max(c(0, attributes(param)$geo$astart), na.rm = TRUE),
    rangestart <- max(c(0, attributes(param)$geo$rstart), na.rm = TRUE)
  )
  # set indices outside the scan's matrix to NA
  nrang <- dim(param)[1]
  nazim <- dim(param)[2]
  index$row[index$row > nrang] <- NA
  index$col[index$col > nazim] <- NA
  # rstart can result in locations outside of the radar scope close to the radar
  index$row[index$row < 1] <- NA
  stopifnot(all(index$col >= 1))
  # convert 2D index to 1D index
  index <- (index$col - 1) * nrang + index$row
  data <- as.data.frame(param[index])

  #  data <- data.frame(mapply(
  #    function(x, y) {
  #      safe_subset(param, x, y)
  #    },
  #    x = index$row,
  #    y = index$col
  #  ))

  colnames(data) <- attributes(param)$param

  if (inherits(grid_size, "RasterLayer")) {
    output <- sp::SpatialGridDataFrame(methods::as(grid_size, "SpatialGrid"), data)
  } else if (inherits(grid_size, "SpatialPoints")) {
    output <- sp::SpatialPointsDataFrame(grid_size, data)
  } else {
    output <- sp::SpatialGridDataFrame(
      grid = sp::SpatialGrid(
        grid = gridTopo,
        proj4string = proj4string
      ),
      data = data
    )
    attributes(output)$bboxlatlon <- bboxlatlon
  }
  output
}

bench::mark(
  simple_old = sample_polar_old(example_scan$params[[1]], 100, range_max = 10000,  ylim = 56:57, xlim = 12:13, project = T),
  simple_new = bioRad:::sample_polar(example_scan$params[[1]], 100, range_max = 10000, ylim = 56:57, xlim = 12:13, project = T),
  iterations = 5
)
#> Warning: Some expressions had a GC in every iteration; so filtering is
#> disabled.
#> # A tibble: 2 × 6
#>   expression      min   median `itr/sec` mem_alloc `gc/sec`
#>   <bch:expr> <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl>
#> 1 simple_old    328ms    414ms      1.97     189MB     9.08
#> 2 simple_new    332ms    467ms      2.06     166MB     9.04

bench::mark(
  sp_old = sample_polar_old(example_scan$params[[1]], sp, project = T),
  sp_new = bioRad:::sample_polar(example_scan$params[[1]], sp, project = T),
  iterations = 5
)
#> # A tibble: 2 × 6
#>   expression      min   median `itr/sec` mem_alloc `gc/sec`
#>   <bch:expr> <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl>
#> 1 sp_old       87.2ms   89.3ms      10.4    19.3MB     6.95
#> 2 sp_new       87.2ms   87.3ms      11.4    19.4MB     7.63

# Differently projects sp failed before
sample_polar_old(example_scan$params[[1]], sp_rpj, project = T)
#> Error in h(simpleError(msg, call)): error in evaluating the argument 'x' in selecting a method for function 'spTransform': no method or default for coercing "SpatialPoints" to "SpatialGrid"
# Works now
bioRad:::sample_polar(example_scan$params[[1]], sp_rpj, project = T)
#> class       : SpatialPointsDataFrame 
#> features    : 64800 
#> extent      : -15462.11, 14479.03, -2062.488, 9346.83  (xmin, xmax, ymin, ymax)
#> crs         : +proj=aeqd +lat_0=56.3675003051758 +lon_0=12.8516998291016 +x_0=0 +y_0=0 +datum=WGS84 +units=m +no_defs 
#> variables   : 1
#> names       : DBZH 
#> min values  :  -30 
#> max values  :   44

# This is now 4 times as fast
bench::mark(
  ras_old = sample_polar_old(example_scan$params[[1]], ras, project = T),
  ras_new = bioRad:::sample_polar(example_scan$params[[1]], ras, project = T),
  iterations = 5
)
#> Warning: Some expressions had a GC in every iteration; so filtering is
#> disabled.
#> # A tibble: 2 × 6
#>   expression      min   median `itr/sec` mem_alloc `gc/sec`
#>   <bch:expr> <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl>
#> 1 ras_old       1.88s    2.28s     0.450    48.5MB     4.68
#> 2 ras_new    363.42ms 395.21ms     2.14     25.2MB     3.00

Created on 2024-07-17 with reprex v2.1.0

@bart1 bart1 requested a review from adokter July 17, 2024 12:38
@bart1
Copy link
Collaborator Author

bart1 commented Jul 17, 2024

I now also avoid sp::spTransform in wgs_to_proj this should speed up in some other cases:

wgs_to_proj <- function(lon, lat, proj4string) {
  xy <- data.frame(x = lon, y = lat)
  sp::coordinates(xy) <- c("x", "y")
  sp::proj4string(xy) <- sp::CRS("+proj=longlat +datum=WGS84")
  
  res <- sp::spTransform(xy, proj4string)
  
  # Check if the result is a SpatialPointsDataFrame
  if (inherits(res, "SpatialPointsDataFrame")) {
    # If it is, convert it to a SpatialPoints object
    rownames(res@bbox) <- c("x", "y")
    colnames(res@coords) <- c("x", "y")
    res <- sp::SpatialPoints(coords = res@coords, proj4string = res@proj4string, bbox = res@bbox)
  }
  return(res)
}

wgs_to_proj_new2 <- function(lon, lat, proj4string) {
  xy<-sf::st_as_sf(data.frame(x = lon, y = lat), coords=c('x','y'), crs=4326L)
  res <- sf::st_transform(xy, proj4string)
  
  res <- sf::as_Spatial(res)
  rownames(res@bbox) <- c("x", "y")
  colnames(res@coords) <- c("x", "y")
  
  return(res)
}


bench::mark(
wgs_to_proj(1:2,52+1:2, "+proj=aeqd +lat_0=56.3675003051758 +lon_0=12.8516998291016 +x_0=0 +y_0=0 +datum=WGS84 +units=m +no_defs"),
wgs_to_proj_new2(1:2,52+1:2, "+proj=aeqd +lat_0=56.3675003051758 +lon_0=12.8516998291016 +x_0=0 +y_0=0 +datum=WGS84 +units=m +no_defs")

)
#> # A tibble: 2 × 6
#>   expression                             min median `itr/sec` mem_alloc `gc/sec`
#>   <bch:expr>                          <bch:> <bch:>     <dbl> <bch:byt>    <dbl>
#> 1 "wgs_to_proj(1:2, 52 + 1:2, \"+pro… 46.2ms 57.6ms      17.2    32.8MB     2.15
#> 2 "wgs_to_proj_new2(1:2, 52 + 1:2, \… 20.8ms 23.3ms      40.4    24.8KB     2.02

Created on 2024-07-17 with reprex v2.1.0

@bart1
Copy link
Collaborator Author

bart1 commented Jul 17, 2024

The same can be done to integrate_to_ppi saving about 25% of time. There is room for improvement still there as as_Spatial still takes 25% of the time:

require(bioRad)
#> Loading required package: bioRad
#> Welcome to bioRad version 0.7.3.9000
#> Attempting to load MistNet from: /home/bart/R/x86_64-pc-linux-gnu-library/4.4/vol2birdR/lib 
#> MistNet successfully initialized.
#> using vol2birdR version 1.0.2 (MistNet installed)
template_raster <- raster::raster(ncol=300, nrow=300,
  raster::extent(12, 13, 56, 57),
  crs = sp::CRS("+proj=longlat")
)
pvolfile <- system.file("extdata", "volume.h5", package = "bioRad")

data(example_vp)
pvol <- read_pvolfile(pvolfile)

bench::mark(ppi <- integrate_to_ppi(pvol, example_vp, raster = template_raster), iterations = 5)
#> Warning: Some expressions had a GC in every iteration; so filtering is
#> disabled.
#> # A tibble: 1 × 6
#>   expression                             min median `itr/sec` mem_alloc `gc/sec`
#>   <bch:expr>                           <bch> <bch:>     <dbl> <bch:byt>    <dbl>
#> 1 ppi <- integrate_to_ppi(pvol, examp…  4.5s  4.95s     0.201     618MB     3.58
unloadNamespace("bioRad")
devtools::load_all("~/bioRad")
#> ℹ Loading bioRad
#> Welcome to bioRad version 0.7.3.9000
#> using vol2birdR version 1.0.2 (MistNet installed)
bench::mark(ppi2 <- integrate_to_ppi(pvol, example_vp, raster = template_raster), iterations = 5)
#> Warning: Some expressions had a GC in every iteration; so filtering is
#> disabled.
#> # A tibble: 1 × 6
#>   expression                             min median `itr/sec` mem_alloc `gc/sec`
#>   <bch:expr>                           <bch> <bch:>     <dbl> <bch:byt>    <dbl>
#> 1 ppi2 <- integrate_to_ppi(pvol, exam… 2.94s  3.39s     0.301     584MB     3.92
all.equal(ppi, ppi2)
#> [1] TRUE

Created on 2024-07-17 with reprex v2.1.0

@adokter
Copy link
Owner

adokter commented Jul 17, 2024

Thanks @bart1, this is very helpful

@adokter adokter merged commit f6e6e7c into master Jul 17, 2024
6 checks passed
@adokter adokter deleted the sample_polar_speed branch July 17, 2024 17:06
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

Successfully merging this pull request may close these issues.

2 participants