From 88069d30a4dc06eb901d6f7f62d632d2fd5f6c71 Mon Sep 17 00:00:00 2001 From: Florian Angerer <florian.angerer@oracle.com> Date: Mon, 20 Nov 2017 19:58:58 +0100 Subject: [PATCH] Storing compressed package dir into cache. --- .../r/install.packages.R | 94 ++++++++----------- documentation/dev/pkgtest.md | 5 +- 2 files changed, 42 insertions(+), 57 deletions(-) diff --git a/com.oracle.truffle.r.test.packages/r/install.packages.R b/com.oracle.truffle.r.test.packages/r/install.packages.R index 426ff6a00b..0ef36a2b32 100644 --- a/com.oracle.truffle.r.test.packages/r/install.packages.R +++ b/com.oracle.truffle.r.test.packages/r/install.packages.R @@ -746,26 +746,8 @@ pkg.cache.install <- function(pkgname, install.cmd) { } pkg.cache.get <- function(pkgname, lib) { - # check if caching is enabled - if (!pkg.cache$enabled) { - return (FALSE) - } - - # check if package cache directory can be accessed - if (dir.exists(pkg.cache$dir) && any(file.access(pkg.cache$dir, mode = 6) == -1)) { - log.message("package cache error: cannot access package cache dir ", pkg.cache$dir, level=1) - return (FALSE) - } - - # check cache directory has valid structure - if (!is.valid.cache.dir(pkg.cache$dir)) { - pkg.cache.init(pkg.cache$dir, pkg.cache$version) - } - - # get version sub-directory - version.dir <- pkg.cache.get.version(pkg.cache$dir, pkg.cache$version) - if (is.null(version.dir)) { - log.message("package cache error: cannot access or create version subdir for ", pkg.cache$version, level=1) + version.dir <- pkg.cache.check() + if(is.null(version.dir)) { return (FALSE) } @@ -778,11 +760,12 @@ pkg.cache.get <- function(pkgname, lib) { fromPath <- file.path(version.dir, pkgname) toPath <- lib - # copy from cache to package library - if(!pkg.cache.import.dir(fromPath, toPath)) { - log.message("could not copy/link package dir from ", fromPath , " to ", toPath, level=1) + # extract cached package to library directory + if (untar(fromPath, exdir=toPath) != 0L) { + log.message("could not extract cached package from ", fromPath , " to ", toPath, level=1) return (FALSE) } + log.message("package cache hit, using package from ", fromPath) return (TRUE) } @@ -791,24 +774,49 @@ pkg.cache.get <- function(pkgname, lib) { FALSE } -pkg.cache.import.dir <- function(fromPath, toPath) { - if (any(as.logical(pkg.cache$link), na.rm=T)) { - file.symlink(fromPath, toPath) - } else { - file.copy(fromPath, toPath, recursive=TRUE) +pkg.cache.insert <- function(pkgname, lib) { + version.dir <- pkg.cache.check() + if(is.null(version.dir)) { + return (FALSE) } + + tryCatch({ + # Create version directory if inexisting + if (!dir.exists(version.dir)) { + log.message("creating version directory ", version.dir, level=1) + dir.create(version.dir) + } + + fromPath <- file.path(lib, pkgname) + toPath <- file.path(version.dir, paste0(pkgname, ".tar.gz")) + + # to produce a TAR with relative paths, we need to change the working dir + prev.wd <- getwd() + setwd(lib) + if(tar(toPath, pkgname, compression="gzip") != 0L) { + log.message("could not compress package dir ", fromPath , " and store it to ", toPath, level=1) + return (FALSE) + } + setwd(prev.wd) + + log.message("successfully inserted package ", pkgname , " to package cache (", toPath, ")") + return (TRUE) + }, error = function(e) { + log.message("could not insert package '", pkgname, "' because: ", e$message) + }) + FALSE } -pkg.cache.insert <- function(pkgname, lib) { +pkg.cache.check <- function() { # check if caching is enabled if (!pkg.cache$enabled) { - return (FALSE) + return (NULL) } # check if package cache directory can be accessed if (dir.exists(pkg.cache$dir) && any(file.access(pkg.cache$dir, mode = 6) == -1)) { log.message("cannot access package cache dir ", pkg.cache$dir, level=1) - return (FALSE) + return (NULL) } # check cache directory has valid structure @@ -820,30 +828,9 @@ pkg.cache.insert <- function(pkgname, lib) { version.dir <- pkg.cache.get.version(pkg.cache$dir, as.character(pkg.cache$version)) if (is.null(version.dir)) { log.message("cannot access or create version subdir for ", as.character(pkg.cache$version), level=1) - return (FALSE) } - tryCatch({ - # Create version directory if inexisting - if (!dir.exists(version.dir)) { - log.message("creating version directory ", version.dir, level=1) - dir.create(version.dir) - } - - fromPath <- file.path(lib, pkgname) - toPath <- version.dir - - # copy from cache to package library - if(!file.copy(fromPath, toPath, recursive=TRUE)) { - log.message("could not copy dir ", fromPath , " from package cache to ", toPath, level=1) - return (FALSE) - } - log.message("successfully inserted package ", pkgname , " to package cache (", toPath, ")") - return (TRUE) - }, error = function(e) { - log.message("could not insert package '", pkgname, "' because: ", e$message) - }) - FALSE + version.dir } is.valid.cache.dir <- function(cache.dir) { @@ -859,7 +846,6 @@ is.valid.cache.dir <- function(cache.dir) { tryCatch({ version.table <- read.csv(version.table.name) - # TODO: check if versions have appropriate subdirs TRUE }, error = function(e) { log.message("could not read package cache's version table: ", e$message, level=1) diff --git a/documentation/dev/pkgtest.md b/documentation/dev/pkgtest.md index 859ca62b36..ae9f1b7fed 100644 --- a/documentation/dev/pkgtest.md +++ b/documentation/dev/pkgtest.md @@ -23,15 +23,14 @@ The API checksum must be provided because we do not want to rely on some R packa ### Usage -Run `mx pkgtest --cache-pkgs version=<checksum>,dir=<pkg-cache-dir>,size=<cache-size>,link=<TRUE|FALSE>`, e.g. +Run `mx pkgtest --cache-pkgs version=<checksum>,dir=<pkg-cache-dir>,size=<cache-size>`, e.g. ``` mx pkgtest --cache-pkgs version=730e109bd7a8a32b1cb9d9a09aa2325d2430587ddbc0c38bad911525,dir=/tmp/cache_dir ``` The `version` key specifies the API version to use, i.e., a checksum of the header files of the native API (mandatory, no default). The `pkg-cache-dir` key specifies the directory of the cache (mandatory, no default). -The `size` key specifies the number of API version for which to cache packages (optional, default=`2L`). -The `link` key specifies is the number of API version for which to cache packages (optional, default=`FALSE`). +The `size` key specifies the number of different API versions for which to cache packages (optional, default=`2L`). ### Details -- GitLab