From 88069d30a4dc06eb901d6f7f62d632d2fd5f6c71 Mon Sep 17 00:00:00 2001
From: Florian Angerer <florian.angerer@oracle.com>
Date: Mon, 20 Nov 2017 19:58:58 +0100
Subject: [PATCH] Storing compressed package dir into cache.

---
 .../r/install.packages.R                      | 94 ++++++++-----------
 documentation/dev/pkgtest.md                  |  5 +-
 2 files changed, 42 insertions(+), 57 deletions(-)

diff --git a/com.oracle.truffle.r.test.packages/r/install.packages.R b/com.oracle.truffle.r.test.packages/r/install.packages.R
index 426ff6a00b..0ef36a2b32 100644
--- a/com.oracle.truffle.r.test.packages/r/install.packages.R
+++ b/com.oracle.truffle.r.test.packages/r/install.packages.R
@@ -746,26 +746,8 @@ pkg.cache.install <- function(pkgname, install.cmd) {
 }
 
 pkg.cache.get <- function(pkgname, lib) {
-    # check if caching is enabled
-    if (!pkg.cache$enabled) {
-        return (FALSE)
-    }
-
-    # check if package cache directory can be accessed
-    if (dir.exists(pkg.cache$dir) && any(file.access(pkg.cache$dir, mode = 6) == -1)) {
-        log.message("package cache error: cannot access package cache dir ", pkg.cache$dir, level=1)
-        return (FALSE)
-    }
-
-    # check cache directory has valid structure
-    if (!is.valid.cache.dir(pkg.cache$dir)) {
-        pkg.cache.init(pkg.cache$dir, pkg.cache$version)
-    }
-
-    # get version sub-directory
-    version.dir <- pkg.cache.get.version(pkg.cache$dir, pkg.cache$version)
-    if (is.null(version.dir)) {
-        log.message("package cache error: cannot access or create version subdir for ", pkg.cache$version, level=1)
+    version.dir <- pkg.cache.check()
+    if(is.null(version.dir)) {
         return (FALSE)
     }
 
@@ -778,11 +760,12 @@ pkg.cache.get <- function(pkgname, lib) {
         fromPath <- file.path(version.dir, pkgname)
         toPath <- lib
 
-        # copy from cache to package library
-        if(!pkg.cache.import.dir(fromPath, toPath)) {
-            log.message("could not copy/link package dir from ", fromPath , " to ", toPath, level=1)
+        # extract cached package to library directory
+        if (untar(fromPath, exdir=toPath) != 0L) {
+            log.message("could not extract cached package from ", fromPath , " to ", toPath, level=1)
             return (FALSE)
         }
+
         log.message("package cache hit, using package from ", fromPath)
         return (TRUE)
     } 
@@ -791,24 +774,49 @@ pkg.cache.get <- function(pkgname, lib) {
     FALSE
 }
 
-pkg.cache.import.dir <- function(fromPath, toPath) {
-    if (any(as.logical(pkg.cache$link), na.rm=T)) {
-        file.symlink(fromPath, toPath)
-    } else {
-        file.copy(fromPath, toPath, recursive=TRUE)
+pkg.cache.insert <- function(pkgname, lib) {
+    version.dir <- pkg.cache.check()
+    if(is.null(version.dir)) {
+        return (FALSE)
     }
+
+    tryCatch({
+        # Create version directory if inexisting
+        if (!dir.exists(version.dir)) {
+            log.message("creating version directory ", version.dir, level=1)
+            dir.create(version.dir)
+        }
+
+        fromPath <- file.path(lib, pkgname)
+        toPath <- file.path(version.dir, paste0(pkgname, ".tar.gz"))
+
+        # to produce a TAR with relative paths, we need to change the working dir
+        prev.wd <- getwd()
+        setwd(lib)
+        if(tar(toPath, pkgname, compression="gzip") != 0L) {
+            log.message("could not compress package dir ", fromPath , " and store it to ", toPath, level=1)
+            return (FALSE)
+        }
+        setwd(prev.wd)
+
+        log.message("successfully inserted package ", pkgname , " to package cache (", toPath, ")")
+        return (TRUE)
+    }, error = function(e) {
+        log.message("could not insert package '", pkgname, "' because: ", e$message)
+    })
+    FALSE
 }
 
-pkg.cache.insert <- function(pkgname, lib) {
+pkg.cache.check <- function() {
     # check if caching is enabled
     if (!pkg.cache$enabled) {
-        return (FALSE)
+        return (NULL)
     }
 
     # check if package cache directory can be accessed
     if (dir.exists(pkg.cache$dir) && any(file.access(pkg.cache$dir, mode = 6) == -1)) {
         log.message("cannot access package cache dir ", pkg.cache$dir, level=1)
-        return (FALSE)
+        return (NULL)
     }
 
     # check cache directory has valid structure
@@ -820,30 +828,9 @@ pkg.cache.insert <- function(pkgname, lib) {
     version.dir <- pkg.cache.get.version(pkg.cache$dir, as.character(pkg.cache$version))
     if (is.null(version.dir)) {
         log.message("cannot access or create version subdir for ", as.character(pkg.cache$version), level=1)
-        return (FALSE)
     }
 
-    tryCatch({
-        # Create version directory if inexisting
-        if (!dir.exists(version.dir)) {
-            log.message("creating version directory ", version.dir, level=1)
-            dir.create(version.dir)
-        }
-
-        fromPath <- file.path(lib, pkgname)
-        toPath <- version.dir
-
-        # copy from cache to package library
-        if(!file.copy(fromPath, toPath, recursive=TRUE)) {
-            log.message("could not copy dir ", fromPath , " from package cache to ", toPath, level=1)
-            return (FALSE)
-        }
-        log.message("successfully inserted package ", pkgname , " to package cache (", toPath, ")")
-        return (TRUE)
-    }, error = function(e) {
-        log.message("could not insert package '", pkgname, "' because: ", e$message)
-    })
-    FALSE
+    version.dir
 }
 
 is.valid.cache.dir <- function(cache.dir) {
@@ -859,7 +846,6 @@ is.valid.cache.dir <- function(cache.dir) {
 
     tryCatch({
         version.table <- read.csv(version.table.name)
-        # TODO: check if versions have appropriate subdirs
         TRUE
     }, error = function(e) {
         log.message("could not read package cache's version table: ", e$message, level=1)
diff --git a/documentation/dev/pkgtest.md b/documentation/dev/pkgtest.md
index 859ca62b36..ae9f1b7fed 100644
--- a/documentation/dev/pkgtest.md
+++ b/documentation/dev/pkgtest.md
@@ -23,15 +23,14 @@ The API checksum must be provided because we do not want to rely on some R packa
 
 ### Usage
 
-Run `mx pkgtest --cache-pkgs version=<checksum>,dir=<pkg-cache-dir>,size=<cache-size>,link=<TRUE|FALSE>`, e.g.
+Run `mx pkgtest --cache-pkgs version=<checksum>,dir=<pkg-cache-dir>,size=<cache-size>`, e.g.
 ```
 mx pkgtest --cache-pkgs version=730e109bd7a8a32b1cb9d9a09aa2325d2430587ddbc0c38bad911525,dir=/tmp/cache_dir
 ```
 
 The `version` key specifies the API version to use, i.e., a checksum of the header files of the native API (mandatory, no default).  
 The `pkg-cache-dir` key specifies the directory of the cache (mandatory, no default).  
-The `size` key specifies the number of API version for which to cache packages (optional, default=`2L`).  
-The `link` key specifies is the number of API version for which to cache packages (optional, default=`FALSE`).  
+The `size` key specifies the number of different API versions for which to cache packages (optional, default=`2L`).  
 
 ### Details
 
-- 
GitLab