From 66d236779ca98db802cce7f0aeb7f3c7a91f1b12 Mon Sep 17 00:00:00 2001
From: Florian Angerer <florian.angerer@oracle.com>
Date: Thu, 16 Nov 2017 18:43:24 +0100
Subject: [PATCH] Implemented installed package cache for package testing.

---
 .../r/install.packages.R                      | 198 +++++++++++++++++-
 mx.fastr/mx_fastr_pkgs.py                     |  55 ++++-
 2 files changed, 249 insertions(+), 4 deletions(-)

diff --git a/com.oracle.truffle.r.test.packages/r/install.packages.R b/com.oracle.truffle.r.test.packages/r/install.packages.R
index 2c13fef37b..b892faa530 100644
--- a/com.oracle.truffle.r.test.packages/r/install.packages.R
+++ b/com.oracle.truffle.r.test.packages/r/install.packages.R
@@ -106,6 +106,7 @@ args <- commandArgs(TRUE)
 usage <- function() {
 	cat(paste("usage: Rscript ",
 					  "[--repos name=value,...]",
+					  "[--cache-pkgs name=value,...]",
                       "[--verbose | -v] [-V] [--dryrun]",
                       "[--no-install | -n] ",
 				      "[--create-blacklist] [--blacklist-file file] [--ignore-blacklist]",
@@ -126,6 +127,7 @@ usage <- function() {
 					  "[--count-daily count]",
 					  "[--ok-only]",
                       "[--pkg-pattern package-pattern] \n"))
+    print(sys.calls())
 	quit(status=100)
 }
 
@@ -720,12 +722,13 @@ fastr_error_log_size <- function() {
 	}
 }
 
+# installs a single package or retrieves it from the cache
 install.pkg <- function(pkgname) {
 	error_log_size <- fastr_error_log_size()
 	if (run.mode == "system") {
-		system.install(pkgname)
+        pkg.cache.install(pkgname, function() system.install(pkgname))
 	} else if (run.mode == "internal") {
-		install.packages(pkgname, type="source", lib=lib.install, INSTALL_opts="--install-tests")
+        pkg.cache.install(pkgname, function() install.packages(pkgname, type="source", lib=lib.install, INSTALL_opts="--install-tests"))
 	} else if (run.mode == "context") {
 		stop("context run-mode not implemented\n")
 	}
@@ -735,6 +738,187 @@ install.pkg <- function(pkgname) {
 	return(rc)
 }
 
+pkg.cache.install <- function(pkgname, install.cmd) {
+    is.cached <- pkg.cache.get(pkgname, lib=lib.install)
+    if (!is.cached) {
+        install.cmd()
+        pkg.cache.insert(pkgname, lib.install)
+    }
+}
+
+pkg.cache.get <- function(pkgname, lib) {
+    # check if caching is enabled
+    if (!pkg.cache$enabled) {
+        return (FALSE)
+    }
+
+    # check if package cache directory can be accessed
+    if (dir.exists(pkg.cache$dir) && any(file.access(pkg.cache$dir, mode = 6) == -1)) {
+        # TODO: log
+        cat(" ######## PKG CACHE ERROR : cannot access package cache dir ", pkg.cache$dir , "\n")
+        return (FALSE)
+    }
+
+    # check cache directory has valid structure
+    if (!is.valid.cache.dir(pkg.cache$dir)) {
+        pkg.cache.init(pkg.cache$dir, pkg.cache$version)
+    }
+
+    # get version sub-directory
+    version.dir <- pkg.cache.get.version(pkg.cache$dir, pkg.cache$version)
+    if (is.null(version.dir)) {
+        # TODO: log error
+        cat(" ######## PKG CACHE ERROR : cannot access or create version subdir for ", pkg.cache$version , "\n")
+        return (FALSE)
+    }
+
+    cat(" ######## PKG CACHE : using version dir ", version.dir, "\n")
+
+    # lookup package dir
+    pkg.dirs <- list.dirs(version.dir, full.names=FALSE, recursive=FALSE)
+    if (!is.na(match(pkgname, pkg.dirs))) {
+        # cache hit
+        fromPath <- file.path(version.dir, pkgname)
+        toPath <- lib
+
+        # copy from cache to package library
+        if(!file.copy(fromPath, toPath, recursive=TRUE)) {
+            # TODO: log error copying dir
+            cat(" ######## PKG CACHE ERROR : could not copy package dir from ", fromPath , " to ", toPath, "\n")
+            return (FALSE)
+        }
+        cat(" ######## PKG CACHE : cache hit, using package from ", fromPath, "\n")
+        return (TRUE)
+    } 
+    # TODO: log cache miss
+    cat(" ######## PKG CACHE : cache miss for package ", pkgname , "\n")
+    
+    FALSE
+}
+
+pkg.cache.insert <- function(pkgname, lib) {
+    # check if caching is enabled
+    if (!pkg.cache$enabled) {
+        return (FALSE)
+    }
+
+    # check if package cache directory can be accessed
+    if (dir.exists(pkg.cache$dir) && any(file.access(pkg.cache$dir, mode = 6) == -1)) {
+        # TODO: log
+        cat(" ######## PKG CACHE ERROR : cannot access package cache dir ", pkg.cache$dir , "\n")
+        return (FALSE)
+    }
+
+    # check cache directory has valid structure
+    if (!is.valid.cache.dir(pkg.cache$dir)) {
+        pkg.cache.init(pkg.cache$dir, as.character(pkg.cache$version))
+    }
+
+    # get version sub-directory
+    version.dir <- pkg.cache.get.version(pkg.cache$dir, as.character(pkg.cache$version))
+    if (is.null(version.dir)) {
+        # TODO: log error
+        cat(" ######## PKG CACHE ERROR : cannot access or create version subdir for ", as.character(pkg.cache$version) , "\n")
+        return (FALSE)
+    }
+
+    fromPath <- file.path(lib, pkgname)
+    toPath <- version.dir
+
+    # copy from cache to package library
+    if(!file.copy(fromPath, toPath, recursive=TRUE)) {
+        # TODO: log error copying dir
+        cat(" ######## PKG CACHE ERROR : could not copy package dir from ", fromPath , " to ", toPath, "\n")
+        return (FALSE)
+    }
+    cat(" ######## PKG CACHE : successfully inserted package ", pkgname , " to cache (", toPath, ")\n")
+    return (TRUE)
+}
+
+is.valid.cache.dir <- function(cache.dir) {
+    if (!dir.exists(cache.dir)) {
+        return (FALSE)
+    }
+
+    # look for 'version.table'
+    version.table.name <- file.path(cache.dir, "version.table")
+    if (any(file.access(version.table.name, mode = 6) == -1)) {
+        return (FALSE)
+    }
+
+    f <- file(version.table.name, "r")
+    tryCatch({
+        version.table <- read.csv(f)
+        # TODO: check if versions have appropriate subdirs
+        TRUE
+    }, error = function(e) FALSE, finally = function() close(f))
+}
+
+pkg.cache.gen.version.dir.name <- function(version) {
+    paste0("library", substr(version, 1, max(20,length(version))))
+}
+
+pkg.cache.init <- function(cache.dir, version) {
+    if (is.null(version)) {
+        return (NULL)
+    }
+
+    if (!dir.exists(cache.dir)) {
+        cat("Creating cache directory ", cache.dir, "\n")
+        dir.create(cache.dir)
+    }
+
+    version.table.name <- file.path(cache.dir, "version.table")
+
+    # create package lib dir for this version (if not existing)
+    version.table <- pkg.cache.create.version(cache.dir, version, data.frame())
+    write.csv(version.table, version.table.name)
+    NULL
+}
+
+# creates package lib dir for this version (if not existing)
+pkg.cache.create.version <- function(cache.dir, version, version.table) {
+    version.table.name <- file.path(cache.dir, "version.table")
+    version.subdir <- pkg.cache.gen.version.dir.name(version)
+    version.dir <- file.path(cache.dir, version.subdir)
+
+    if (!dir.exists(version.dir)) {
+        cat("Creating version directory ", version.dir, "\n")
+        dir.create(version.dir)
+    }
+
+    rbind(version.table, data.frame(version=version,dir=version.subdir))
+}
+
+pkg.cache.get.version <- function(cache.dir, cache.version) {
+    if (is.null(cache.version)) {
+        return (NULL)
+    }
+
+    # look for 'version.table'
+    version.table.name <- file.path(cache.dir, "version.table")
+    if (any(file.access(version.table.name, mode = 6) == -1)) {
+        return (NULL)
+    }
+
+    tryCatch({
+        version.table <- read.csv(version.table.name)
+        version.subdir <- as.character(version.table[version.table$version == cache.version, "dir"])
+        print(paste0("------------ QUERY: ", version.subdir))
+        if (length(version.subdir) == 0) {
+            updated.version.table <- pkg.cache.create.version(cache.dir, cache.version, version.table)
+        }
+        if (version.table != updated.version.table) {
+            version.subdir <- as.character(updated.version.table[updated.version.table$version == cache.version, "dir"])
+            write.csv(updated.version.table, version.table.name)
+        }
+        print(paste0("------------ QUERY: ", version.subdir))
+
+        # return the version directory
+        file.path(cache.dir, version.subdir)
+    }, error = function(e) NULL)
+}
+
 # when testing under graalvm, fastr is not built so we must use the (assumed) sibling gnur repo
 check_graalvm <- function() {
 	if (!is.na(Sys.getenv('FASTR_GRAALVM', unset=NA)) || !is.na(Sys.getenv('GRAALVM_FASTR', unset=NA))) {
@@ -855,6 +1039,14 @@ parse.args <- function() {
 			initial.blacklist.file <<- get.argvalue()
 		} else if (a == "--repos") {
 			repo.list <<- strsplit(get.argvalue(), ",")[[1]]
+		} else if (a == "--cache-pkgs") {
+            cache.opts <- list(enabled=TRUE)
+            svalue <- strsplit(get.argvalue(), ",")[[1]]
+	        for (s in svalue) {
+                arg <- strsplit(s, "=", fixed=T)[[1]]
+                cache.opts[arg[[1]]] <- arg[[2]]
+            }
+            pkg.cache <<- cache.opts
 		} else if (a == "--random") {
 			random.count <<- as.integer(get.argvalue())
 			if (is.na(random.count)) {
@@ -945,6 +1137,7 @@ cat.args <- function() {
 		cat("use.installed.pkgs:", use.installed.pkgs, "\n")
 		cat("invert.pkgset:", invert.pkgset, "\n")
 		cat("testdir.path", testdir, "\n")
+		cat("pkg.cache:", pkg.cache$enabled, "\n")
 	}
 }
 
@@ -1017,6 +1210,7 @@ run <- function() {
 
 quiet <- F
 repo.list <- c("CRAN")
+pkg.cache <- list(enabled=FALSE)
 cran.mirror <- NA
 blacklist.file <- NA
 initial.blacklist.file <- NA
diff --git a/mx.fastr/mx_fastr_pkgs.py b/mx.fastr/mx_fastr_pkgs.py
index 9bd585cc92..8f455220f3 100644
--- a/mx.fastr/mx_fastr_pkgs.py
+++ b/mx.fastr/mx_fastr_pkgs.py
@@ -33,9 +33,11 @@ In either case all the output is placed in the fastr suite dir. Separate directo
 and tests, namely 'lib.install.packages.{fastr,gnur}' and 'test.{fastr,gnur}' (sh syntax).
 '''
 from os.path import join, relpath
+from os import walk
 from datetime import datetime
 import shutil, os, re
 import subprocess
+import hashlib
 import mx
 import mx_fastr
 
@@ -54,6 +56,12 @@ def _gnur_rscript():
     '''
     return _mx_gnur().extensions._gnur_rscript_path()
 
+def _gnur_include_path():
+    return _mx_gnur().extensions._gnur_include_path()
+
+def _fastr_include_path():
+    return join(_fastr_suite_dir(), 'include')
+
 def _graalvm_rscript():
     assert graalvm is not None
     return join(graalvm, 'bin', 'Rscript')
@@ -248,6 +256,9 @@ def pkgtest(args):
         if not '--print-install-status' in install_args:
             install_args += ['--print-install-status']
 
+    # If '--cache-pkgs' is set, then also set the native API version value
+    _set_pkg_cache_api_version(install_args, _fastr_include_path())
+
     _log_step('BEGIN', 'install/test', 'FastR')
     # Currently installpkgs does not set a return code (in install.packages.R)
     rc = _installpkgs(install_args, nonZeroIsFatal=False, env=env, out=out, err=out)
@@ -267,7 +278,12 @@ def pkgtest(args):
         # in order to compare the test output with GnuR we have to install/test the same
         # set of packages with GnuR
         ok_pkgs = [k for k, v in out.install_status.iteritems() if v]
-        _gnur_install_test(_args_to_forward_to_gnur(args), ok_pkgs, gnur_libinstall, gnur_install_tmp)
+        gnur_args = _args_to_forward_to_gnur(args)
+
+        # If '--cache-pkgs' is set, then also set the native API version value
+        _set_pkg_cache_api_version(gnur_args, _gnur_include_path())
+
+        _gnur_install_test(gnur_args, ok_pkgs, gnur_libinstall, gnur_install_tmp)
         _set_test_status(out.test_info)
         print 'Test Status'
         for pkg, test_status in out.test_info.iteritems():
@@ -283,6 +299,15 @@ def pkgtest(args):
     shutil.rmtree(fastr_install_tmp, ignore_errors=True)
     return rc
 
+def _set_pkg_cache_api_version(arg_list, include_dir):
+    '''
+    Looks for argument '--cache-pkgs' and appends the native API version to the value list of this argument.
+    '''
+    if "--cache-pkgs" in arg_list:
+        pkg_cache_values_idx = arg_list.index("--cache-pkgs") + 1
+        if pkg_cache_values_idx < len(arg_list):
+            arg_list[pkg_cache_values_idx] = arg_list[pkg_cache_values_idx] + ",version={0}".format(computeApiChecksum(include_dir))
+
 class TestFileStatus:
     '''
     Records the status of a test file. status is either "OK" or "FAILED".
@@ -330,7 +355,7 @@ def _get_test_outputs(rvm, pkg_name, test_info):
             test_info[pkg_name].testfile_outputs[relfile] = TestFileStatus(status, absfile)
 
 def _args_to_forward_to_gnur(args):
-    forwarded_args = ['--repos', '--run-mode']
+    forwarded_args = ['--repos', '--run-mode', '--cache-pkgs']
     result = []
     i = 0
     while i < len(args):
@@ -616,3 +641,29 @@ def remove_dup_pkgs(args):
     for p in x.iterkeys():
         result += p
     return result
+
+def computeApiChecksum(includeDir):
+    '''
+    Computes a checksum of the header files found in the provided directory (recursively).
+    The result is a SHA256 checksum (as string with hex digits) of all header files.
+    '''
+    m = hashlib.sha256()
+    rootDir = includeDir
+    for root, dirs, files in os.walk(rootDir):
+        mx.log("Visiting directory {0}".format(root))
+        for f in files:
+            fileName = join(root, f)
+            if fileName.endswith('.h'):
+                try:
+                    mx.log("Including file {0}".format(fileName))
+                    with open(fileName) as f:
+                        m.update(f.read())
+                except IOError as e:
+                    # Ignore errors on broken symlinks
+                    if not os.path.islink(fileName) or os.path.exists(fileName):
+                        raise e
+
+
+    hxdigest = m.hexdigest()
+    mx.log("Computed API version checksum {0}".format(hxdigest))
+    return hxdigest
-- 
GitLab