From 66d236779ca98db802cce7f0aeb7f3c7a91f1b12 Mon Sep 17 00:00:00 2001 From: Florian Angerer <florian.angerer@oracle.com> Date: Thu, 16 Nov 2017 18:43:24 +0100 Subject: [PATCH] Implemented installed package cache for package testing. --- .../r/install.packages.R | 198 +++++++++++++++++- mx.fastr/mx_fastr_pkgs.py | 55 ++++- 2 files changed, 249 insertions(+), 4 deletions(-) diff --git a/com.oracle.truffle.r.test.packages/r/install.packages.R b/com.oracle.truffle.r.test.packages/r/install.packages.R index 2c13fef37b..b892faa530 100644 --- a/com.oracle.truffle.r.test.packages/r/install.packages.R +++ b/com.oracle.truffle.r.test.packages/r/install.packages.R @@ -106,6 +106,7 @@ args <- commandArgs(TRUE) usage <- function() { cat(paste("usage: Rscript ", "[--repos name=value,...]", + "[--cache-pkgs name=value,...]", "[--verbose | -v] [-V] [--dryrun]", "[--no-install | -n] ", "[--create-blacklist] [--blacklist-file file] [--ignore-blacklist]", @@ -126,6 +127,7 @@ usage <- function() { "[--count-daily count]", "[--ok-only]", "[--pkg-pattern package-pattern] \n")) + print(sys.calls()) quit(status=100) } @@ -720,12 +722,13 @@ fastr_error_log_size <- function() { } } +# installs a single package or retrieves it from the cache install.pkg <- function(pkgname) { error_log_size <- fastr_error_log_size() if (run.mode == "system") { - system.install(pkgname) + pkg.cache.install(pkgname, function() system.install(pkgname)) } else if (run.mode == "internal") { - install.packages(pkgname, type="source", lib=lib.install, INSTALL_opts="--install-tests") + pkg.cache.install(pkgname, function() install.packages(pkgname, type="source", lib=lib.install, INSTALL_opts="--install-tests")) } else if (run.mode == "context") { stop("context run-mode not implemented\n") } @@ -735,6 +738,187 @@ install.pkg <- function(pkgname) { return(rc) } +pkg.cache.install <- function(pkgname, install.cmd) { + is.cached <- pkg.cache.get(pkgname, lib=lib.install) + if (!is.cached) { + install.cmd() + pkg.cache.insert(pkgname, lib.install) + } +} + +pkg.cache.get <- function(pkgname, lib) { + # check if caching is enabled + if (!pkg.cache$enabled) { + return (FALSE) + } + + # check if package cache directory can be accessed + if (dir.exists(pkg.cache$dir) && any(file.access(pkg.cache$dir, mode = 6) == -1)) { + # TODO: log + cat(" ######## PKG CACHE ERROR : cannot access package cache dir ", pkg.cache$dir , "\n") + return (FALSE) + } + + # check cache directory has valid structure + if (!is.valid.cache.dir(pkg.cache$dir)) { + pkg.cache.init(pkg.cache$dir, pkg.cache$version) + } + + # get version sub-directory + version.dir <- pkg.cache.get.version(pkg.cache$dir, pkg.cache$version) + if (is.null(version.dir)) { + # TODO: log error + cat(" ######## PKG CACHE ERROR : cannot access or create version subdir for ", pkg.cache$version , "\n") + return (FALSE) + } + + cat(" ######## PKG CACHE : using version dir ", version.dir, "\n") + + # lookup package dir + pkg.dirs <- list.dirs(version.dir, full.names=FALSE, recursive=FALSE) + if (!is.na(match(pkgname, pkg.dirs))) { + # cache hit + fromPath <- file.path(version.dir, pkgname) + toPath <- lib + + # copy from cache to package library + if(!file.copy(fromPath, toPath, recursive=TRUE)) { + # TODO: log error copying dir + cat(" ######## PKG CACHE ERROR : could not copy package dir from ", fromPath , " to ", toPath, "\n") + return (FALSE) + } + cat(" ######## PKG CACHE : cache hit, using package from ", fromPath, "\n") + return (TRUE) + } + # TODO: log cache miss + cat(" ######## PKG CACHE : cache miss for package ", pkgname , "\n") + + FALSE +} + +pkg.cache.insert <- function(pkgname, lib) { + # check if caching is enabled + if (!pkg.cache$enabled) { + return (FALSE) + } + + # check if package cache directory can be accessed + if (dir.exists(pkg.cache$dir) && any(file.access(pkg.cache$dir, mode = 6) == -1)) { + # TODO: log + cat(" ######## PKG CACHE ERROR : cannot access package cache dir ", pkg.cache$dir , "\n") + return (FALSE) + } + + # check cache directory has valid structure + if (!is.valid.cache.dir(pkg.cache$dir)) { + pkg.cache.init(pkg.cache$dir, as.character(pkg.cache$version)) + } + + # get version sub-directory + version.dir <- pkg.cache.get.version(pkg.cache$dir, as.character(pkg.cache$version)) + if (is.null(version.dir)) { + # TODO: log error + cat(" ######## PKG CACHE ERROR : cannot access or create version subdir for ", as.character(pkg.cache$version) , "\n") + return (FALSE) + } + + fromPath <- file.path(lib, pkgname) + toPath <- version.dir + + # copy from cache to package library + if(!file.copy(fromPath, toPath, recursive=TRUE)) { + # TODO: log error copying dir + cat(" ######## PKG CACHE ERROR : could not copy package dir from ", fromPath , " to ", toPath, "\n") + return (FALSE) + } + cat(" ######## PKG CACHE : successfully inserted package ", pkgname , " to cache (", toPath, ")\n") + return (TRUE) +} + +is.valid.cache.dir <- function(cache.dir) { + if (!dir.exists(cache.dir)) { + return (FALSE) + } + + # look for 'version.table' + version.table.name <- file.path(cache.dir, "version.table") + if (any(file.access(version.table.name, mode = 6) == -1)) { + return (FALSE) + } + + f <- file(version.table.name, "r") + tryCatch({ + version.table <- read.csv(f) + # TODO: check if versions have appropriate subdirs + TRUE + }, error = function(e) FALSE, finally = function() close(f)) +} + +pkg.cache.gen.version.dir.name <- function(version) { + paste0("library", substr(version, 1, max(20,length(version)))) +} + +pkg.cache.init <- function(cache.dir, version) { + if (is.null(version)) { + return (NULL) + } + + if (!dir.exists(cache.dir)) { + cat("Creating cache directory ", cache.dir, "\n") + dir.create(cache.dir) + } + + version.table.name <- file.path(cache.dir, "version.table") + + # create package lib dir for this version (if not existing) + version.table <- pkg.cache.create.version(cache.dir, version, data.frame()) + write.csv(version.table, version.table.name) + NULL +} + +# creates package lib dir for this version (if not existing) +pkg.cache.create.version <- function(cache.dir, version, version.table) { + version.table.name <- file.path(cache.dir, "version.table") + version.subdir <- pkg.cache.gen.version.dir.name(version) + version.dir <- file.path(cache.dir, version.subdir) + + if (!dir.exists(version.dir)) { + cat("Creating version directory ", version.dir, "\n") + dir.create(version.dir) + } + + rbind(version.table, data.frame(version=version,dir=version.subdir)) +} + +pkg.cache.get.version <- function(cache.dir, cache.version) { + if (is.null(cache.version)) { + return (NULL) + } + + # look for 'version.table' + version.table.name <- file.path(cache.dir, "version.table") + if (any(file.access(version.table.name, mode = 6) == -1)) { + return (NULL) + } + + tryCatch({ + version.table <- read.csv(version.table.name) + version.subdir <- as.character(version.table[version.table$version == cache.version, "dir"]) + print(paste0("------------ QUERY: ", version.subdir)) + if (length(version.subdir) == 0) { + updated.version.table <- pkg.cache.create.version(cache.dir, cache.version, version.table) + } + if (version.table != updated.version.table) { + version.subdir <- as.character(updated.version.table[updated.version.table$version == cache.version, "dir"]) + write.csv(updated.version.table, version.table.name) + } + print(paste0("------------ QUERY: ", version.subdir)) + + # return the version directory + file.path(cache.dir, version.subdir) + }, error = function(e) NULL) +} + # when testing under graalvm, fastr is not built so we must use the (assumed) sibling gnur repo check_graalvm <- function() { if (!is.na(Sys.getenv('FASTR_GRAALVM', unset=NA)) || !is.na(Sys.getenv('GRAALVM_FASTR', unset=NA))) { @@ -855,6 +1039,14 @@ parse.args <- function() { initial.blacklist.file <<- get.argvalue() } else if (a == "--repos") { repo.list <<- strsplit(get.argvalue(), ",")[[1]] + } else if (a == "--cache-pkgs") { + cache.opts <- list(enabled=TRUE) + svalue <- strsplit(get.argvalue(), ",")[[1]] + for (s in svalue) { + arg <- strsplit(s, "=", fixed=T)[[1]] + cache.opts[arg[[1]]] <- arg[[2]] + } + pkg.cache <<- cache.opts } else if (a == "--random") { random.count <<- as.integer(get.argvalue()) if (is.na(random.count)) { @@ -945,6 +1137,7 @@ cat.args <- function() { cat("use.installed.pkgs:", use.installed.pkgs, "\n") cat("invert.pkgset:", invert.pkgset, "\n") cat("testdir.path", testdir, "\n") + cat("pkg.cache:", pkg.cache$enabled, "\n") } } @@ -1017,6 +1210,7 @@ run <- function() { quiet <- F repo.list <- c("CRAN") +pkg.cache <- list(enabled=FALSE) cran.mirror <- NA blacklist.file <- NA initial.blacklist.file <- NA diff --git a/mx.fastr/mx_fastr_pkgs.py b/mx.fastr/mx_fastr_pkgs.py index 9bd585cc92..8f455220f3 100644 --- a/mx.fastr/mx_fastr_pkgs.py +++ b/mx.fastr/mx_fastr_pkgs.py @@ -33,9 +33,11 @@ In either case all the output is placed in the fastr suite dir. Separate directo and tests, namely 'lib.install.packages.{fastr,gnur}' and 'test.{fastr,gnur}' (sh syntax). ''' from os.path import join, relpath +from os import walk from datetime import datetime import shutil, os, re import subprocess +import hashlib import mx import mx_fastr @@ -54,6 +56,12 @@ def _gnur_rscript(): ''' return _mx_gnur().extensions._gnur_rscript_path() +def _gnur_include_path(): + return _mx_gnur().extensions._gnur_include_path() + +def _fastr_include_path(): + return join(_fastr_suite_dir(), 'include') + def _graalvm_rscript(): assert graalvm is not None return join(graalvm, 'bin', 'Rscript') @@ -248,6 +256,9 @@ def pkgtest(args): if not '--print-install-status' in install_args: install_args += ['--print-install-status'] + # If '--cache-pkgs' is set, then also set the native API version value + _set_pkg_cache_api_version(install_args, _fastr_include_path()) + _log_step('BEGIN', 'install/test', 'FastR') # Currently installpkgs does not set a return code (in install.packages.R) rc = _installpkgs(install_args, nonZeroIsFatal=False, env=env, out=out, err=out) @@ -267,7 +278,12 @@ def pkgtest(args): # in order to compare the test output with GnuR we have to install/test the same # set of packages with GnuR ok_pkgs = [k for k, v in out.install_status.iteritems() if v] - _gnur_install_test(_args_to_forward_to_gnur(args), ok_pkgs, gnur_libinstall, gnur_install_tmp) + gnur_args = _args_to_forward_to_gnur(args) + + # If '--cache-pkgs' is set, then also set the native API version value + _set_pkg_cache_api_version(gnur_args, _gnur_include_path()) + + _gnur_install_test(gnur_args, ok_pkgs, gnur_libinstall, gnur_install_tmp) _set_test_status(out.test_info) print 'Test Status' for pkg, test_status in out.test_info.iteritems(): @@ -283,6 +299,15 @@ def pkgtest(args): shutil.rmtree(fastr_install_tmp, ignore_errors=True) return rc +def _set_pkg_cache_api_version(arg_list, include_dir): + ''' + Looks for argument '--cache-pkgs' and appends the native API version to the value list of this argument. + ''' + if "--cache-pkgs" in arg_list: + pkg_cache_values_idx = arg_list.index("--cache-pkgs") + 1 + if pkg_cache_values_idx < len(arg_list): + arg_list[pkg_cache_values_idx] = arg_list[pkg_cache_values_idx] + ",version={0}".format(computeApiChecksum(include_dir)) + class TestFileStatus: ''' Records the status of a test file. status is either "OK" or "FAILED". @@ -330,7 +355,7 @@ def _get_test_outputs(rvm, pkg_name, test_info): test_info[pkg_name].testfile_outputs[relfile] = TestFileStatus(status, absfile) def _args_to_forward_to_gnur(args): - forwarded_args = ['--repos', '--run-mode'] + forwarded_args = ['--repos', '--run-mode', '--cache-pkgs'] result = [] i = 0 while i < len(args): @@ -616,3 +641,29 @@ def remove_dup_pkgs(args): for p in x.iterkeys(): result += p return result + +def computeApiChecksum(includeDir): + ''' + Computes a checksum of the header files found in the provided directory (recursively). + The result is a SHA256 checksum (as string with hex digits) of all header files. + ''' + m = hashlib.sha256() + rootDir = includeDir + for root, dirs, files in os.walk(rootDir): + mx.log("Visiting directory {0}".format(root)) + for f in files: + fileName = join(root, f) + if fileName.endswith('.h'): + try: + mx.log("Including file {0}".format(fileName)) + with open(fileName) as f: + m.update(f.read()) + except IOError as e: + # Ignore errors on broken symlinks + if not os.path.islink(fileName) or os.path.exists(fileName): + raise e + + + hxdigest = m.hexdigest() + mx.log("Computed API version checksum {0}".format(hxdigest)) + return hxdigest -- GitLab