From 4ed1ade4c858238f18f101d5663905d53ada0f6d Mon Sep 17 00:00:00 2001
From: Lukas Stadler <lukas.stadler@oracle.com>
Date: Wed, 19 Jul 2017 15:17:48 +0200
Subject: [PATCH] implement unzip builtin

---
 .../oracle/truffle/r/library/utils/Unzip.java | 228 ++++++++++++++++++
 .../r/nodes/builtin/base/IsTypeFunctions.java |   2 +-
 .../foreign/CallAndExternalFunctions.java     |   2 +
 .../com/oracle/truffle/r/runtime/RError.java  |   3 +-
 .../com/oracle/truffle/r/runtime/ffi/DLL.java |   2 +-
 .../truffle/r/test/ExpectedTestOutput.test    |  15 ++
 .../r/test/builtins/TestBuiltin_unzip.java    |  53 ++++
 7 files changed, 302 insertions(+), 3 deletions(-)
 create mode 100644 com.oracle.truffle.r.library/src/com/oracle/truffle/r/library/utils/Unzip.java
 create mode 100644 com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/builtins/TestBuiltin_unzip.java

diff --git a/com.oracle.truffle.r.library/src/com/oracle/truffle/r/library/utils/Unzip.java b/com.oracle.truffle.r.library/src/com/oracle/truffle/r/library/utils/Unzip.java
new file mode 100644
index 0000000000..334edb9283
--- /dev/null
+++ b/com.oracle.truffle.r.library/src/com/oracle/truffle/r/library/utils/Unzip.java
@@ -0,0 +1,228 @@
+/*
+ * Copyright (c) 2017, 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package com.oracle.truffle.r.library.utils;
+
+import static com.oracle.truffle.r.nodes.builtin.CastBuilder.Predef.logicalValue;
+import static com.oracle.truffle.r.nodes.builtin.CastBuilder.Predef.singleElement;
+import static com.oracle.truffle.r.nodes.builtin.CastBuilder.Predef.stringValue;
+import static com.oracle.truffle.r.nodes.builtin.CastBuilder.Predef.toBoolean;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.time.Instant;
+import java.time.ZoneId;
+import java.time.format.DateTimeFormatter;
+import java.util.ArrayList;
+import java.util.Locale;
+import java.util.function.Predicate;
+import java.util.zip.ZipEntry;
+import java.util.zip.ZipInputStream;
+
+import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary;
+import com.oracle.truffle.api.dsl.Specialization;
+import com.oracle.truffle.r.nodes.builtin.RExternalBuiltinNode;
+import com.oracle.truffle.r.runtime.RError;
+import com.oracle.truffle.r.runtime.RError.Message;
+import com.oracle.truffle.r.runtime.Utils;
+import com.oracle.truffle.r.runtime.data.RDataFactory;
+import com.oracle.truffle.r.runtime.data.RIntVector;
+import com.oracle.truffle.r.runtime.data.RNull;
+import com.oracle.truffle.r.runtime.data.model.RAbstractStringVector;
+import com.oracle.truffle.r.runtime.nodes.RBaseNode;
+
+/**
+ * Implementation of the external function behind R's {@code unzip}: either lists the entries of
+ * a zip file or extracts (a subset of) them into an existing directory.
+ */
+public abstract class Unzip extends RExternalBuiltinNode.Arg7 {
+
+    /** Layout of the timestamps returned in list mode; the fixed locale keeps the digits ASCII. */
+    private static final DateTimeFormatter LIST_DATE_FORMAT = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm", Locale.ROOT);
+
+    /** Added to a timestamp before it is cut off at the minute, so that it is rounded instead. */
+    private static final long HALF_MINUTE_MILLIS = 30 * 1000;
+
+    static {
+        Casts casts = new Casts(Unzip.class);
+        casts.arg(0, "zipfile").mustBe(stringValue()).asStringVector().mustBe(singleElement()).findFirst();
+        casts.arg(1, "files").allowNull().mustBe(stringValue()).asStringVector();
+        casts.arg(2, "exdir").mustBe(stringValue()).asStringVector().mustBe(singleElement()).findFirst();
+        casts.arg(3, "list").mustBe(logicalValue()).asLogicalVector().mustBe(singleElement()).findFirst().map(toBoolean());
+        casts.arg(4, "overwrite").mustBe(logicalValue()).asLogicalVector().mustBe(singleElement()).findFirst().map(toBoolean());
+        casts.arg(5, "junkpaths").mustBe(logicalValue()).asLogicalVector().mustBe(singleElement()).findFirst().map(toBoolean());
+        casts.arg(6, "setTimes").mustBe(logicalValue()).asLogicalVector().mustBe(singleElement()).findFirst().map(toBoolean());
+    }
+
+    /** Variant for {@code files = NULL}: no filter is applied to the entries of the zip file. */
+    @Specialization
+    @TruffleBoundary
+    protected Object unzip(String zipfile, @SuppressWarnings("unused") RNull files, String exdir, boolean list, boolean overwrite, boolean junkpaths, boolean setTimes) {
+        return unzip(zipfile, (RAbstractStringVector) null, exdir, list, overwrite, junkpaths, setTimes);
+    }
+
+    @Override
+    protected RBaseNode getErrorContext() {
+        return RError.SHOW_CALLER;
+    }
+
+    /**
+     * Lists or extracts the contents of {@code zipfile}.
+     *
+     * @param zipfile path of the zip file, passed through {@code Utils.tildeExpand}
+     * @param files names of the entries to extract, or {@code null} to extract all of them
+     * @param exdir existing directory to extract into, passed through {@code Utils.tildeExpand}
+     * @param list if {@code true} nothing is extracted and the entries are listed instead
+     * @param overwrite whether files that already exist are replaced
+     * @param junkpaths whether entries are extracted under their base name, directly into
+     *            {@code exdir}
+     * @param setTimes whether extracted files get the modification time recorded in the zip file
+     * @return in extraction mode the integer 0 carrying the paths of the written files in its
+     *         {@code "extracted"} attribute, in list mode a list of names, sizes and dates
+     */
+    @Specialization
+    @TruffleBoundary
+    protected Object unzip(String zipfile, RAbstractStringVector files, String exdir, boolean list, boolean overwrite, boolean junkpaths, boolean setTimes) {
+        if (list) {
+            return list(zipfile);
+        }
+        // one flag per requested name, so that names missing from the zip file can be reported
+        boolean[] found = files == null ? null : new boolean[files.getLength()];
+        Predicate<String> filter = files == null ? name -> true : name -> markRequested(files, found, name);
+
+        File targetDir = new File(Utils.tildeExpand(exdir));
+        if (!targetDir.isDirectory()) {
+            throw error(Message.GENERIC, "invalid target directory");
+        }
+        try (ZipInputStream stream = new ZipInputStream(new FileInputStream(Utils.tildeExpand(zipfile)))) {
+            String targetRoot = targetDir.getCanonicalPath();
+            String targetPrefix = targetRoot.endsWith(File.separator) ? targetRoot : targetRoot + File.separator;
+            ZipEntry entry;
+            ArrayList<String> extracted = new ArrayList<>();
+            byte[] buffer = new byte[2048];
+            while ((entry = stream.getNextEntry()) != null) {
+                if (!filter.test(entry.getName())) {
+                    continue;
+                }
+                File target = new File(targetDir, junkpaths ? new File(entry.getName()).getName() : entry.getName());
+                // entry names come from the archive: refuse those (e.g. "../x") that lead out of exdir
+                String targetPath = target.getCanonicalPath();
+                if (!targetPath.equals(targetRoot) && !targetPath.startsWith(targetPrefix)) {
+                    throw new IOException("entry '" + entry.getName() + "' is outside of the target directory");
+                }
+                if (entry.isDirectory()) {
+                    // directories carry no data and are not reported; junkpaths drops them altogether
+                    if (!junkpaths) {
+                        ensureDirectory(target);
+                    }
+                    continue;
+                }
+                if (!target.exists() || overwrite) {
+                    // the zip file need not contain entries for the directories of its files
+                    ensureDirectory(target.getParentFile());
+                    try (FileOutputStream output = new FileOutputStream(target)) {
+                        extracted.add(target.getPath());
+                        int length;
+                        while ((length = stream.read(buffer)) != -1) {
+                            output.write(buffer, 0, length);
+                        }
+                    }
+                    // an entry without a recorded time reports -1, which setLastModified rejects
+                    if (setTimes && entry.getTime() >= 0) {
+                        target.setLastModified(entry.getTime());
+                    }
+                }
+            }
+            if (files != null) {
+                for (int i = 0; i < found.length; i++) {
+                    if (!found[i]) {
+                        warning(Message.FILE_NOT_FOUND_IN_ZIP);
+                        break;
+                    }
+                }
+            }
+            RIntVector result = RDataFactory.createIntVector(new int[]{0}, true);
+            result.setAttr("extracted", RDataFactory.createStringVector(extracted.toArray(new String[0]), true));
+            return result;
+        } catch (IOException e) {
+            throw error(Message.GENERIC, "error while extracting zip: " + e.getMessage());
+        }
+    }
+
+    /**
+     * Marks every position of {@code files} that equals {@code name} as found.
+     *
+     * @return {@code true} if {@code name} was requested at least once
+     */
+    private static boolean markRequested(RAbstractStringVector files, boolean[] found, String name) {
+        boolean requested = false;
+        for (int i = 0; i < found.length; i++) {
+            if (name.equals(files.getDataAt(i))) {
+                found[i] = true;
+                requested = true;
+            }
+        }
+        return requested;
+    }
+
+    /** Creates {@code dir}, including missing parents, unless it already is a directory. */
+    private static void ensureDirectory(File dir) throws IOException {
+        if (!dir.isDirectory() && !dir.mkdirs()) {
+            throw new IOException("cannot create directory '" + dir.getPath() + "'");
+        }
+    }
+
+    /**
+     * Collects name, uncompressed size and modification time of every entry of {@code zipfile}.
+     *
+     * @return a list of three vectors of equal length: names, sizes and timestamps formatted as
+     *         {@code yyyy-MM-dd HH:mm} in the default time zone
+     */
+    private Object list(String zipfile) {
+        try (ZipInputStream stream = new ZipInputStream(new FileInputStream(Utils.tildeExpand(zipfile)))) {
+            ArrayList<ZipEntry> entryList = new ArrayList<>();
+            ZipEntry entry;
+            while ((entry = stream.getNextEntry()) != null) {
+                entryList.add(entry);
+            }
+            // NOTE(review): the attributes are read in a second pass, after the whole stream has been
+            // traversed - presumably the size of some entries is only known once the stream has
+            // moved past them; confirm before merging the two loops
+            String[] names = new String[entryList.size()];
+            double[] sizes = new double[entryList.size()];
+            String[] dates = new String[entryList.size()];
+            for (int i = 0; i < entryList.size(); i++) {
+                entry = entryList.get(i);
+                names[i] = entry.getName();
+                sizes[i] = entry.getSize();
+                // adding half a minute before the seconds are dropped rounds to the nearest minute
+                Instant rounded = Instant.ofEpochMilli(entry.getTime() + HALF_MINUTE_MILLIS);
+                dates[i] = LIST_DATE_FORMAT.format(rounded.atZone(ZoneId.systemDefault()));
+            }
+            return RDataFactory.createList(new Object[]{RDataFactory.createStringVector(names, true), RDataFactory.createDoubleVector(sizes, true), RDataFactory.createStringVector(dates, true)});
+        } catch (IOException e) {
+            throw error(Message.GENERIC, "error while extracting zip: " + e.getMessage());
+        }
+    }
+}
diff --git a/com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/IsTypeFunctions.java b/com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/IsTypeFunctions.java
index 124bb99da5..58c61e58ea 100644
--- a/com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/IsTypeFunctions.java
+++ b/com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/IsTypeFunctions.java
@@ -600,7 +600,7 @@ public class IsTypeFunctions {
             return RRuntime.LOGICAL_FALSE;
         }
 
-        private boolean typesMatch(RType expected, RType actual) {
+        private static boolean typesMatch(RType expected, RType actual) {
             return expected == RType.Numeric ? actual == RType.Integer || actual == RType.Double : actual == expected;
         }
 
diff --git a/com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/foreign/CallAndExternalFunctions.java b/com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/foreign/CallAndExternalFunctions.java
index 7f0f565864..ad7d504a4e 100644
--- a/com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/foreign/CallAndExternalFunctions.java
+++ b/com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/foreign/CallAndExternalFunctions.java
@@ -62,6 +62,7 @@ import com.oracle.truffle.r.library.tools.ToolsTextFactory.DoTabExpandNodeGen;
 import com.oracle.truffle.r.library.utils.CountFieldsNodeGen;
 import com.oracle.truffle.r.library.utils.Crc64NodeGen;
 import com.oracle.truffle.r.library.utils.DownloadNodeGen;
+import com.oracle.truffle.r.library.utils.UnzipNodeGen;
 import com.oracle.truffle.r.library.utils.MenuNodeGen;
 import com.oracle.truffle.r.library.utils.ObjectSizeNodeGen;
 import com.oracle.truffle.r.library.utils.RprofNodeGen;
@@ -760,6 +761,7 @@ public class CallAndExternalFunctions {
                 case "signrank_free":
                     return new SignrankFreeNode();
                 case "unzip":
+                    return UnzipNodeGen.create();
                 case "addhistory":
                 case "loadhistory":
                 case "savehistory":
diff --git a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/RError.java b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/RError.java
index 983421fd3a..c9cefc000a 100644
--- a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/RError.java
+++ b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/RError.java
@@ -892,7 +892,8 @@ public final class RError extends RuntimeException implements TruffleException {
         NO_BINDING_FOR("no binding for \"%s\""),
         INVALID_SUBSTRING_ARGS("invalid substring arguments"),
         OBJECT_SIZE_ESTIMATE("The object size is only estimated."),
-        REPLACING_IN_NON_CHAR_OBJ("replacing substrings in a non-character object");
+        REPLACING_IN_NON_CHAR_OBJ("replacing substrings in a non-character object"),
+        FILE_NOT_FOUND_IN_ZIP("requested file not found in the zip file");
 
         public final String message;
         final boolean hasArgs;
diff --git a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/ffi/DLL.java b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/ffi/DLL.java
index 37031964a3..0573b87c6a 100644
--- a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/ffi/DLL.java
+++ b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/ffi/DLL.java
@@ -459,7 +459,7 @@ public class DLL {
             invokeVoidCallNode.execute(new NativeCallInfo(pkgInit, initFunc, dllInfo), new Object[]{dllInfo});
         } catch (ReturnException ex) {
             // An error call can, due to condition handling, throw this which we must
-            // propogate
+            // propagate
             throw ex;
         } catch (Throwable ex) {
             if (RContext.isInitialContextInitialized()) {
diff --git a/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/ExpectedTestOutput.test b/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/ExpectedTestOutput.test
index 995ee48459..8bd3b2c347 100644
--- a/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/ExpectedTestOutput.test
+++ b/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/ExpectedTestOutput.test
@@ -72953,6 +72953,21 @@ Slot "y":
 [1] 77 88
 
 
+##com.oracle.truffle.r.test.builtins.TestBuiltin_unzip.testunzip#
+#n <- tempfile(); writeBin(con=n,c(67324752L, 10L, 1851785216L, -273200397L, 99292L, 65536L, 262144L, <<<NEWLINE>>>1868955676L, 1414869359L, -1560084471L, -1386647737L, 1968795463L, <<<NEWLINE>>>16780152L, 128260L, 1311744L, 1345388544L, 503447883L, 2563L, <<<NEWLINE>>>1610612736L, -1219824786L, 25418991L, 16777216L, 67108864L, 6144L, <<<NEWLINE>>>16777216L, -1543503872L, 129L, 1869571584L, 89412913L, 1201865472L, <<<NEWLINE>>>2020956527L, 67174411L, 501L, 5124L, 88821760L, 6L, 16777472L, <<<NEWLINE>>>18944L, 16128L))<<<NEWLINE>>>length(unzip(n,list=T))<<<NEWLINE>>>names(unzip(n,list=T))<<<NEWLINE>>>unzip(n,list=T)[1:2]<<<NEWLINE>>>target <- tempdir()<<<NEWLINE>>>v <- unzip(n,exdir=target, files=c('bar','baz'))<<<NEWLINE>>>v<<<NEWLINE>>>file.exists(paste0(target, '/foo1'))<<<NEWLINE>>>v <- unzip(n,exdir=target)<<<NEWLINE>>>length(v)<<<NEWLINE>>>file.exists(v)<<<NEWLINE>>>readBin(paste0(target, '/foo1'), what='raw', n=1000)
+[1] 3
+[1] "Name"   "Length" "Date"  
+  Name Length
+1 foo1      1
+Warning message:
+In unzip(n, exdir = target, files = c("bar", "baz")) :
+  requested file not found in the zip file
+character(0)
+[1] FALSE
+[1] 1
+[1] TRUE
+[1] 31
+
 ##com.oracle.truffle.r.test.builtins.TestBuiltin_utf8ToInt.testUtf8ToInt#
 #utf8ToInt('')
 integer(0)
diff --git a/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/builtins/TestBuiltin_unzip.java b/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/builtins/TestBuiltin_unzip.java
new file mode 100644
index 0000000000..4f755e7ab1
--- /dev/null
+++ b/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/builtins/TestBuiltin_unzip.java
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2017, 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package com.oracle.truffle.r.test.builtins;
+
+import org.junit.Test;
+
+import com.oracle.truffle.r.test.TestBase;
+
+public class TestBuiltin_unzip extends TestBase {
+
+    @Test
+    public void testunzip() {
+        // writes out a small dummy zip file
+        assertEval("n <- tempfile(); writeBin(con=n,c(67324752L, 10L, 1851785216L, -273200397L, 99292L, 65536L, 262144L, \n" +
+                        "1868955676L, 1414869359L, -1560084471L, -1386647737L, 1968795463L, \n" +
+                        "16780152L, 128260L, 1311744L, 1345388544L, 503447883L, 2563L, \n" +
+                        "1610612736L, -1219824786L, 25418991L, 16777216L, 67108864L, 6144L, \n" +
+                        "16777216L, -1543503872L, 129L, 1869571584L, 89412913L, 1201865472L, \n" +
+                        "2020956527L, 67174411L, 501L, 5124L, 88821760L, 6L, 16777472L, \n" +
+                        "18944L, 16128L))\n" +
+                        "length(unzip(n,list=T))\n" +
+                        "names(unzip(n,list=T))\n" +
+                        "unzip(n,list=T)[1:2]\n" + // leave out date (depends on time zone)
+                        "target <- tempdir()\n" +
+                        "v <- unzip(n,exdir=target, files=c('bar','baz'))\n" +
+                        "v\n" +
+                        "file.exists(paste0(target, '/foo1'))\n" +
+                        "v <- unzip(n,exdir=target)\n" +
+                        "length(v)\n" +
+                        "file.exists(v)\n" +
+                        "readBin(paste0(target, '/foo1'), what='raw', n=1000)");
+    }
+}
-- 
GitLab