diff --git a/com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/BasePackage.java b/com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/BasePackage.java index f7a538efcb30b60ecc2e58b55c4394d720d35ad2..7231a8e6d83326cddde9bc8cef7594df5ed6851c 100644 --- a/com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/BasePackage.java +++ b/com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/BasePackage.java @@ -250,6 +250,8 @@ public class BasePackage extends RBuiltinPackage { add(ConnectionFunctions.File.class, ConnectionFunctionsFactory.FileNodeGen::create); add(ConnectionFunctions.Flush.class, ConnectionFunctionsFactory.FlushNodeGen::create); add(ConnectionFunctions.GZFile.class, ConnectionFunctionsFactory.GZFileNodeGen::create); + add(ConnectionFunctions.BZFile.class, ConnectionFunctionsFactory.BZFileNodeGen::create); + add(ConnectionFunctions.XZFile.class, ConnectionFunctionsFactory.XZFileNodeGen::create); add(ConnectionFunctions.GetAllConnections.class, ConnectionFunctionsFactory.GetAllConnectionsNodeGen::create); add(ConnectionFunctions.GetConnection.class, ConnectionFunctionsFactory.GetConnectionNodeGen::create); add(ConnectionFunctions.IsOpen.class, ConnectionFunctionsFactory.IsOpenNodeGen::create); diff --git a/com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/ConnectionFunctions.java b/com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/ConnectionFunctions.java index df46b16704bc913cca3af30da9027533d27569f2..8c4cff9d5b23a1d7120484793d863a7fbc99dd2c 100644 --- a/com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/ConnectionFunctions.java +++ b/com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/ConnectionFunctions.java @@ -52,7 +52,6 @@ import java.nio.ByteOrder; import java.nio.DoubleBuffer; import java.nio.IntBuffer; import java.util.ArrayList; 
-import java.util.zip.ZipException; import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary; import com.oracle.truffle.api.dsl.Cached; @@ -66,10 +65,11 @@ import com.oracle.truffle.r.runtime.RError; import com.oracle.truffle.r.runtime.RError.Message; import com.oracle.truffle.r.runtime.RInternalError; import com.oracle.truffle.r.runtime.RRuntime; +import com.oracle.truffle.r.runtime.RCompression; import com.oracle.truffle.r.runtime.builtins.RBuiltin; import com.oracle.truffle.r.runtime.conn.ConnectionSupport.BaseRConnection; import com.oracle.truffle.r.runtime.conn.FileConnections.FileRConnection; -import com.oracle.truffle.r.runtime.conn.GZIPConnections.GZIPRConnection; +import com.oracle.truffle.r.runtime.conn.CompressedConnections.CompressedRConnection; import com.oracle.truffle.r.runtime.conn.RConnection; import com.oracle.truffle.r.runtime.conn.SocketConnections.RSocketConnection; import com.oracle.truffle.r.runtime.conn.TextConnections.TextRConnection; @@ -225,34 +225,31 @@ public abstract class ConnectionFunctions { } - /** - * {@code gzfile} is very versatile (unfortunately); it can open uncompressed files, and files - * compressed by {@code bzip2, xz, lzma}. Currently we only support {@code gzip} and - * uncompressed. + /* + * In GNUR R {@code gzfile, bzfile, xzfile} are very versatile on input; they can open + * uncompressed files, and files compressed by {@code bzip2, xz, lzma}. 
*/ - @RBuiltin(name = "gzfile", kind = INTERNAL, parameterNames = {"description", "open", "encoding", "compression"}, behavior = IO) - public abstract static class GZFile extends RBuiltinNode { + + public abstract static class ZZFileAdapter extends RBuiltinNode { + private final RCompression.Type cType; + + protected ZZFileAdapter(RCompression.Type cType) { + this.cType = cType; + } + @Override protected void createCasts(CastBuilder casts) { Casts.description(casts); Casts.open(casts); Casts.encoding(casts); - casts.arg("compression").asIntegerVector().findFirst().notNA().mustBe(gte(0).and(lte(9))); + casts.arg("compression").asIntegerVector().findFirst().notNA().mustBe(gte(cType == RCompression.Type.XZ ? -9 : 0).and(lte(9))); } @Specialization @TruffleBoundary - @SuppressWarnings("unused") - protected RAbstractIntVector gzFile(RAbstractStringVector description, String open, RAbstractStringVector encoding, int compression) { + protected RAbstractIntVector zzFile(RAbstractStringVector description, String open, String encoding, int compression) { try { - return new GZIPRConnection(description.getDataAt(0), open).asVector(); - } catch (ZipException ex) { - // wasn't a gzip file, try uncompressed text - try { - return new FileRConnection(description.getDataAt(0), "r").asVector(); - } catch (IOException ex1) { - throw reportError(description.getDataAt(0), ex1); - } + return new CompressedRConnection(description.getDataAt(0), open, cType, encoding, compression).asVector(); } catch (IOException ex) { throw reportError(description.getDataAt(0), ex); } @@ -264,6 +261,30 @@ public abstract class ConnectionFunctions { } } + @RBuiltin(name = "gzfile", kind = INTERNAL, parameterNames = {"description", "open", "encoding", "compression"}, behavior = IO) + public abstract static class GZFile extends ZZFileAdapter { + protected GZFile() { + super(RCompression.Type.GZIP); + } + + } + + @RBuiltin(name = "bzfile", kind = INTERNAL, parameterNames = {"description", "open", "encoding", 
"compression"}, behavior = IO) + public abstract static class BZFile extends ZZFileAdapter { + protected BZFile() { + super(RCompression.Type.BZIP2); + } + + } + + @RBuiltin(name = "xzfile", kind = INTERNAL, parameterNames = {"description", "open", "encoding", "compression"}, behavior = IO) + public abstract static class XZFile extends ZZFileAdapter { + protected XZFile() { + super(RCompression.Type.XZ); + } + + } + @RBuiltin(name = "textConnection", kind = INTERNAL, parameterNames = {"description", "text", "open", "env", "encoding"}, behavior = IO) public abstract static class TextConnection extends RBuiltinNode { @Override diff --git a/com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/HiddenInternalFunctions.java b/com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/HiddenInternalFunctions.java index d04b57fd08082c853c5738796cd4fa9a5d2f151a..7d4077d857cbb8216a7206c87de87fb8ce63885c 100644 --- a/com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/HiddenInternalFunctions.java +++ b/com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/HiddenInternalFunctions.java @@ -399,7 +399,7 @@ public class HiddenInternalFunctions { throw RError.error(this, Message.GENERIC, "zlib compress error"); } } else if (compression == 3) { - ctype = RCompression.Type.LZMA; + ctype = RCompression.Type.XZ; offset = 5; outLen = data.length; cdata = new byte[outLen]; @@ -446,8 +446,8 @@ public class HiddenInternalFunctions { byte[] ulenData = new byte[4]; dataLengthBuf.get(ulenData); out.write(ulenData); - if (type == RCompression.Type.LZMA) { - out.write(RCompression.Type.LZMA.typeByte); + if (type == RCompression.Type.XZ) { + out.write(RCompression.Type.XZ.typeByte); } out.write(cdata); return result; diff --git a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/RCompression.java b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/RCompression.java index 
785970bd521f4d6985aa39935494d65c5a51df3a..ca59c7286623759f03f7c9623aa1f4af9a706e41 100644 --- a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/RCompression.java +++ b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/RCompression.java @@ -22,14 +22,19 @@ */ package com.oracle.truffle.r.runtime; +import java.io.ByteArrayInputStream; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.lang.ProcessBuilder.Redirect; +import java.nio.file.Files; +import java.nio.file.OpenOption; +import java.nio.file.Paths; +import java.nio.file.StandardOpenOption; +import java.util.Arrays; import java.util.zip.GZIPInputStream; - -import com.oracle.truffle.r.runtime.conn.GZIPConnections.GZIPRConnection; +import org.tukaani.xz.LZMA2InputStream; import com.oracle.truffle.r.runtime.ffi.RFFIFactory; /** @@ -41,7 +46,7 @@ public class RCompression { NONE('0'), GZIP('1'), BZIP2('2'), - LZMA('Z'); + XZ('Z'); public final byte typeByte; @@ -70,7 +75,7 @@ public class RCompression { } else if (buf[0] == 'B' && buf[1] == 'Z' && buf[2] == 'h') { return RCompression.Type.BZIP2; } else if (buf[0] == (byte) 0xFD && buf[1] == '7' && buf[2] == 'z' && buf[3] == 'X' && buf[4] == 'Z') { - return RCompression.Type.LZMA; + return RCompression.Type.XZ; } else { return RCompression.Type.NONE; } @@ -88,6 +93,15 @@ public class RCompression { return RCompression.Type.NONE; } + /** + * Uncompress for internal use in {@code LazyLoadDBFetch} where size of uncompressed data is + * known. 
+ * + * @param type compression type + * @param udata where to store uncompressed data + * @param cdata data to uncompress + * @return {@code true} iff success + */ public static boolean uncompress(Type type, byte[] udata, byte[] cdata) { switch (type) { case NONE: @@ -97,7 +111,7 @@ public class RCompression { return gzipUncompress(udata, cdata); case BZIP2: throw RInternalError.unimplemented("BZIP2 compression"); - case LZMA: + case XZ: return lzmaUncompress(udata, cdata); default: assert false; @@ -105,6 +119,15 @@ public class RCompression { } } + /** + * Compress for internal use in {@code LazyLoadDBInsertValue} where size of uncompressed data + * is known. + * + * @param type compression type + * @param udata uncompressed data + * @param cdata where to store compressed data + * @return {@code true} iff success + */ public static boolean compress(Type type, byte[] udata, byte[] cdata) { switch (type) { case NONE: @@ -114,7 +137,7 @@ public class RCompression { return gzipCompress(udata, cdata); case BZIP2: throw RInternalError.unimplemented("BZIP2 compression"); - case LZMA: + case XZ: return lzmaCompress(udata, cdata); default: assert false; @@ -132,6 +155,10 @@ public class RCompression { return rc == 0; } + /** + * There is no obvious counterpart to {@link LZMA2InputStream} and according to the XZ forum it + * is not implemented for Java, so we have to use a sub-process. + */ private static boolean lzmaCompress(byte[] udata, byte[] cdata) { int rc; ProcessBuilder pb = new ProcessBuilder("xz", "--compress", "--format=raw", "--lzma2", "--stdout"); @@ -157,67 +184,64 @@ public class RCompression { } private static boolean lzmaUncompress(byte[] udata, byte[] data) { - int rc; - ProcessBuilder pb = new ProcessBuilder("xz", "--decompress", "--format=raw", "--lzma2", "--stdout"); + int dictSize = udata.length < LZMA2InputStream.DICT_SIZE_MIN ? 
LZMA2InputStream.DICT_SIZE_MIN : udata.length; + try (LZMA2InputStream lzmaStream = new LZMA2InputStream(new ByteArrayInputStream(data), dictSize)) { + int totalRead = 0; + int n; + while ((n = lzmaStream.read(udata, totalRead, udata.length - totalRead)) > 0) { + totalRead += n; + } + return totalRead == udata.length; + } catch (IOException ex) { + return false; + } + } + + public static byte[] bzipUncompressFromFile(String path) throws IOException { + String[] command = new String[]{"bzip2", "-dc", path}; + ProcessBuilder pb = new ProcessBuilder(command); pb.redirectError(Redirect.INHERIT); + Process p = pb.start(); + InputStream is = p.getInputStream(); + ProcessOutputManager.OutputThreadVariable readThread = new ProcessOutputManager.OutputThreadVariable(command[0], is); + readThread.start(); try { - Process p = pb.start(); - OutputStream os = p.getOutputStream(); - InputStream is = p.getInputStream(); - ProcessOutputManager.OutputThread readThread = new ProcessOutputManager.OutputThreadFixed("xz", is, udata); - readThread.start(); - os.write(data); - os.close(); - rc = p.waitFor(); + int rc = p.waitFor(); if (rc == 0) { readThread.join(); - if (readThread.totalRead != udata.length) { - return false; - } + return Arrays.copyOf(readThread.getData(), readThread.getTotalRead()); } - } catch (InterruptedException | IOException ex) { - return false; + } catch (InterruptedException ex) { + // fall through } - return rc == 0; - } - - /** - * This is used by {@link GZIPRConnection}. 
- */ - public static byte[] lzmaUncompressFromFile(String path) { - return genericUncompressFromFile(new String[]{"xz", "--decompress", "--lzma2", "--stdout", path}); - } - - public static byte[] bzipUncompressFromFile(String path) { - return genericUncompressFromFile(new String[]{"bzip2", "-dc", path}); + throw new IOException("bzip2 decompression failed for: " + path); } - private static byte[] genericUncompressFromFile(String[] command) { + public static void bzipCompressToFile(byte[] data, String path, boolean append) throws IOException { + String[] command = new String[]{"bzip2", "-zc"}; int rc; ProcessBuilder pb = new ProcessBuilder(command); pb.redirectError(Redirect.INHERIT); + Process p = pb.start(); + InputStream is = p.getInputStream(); + OutputStream os = p.getOutputStream(); + ProcessOutputManager.OutputThreadVariable readThread = new ProcessOutputManager.OutputThreadVariable(command[0], is); + readThread.start(); + os.write(data); + os.close(); try { - Process p = pb.start(); - InputStream is = p.getInputStream(); - ProcessOutputManager.OutputThreadVariable readThread = new ProcessOutputManager.OutputThreadVariable(command[0], is); - readThread.start(); rc = p.waitFor(); if (rc == 0) { readThread.join(); - return readThread.getData(); + byte[] cData = Arrays.copyOf(readThread.getData(), readThread.getTotalRead()); + OpenOption[] openOptions = append ? 
new OpenOption[]{StandardOpenOption.CREATE, StandardOpenOption.APPEND} : new OpenOption[0]; /* CREATE is required: APPEND alone fails when the file does not exist yet */ + Files.write(Paths.get(path), cData, openOptions); + return; } - } catch (InterruptedException | IOException ex) { + } catch (InterruptedException ex) { // fall through } - throw RInternalError.shouldNotReachHere(join(command)); + throw new IOException("bzip2 compression failed for: " + path); } - private static String join(String[] args) { - StringBuilder sb = new StringBuilder(); - for (String s : args) { - sb.append(s); - sb.append(' '); - } - return sb.toString(); - } } diff --git a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/conn/GZIPConnections.java b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/conn/CompressedConnections.java similarity index 55% rename from com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/conn/GZIPConnections.java rename to com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/conn/CompressedConnections.java index 0928fe7df3d31e81432526ff5988f60fb0eebb83..4530f65c9fdf88e51cd6f695f3837f15e0fe423d 100644 --- a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/conn/GZIPConnections.java +++ b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/conn/CompressedConnections.java @@ -23,6 +23,7 @@ package com.oracle.truffle.r.runtime.conn; import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.IOException; @@ -32,8 +33,15 @@ import java.nio.ByteBuffer; import java.util.zip.GZIPInputStream; import java.util.zip.GZIPOutputStream; +import org.tukaani.xz.LZMA2Options; +import org.tukaani.xz.XZ; +import org.tukaani.xz.XZInputStream; +import org.tukaani.xz.XZOutputStream; + import com.oracle.truffle.r.runtime.RCompression; +import com.oracle.truffle.r.runtime.RCompression.Type; import com.oracle.truffle.r.runtime.RError; 
+import com.oracle.truffle.r.runtime.RInternalError; import com.oracle.truffle.r.runtime.conn.ConnectionSupport.AbstractOpenMode; 
import com.oracle.truffle.r.runtime.conn.ConnectionSupport.BasePathRConnection; import com.oracle.truffle.r.runtime.conn.ConnectionSupport.ConnectionClass; @@ -43,20 +51,42 @@ import com.oracle.truffle.r.runtime.conn.ConnectionSupport.DelegateWriteRConnect import com.oracle.truffle.r.runtime.conn.ConnectionSupport.ReadWriteHelper; import com.oracle.truffle.r.runtime.data.model.RAbstractStringVector; -public class GZIPConnections { +public class CompressedConnections { public static final int GZIP_BUFFER_SIZE = (2 << 20); /** - * Base class for all modes of gzfile connections. N.B. gzfile is defined to be able to read - * gzip, bzip, lzma and uncompressed files, which has to be implemented by reading the first few - * bytes of the file and detecting the type of the file. + * Base class for all modes of gzfile/bzfile/xzfile connections. N.B. In GNU R these can read + * gzip, bzip, lzma and uncompressed files, and this has to be implemented by reading the first + * few bytes of the file and detecting the type of the file. 
*/ - public static class GZIPRConnection extends BasePathRConnection { - public GZIPRConnection(String path, String modeString) throws IOException { - super(path, ConnectionClass.GZFile, modeString, AbstractOpenMode.ReadBinary); + public static class CompressedRConnection extends BasePathRConnection { + private final RCompression.Type cType; + @SuppressWarnings("unused") private final String encoding; // TODO + @SuppressWarnings("unused") private final int compression; // TODO + + public CompressedRConnection(String path, String modeString, Type cType, String encoding, int compression) throws IOException { + super(path, mapConnectionClass(cType), modeString, AbstractOpenMode.ReadBinary); + this.cType = cType; + this.encoding = encoding; + this.compression = compression; openNonLazyConnection(); } + private static ConnectionClass mapConnectionClass(RCompression.Type cType) { + switch (cType) { + case NONE: + return ConnectionClass.File; + case GZIP: + return ConnectionClass.GZFile; + case BZIP2: + return ConnectionClass.BZFile; + case XZ: + return ConnectionClass.XZFile; + default: + throw RInternalError.shouldNotReachHere(); + } + } + @Override protected void createDelegateConnection() throws IOException { DelegateRConnection delegate = null; @@ -64,8 +94,15 @@ public class GZIPConnections { switch (openMode) { case Read: case ReadBinary: - RCompression.Type cType = RCompression.getCompressionType(path); - switch (cType) { + /* + * For input, we check the actual compression type as GNU R is permissive about + * the claimed type. 
+ */ + RCompression.Type cTypeActual = RCompression.getCompressionType(path); + if (cTypeActual != cType) { + updateConnectionClass(mapConnectionClass(cTypeActual)); + } + switch (cTypeActual) { case NONE: if (openMode == AbstractOpenMode.ReadBinary) { delegate = new FileConnections.FileReadBinaryRConnection(this); @@ -74,26 +111,36 @@ public class GZIPConnections { } break; case GZIP: - delegate = new GZIPInputRConnection(this); + delegate = new CompressedInputRConnection(this, new GZIPInputStream(new FileInputStream(path), GZIP_BUFFER_SIZE)); break; - case LZMA: - /* - * no lzma support in Java. For now we use RCompression to a byte array - * and return a ByteArrayInputStream on that. - */ - byte[] lzmaUdata = RCompression.lzmaUncompressFromFile(path); - delegate = new ByteGZipInputRConnection(this, new ByteArrayInputStream(lzmaUdata)); + case XZ: + delegate = new CompressedInputRConnection(this, new XZInputStream(new FileInputStream(path))); break; case BZIP2: - // ditto + // no in Java support, so go via byte array byte[] bzipUdata = RCompression.bzipUncompressFromFile(path); - delegate = new ByteGZipInputRConnection(this, new ByteArrayInputStream(bzipUdata)); + delegate = new ByteStreamCompressedInputRConnection(this, new ByteArrayInputStream(bzipUdata)); } break; + + case Append: + case AppendBinary: case Write: - case WriteBinary: - delegate = new GZIPOutputRConnection(this); + case WriteBinary: { + boolean append = openMode == AbstractOpenMode.Append || openMode == AbstractOpenMode.AppendBinary; + switch (cType) { + case GZIP: + delegate = new CompressedOutputRConnection(this, new GZIPOutputStream(new FileOutputStream(path, append), GZIP_BUFFER_SIZE)); + break; + case BZIP2: + delegate = new BZip2OutputRConnection(this, new ByteArrayOutputStream(), append); + break; + case XZ: + delegate = new CompressedOutputRConnection(this, new XZOutputStream(new FileOutputStream(path, append), new LZMA2Options(), XZ.CHECK_CRC32)); + break; + } break; + } default: throw 
RError.nyi(RError.SHOW_CALLER2, "open mode: " + getOpenMode()); } @@ -109,15 +156,10 @@ public class GZIPConnections { // } } - private static class GZIPInputRConnection extends DelegateReadRConnection implements ReadWriteHelper { + private static class CompressedInputRConnection extends DelegateReadRConnection implements ReadWriteHelper { private InputStream inputStream; - GZIPInputRConnection(GZIPRConnection base) throws IOException { - super(base); - inputStream = new GZIPInputStream(new FileInputStream(base.path), GZIP_BUFFER_SIZE); - } - - protected GZIPInputRConnection(GZIPRConnection base, InputStream is) { + protected CompressedInputRConnection(CompressedRConnection base, InputStream is) { super(base); this.inputStream = is; } @@ -159,18 +201,18 @@ public class GZIPConnections { } } - private static class ByteGZipInputRConnection extends GZIPInputRConnection { - ByteGZipInputRConnection(GZIPRConnection base, ByteArrayInputStream is) { + private static class ByteStreamCompressedInputRConnection extends CompressedInputRConnection { + ByteStreamCompressedInputRConnection(CompressedRConnection base, ByteArrayInputStream is) { super(base, is); } } - private static class GZIPOutputRConnection extends DelegateWriteRConnection implements ReadWriteHelper { - private GZIPOutputStream outputStream; + private static class CompressedOutputRConnection extends DelegateWriteRConnection implements ReadWriteHelper { + protected OutputStream outputStream; - GZIPOutputRConnection(GZIPRConnection base) throws IOException { + protected CompressedOutputRConnection(CompressedRConnection base, OutputStream os) { super(base); - outputStream = new GZIPOutputStream(new FileOutputStream(base.path), GZIP_BUFFER_SIZE); + this.outputStream = os; } @Override @@ -215,4 +257,25 @@ public class GZIPConnections { outputStream.flush(); } } + + private static class BZip2OutputRConnection extends CompressedOutputRConnection { + private final ByteArrayOutputStream bos; + private final boolean 
append; + + BZip2OutputRConnection(CompressedRConnection base, ByteArrayOutputStream os, boolean append) { + super(base, os); + this.bos = os; + this.append = append; + } + + @Override + public void close() throws IOException { + flush(); + outputStream.close(); + // Now actually do the compression using sub-process + byte[] data = bos.toByteArray(); + RCompression.bzipCompressToFile(data, ((BasePathRConnection) base).path, append); + } + } + } diff --git a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/conn/ConnectionSupport.java b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/conn/ConnectionSupport.java index e53c581f1a97c8d6741feb1e4dbb16d038e5a219..25922b8644237f0845edce7260dae41b5560c662 100644 --- a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/conn/ConnectionSupport.java +++ b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/conn/ConnectionSupport.java @@ -308,6 +308,8 @@ public class ConnectionSupport { Terminal("terminal"), File("file"), GZFile("gzfile"), + BZFile("bzfile"), + XZFile("xzfile"), Socket("sockconn"), Text("textConnection"), URL("url"), @@ -511,7 +513,7 @@ public class ConnectionSupport { */ private int descriptor; - private final ConnectionClass conClass; + private ConnectionClass conClass; /** * The constructor for every connection class except {@link StdConnections}. @@ -548,6 +550,13 @@ public class ConnectionSupport { return conClass; } + /** + * {@code gzfile} can open other connection classes, and this isn't known initially. 
+ */ + public final void updateConnectionClass(ConnectionClass conClass) { + this.conClass = conClass; + } + protected void openNonLazyConnection() throws IOException { if (openMode.abstractOpenMode != AbstractOpenMode.Lazy) { createDelegateConnection(); diff --git a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/conn/FileConnections.java b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/conn/FileConnections.java index 0c02b5bbe9c8164f46e13be57d5b16e3ef3aeccb..0d9d532e6b8067c4cd06176db2782bde0f2c6513 100644 --- a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/conn/FileConnections.java +++ b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/conn/FileConnections.java @@ -24,6 +24,7 @@ package com.oracle.truffle.r.runtime.conn; import java.io.BufferedInputStream; import java.io.BufferedOutputStream; +import java.io.ByteArrayInputStream; import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.IOException; @@ -33,6 +34,8 @@ import java.io.RandomAccessFile; import java.nio.ByteBuffer; import java.util.zip.GZIPInputStream; +import org.tukaani.xz.XZInputStream; + import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary; import com.oracle.truffle.r.runtime.RCompression; import com.oracle.truffle.r.runtime.RError; @@ -110,7 +113,15 @@ public class FileConnections { inputStream = new BufferedInputStream(new FileInputStream(base.path)); break; case GZIP: - inputStream = new GZIPInputStream(new FileInputStream(base.path), GZIPConnections.GZIP_BUFFER_SIZE); + inputStream = new GZIPInputStream(new FileInputStream(base.path), CompressedConnections.GZIP_BUFFER_SIZE); + break; + case BZIP2: + // no in Java support, so go via byte array + byte[] bzipUdata = RCompression.bzipUncompressFromFile(base.path); + inputStream = new ByteArrayInputStream(bzipUdata); + break; + case XZ: + inputStream = new XZInputStream(new FileInputStream(base.path)); break; default: throw RError.nyi(RError.SHOW_CALLER2, 
"compression type: " + cType.name()); diff --git a/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/ExpectedTestOutput.test b/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/ExpectedTestOutput.test index 576bd6515b2c3733527ceff50539b67f3102739d..bef395ff97c3f3532232a6fe9a6497723cbd4646 100644 --- a/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/ExpectedTestOutput.test +++ b/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/ExpectedTestOutput.test @@ -55194,6 +55194,66 @@ attr(,"id") attr(,"id") [1] "An Example" +##com.oracle.truffle.r.test.builtins.TestBuiltin_zzfile.test1# +#{ f <- tempfile(); c <- bzfile(f); writeLines(as.character(1:100), c); close(c); readLines(f) } + [1] "1" "2" "3" "4" "5" "6" "7" "8" "9" "10" "11" "12" + [13] "13" "14" "15" "16" "17" "18" "19" "20" "21" "22" "23" "24" + [25] "25" "26" "27" "28" "29" "30" "31" "32" "33" "34" "35" "36" + [37] "37" "38" "39" "40" "41" "42" "43" "44" "45" "46" "47" "48" + [49] "49" "50" "51" "52" "53" "54" "55" "56" "57" "58" "59" "60" + [61] "61" "62" "63" "64" "65" "66" "67" "68" "69" "70" "71" "72" + [73] "73" "74" "75" "76" "77" "78" "79" "80" "81" "82" "83" "84" + [85] "85" "86" "87" "88" "89" "90" "91" "92" "93" "94" "95" "96" + [97] "97" "98" "99" "100" + +##com.oracle.truffle.r.test.builtins.TestBuiltin_zzfile.test1# +#{ f <- tempfile(); c <- gzfile(f); writeLines(as.character(1:100), c); close(c); readLines(f) } + [1] "1" "2" "3" "4" "5" "6" "7" "8" "9" "10" "11" "12" + [13] "13" "14" "15" "16" "17" "18" "19" "20" "21" "22" "23" "24" + [25] "25" "26" "27" "28" "29" "30" "31" "32" "33" "34" "35" "36" + [37] "37" "38" "39" "40" "41" "42" "43" "44" "45" "46" "47" "48" + [49] "49" "50" "51" "52" "53" "54" "55" "56" "57" "58" "59" "60" + [61] "61" "62" "63" "64" "65" "66" "67" "68" "69" "70" "71" "72" + [73] "73" "74" "75" "76" "77" "78" "79" "80" "81" "82" "83" "84" + [85] "85" "86" "87" "88" "89" "90" "91" "92" "93" "94" "95" "96" + [97] "97" "98" "99" "100" + 
+##com.oracle.truffle.r.test.builtins.TestBuiltin_zzfile.test1# +#{ f <- tempfile(); c <- xzfile(f); writeLines(as.character(1:100), c); close(c); readLines(f) } + [1] "1" "2" "3" "4" "5" "6" "7" "8" "9" "10" "11" "12" + [13] "13" "14" "15" "16" "17" "18" "19" "20" "21" "22" "23" "24" + [25] "25" "26" "27" "28" "29" "30" "31" "32" "33" "34" "35" "36" + [37] "37" "38" "39" "40" "41" "42" "43" "44" "45" "46" "47" "48" + [49] "49" "50" "51" "52" "53" "54" "55" "56" "57" "58" "59" "60" + [61] "61" "62" "63" "64" "65" "66" "67" "68" "69" "70" "71" "72" + [73] "73" "74" "75" "76" "77" "78" "79" "80" "81" "82" "83" "84" + [85] "85" "86" "87" "88" "89" "90" "91" "92" "93" "94" "95" "96" + [97] "97" "98" "99" "100" + +##com.oracle.truffle.r.test.builtins.TestBuiltin_zzfile.test2# +#{ f <- tempfile(); c <- bzfile(f); writeLines(as.character(1:50), c); close(c); c <- bzfile(f, "a"); writeLines(as.character(51:70), c); close(c); readLines(f) } + [1] "1" "2" "3" "4" "5" "6" "7" "8" "9" "10" "11" "12" "13" "14" "15" +[16] "16" "17" "18" "19" "20" "21" "22" "23" "24" "25" "26" "27" "28" "29" "30" +[31] "31" "32" "33" "34" "35" "36" "37" "38" "39" "40" "41" "42" "43" "44" "45" +[46] "46" "47" "48" "49" "50" "51" "52" "53" "54" "55" "56" "57" "58" "59" "60" +[61] "61" "62" "63" "64" "65" "66" "67" "68" "69" "70" + +##com.oracle.truffle.r.test.builtins.TestBuiltin_zzfile.test2# +#{ f <- tempfile(); c <- gzfile(f); writeLines(as.character(1:50), c); close(c); c <- gzfile(f, "a"); writeLines(as.character(51:70), c); close(c); readLines(f) } + [1] "1" "2" "3" "4" "5" "6" "7" "8" "9" "10" "11" "12" "13" "14" "15" +[16] "16" "17" "18" "19" "20" "21" "22" "23" "24" "25" "26" "27" "28" "29" "30" +[31] "31" "32" "33" "34" "35" "36" "37" "38" "39" "40" "41" "42" "43" "44" "45" +[46] "46" "47" "48" "49" "50" "51" "52" "53" "54" "55" "56" "57" "58" "59" "60" +[61] "61" "62" "63" "64" "65" "66" "67" "68" "69" "70" + +##com.oracle.truffle.r.test.builtins.TestBuiltin_zzfile.test2# +#{ f <- 
tempfile(); c <- xzfile(f); writeLines(as.character(1:50), c); close(c); c <- xzfile(f, "a"); writeLines(as.character(51:70), c); close(c); readLines(f) } + [1] "1" "2" "3" "4" "5" "6" "7" "8" "9" "10" "11" "12" "13" "14" "15" +[16] "16" "17" "18" "19" "20" "21" "22" "23" "24" "25" "26" "27" "28" "29" "30" +[31] "31" "32" "33" "34" "35" "36" "37" "38" "39" "40" "41" "42" "43" "44" "45" +[46] "46" "47" "48" "49" "50" "51" "52" "53" "54" "55" "56" "57" "58" "59" "60" +[61] "61" "62" "63" "64" "65" "66" "67" "68" "69" "70" + ##com.oracle.truffle.r.test.builtins.TestMiscBuiltins.testArrayConstructors# #{ character(1L) } [1] "" diff --git a/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/builtins/TestBuiltin_zzfile.java b/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/builtins/TestBuiltin_zzfile.java new file mode 100644 index 0000000000000000000000000000000000000000..b3db1793f86d90d0b93a8e558cda4078154e9e1b --- /dev/null +++ b/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/builtins/TestBuiltin_zzfile.java @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2016, 2016, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ +package com.oracle.truffle.r.test.builtins; + +import org.junit.Test; + +import com.oracle.truffle.r.test.TestBase; + +public class TestBuiltin_zzfile extends TestBase { + private static final String[] CTYPES = new String[]{"g", "b", "x"}; + + @Test + public void test1() { + assertEval(TestBase.template("{ f <- tempfile(); c <- %0zfile(f); writeLines(as.character(1:100), c); close(c); readLines(f) }", CTYPES)); + } + + @Test + public void test2() { + assertEval(TestBase.template( + "{ f <- tempfile(); c <- %0zfile(f); writeLines(as.character(1:50), c); close(c); c <- %0zfile(f, \"a\"); writeLines(as.character(51:70), c); close(c); readLines(f) }", + CTYPES)); + } + +} diff --git a/mx.fastr/suite.py b/mx.fastr/suite.py index a27f13a384330cec954bb79607a2f9ddc5825583..725759b14b7099b48061e32f878c82a1c1979123 100644 --- a/mx.fastr/suite.py +++ b/mx.fastr/suite.py @@ -142,7 +142,6 @@ suite = { "sourceDirs" : ["src"], "dependencies" : [ "com.oracle.truffle.r.library", - "XZ-1.5" ], "checkstyle" : "com.oracle.truffle.r.runtime", "javaCompliance" : "1.8", @@ -217,6 +216,7 @@ suite = { "dependencies" : [ "truffle:TRUFFLE_API", "truffle:TRUFFLE_DEBUG", + "XZ-1.5", ], "checkstyle" : "com.oracle.truffle.r.runtime", "javaCompliance" : "1.8", @@ -311,6 +311,7 @@ suite = { "ANTLR-3.5", "GNUR", "GNU_ICONV", + "XZ-1.5", ], "distDependencies" : [ "truffle:TRUFFLE_API",