From abbc94d159eaeb1e17b7676ec021a8e59db81183 Mon Sep 17 00:00:00 2001 From: Florian Angerer <florian.angerer@oracle.com> Date: Wed, 1 Mar 2017 13:53:24 +0100 Subject: [PATCH] Fixed serious bug during decoding: Did not properly use the decoder when reading single 'elements' from a connection using method 'getc'. An element is either a character or a byte depending on the connection mode text or binary, respectively. Now, a stream decoder is created on demand and used for any subsequent character-based reading. As in GnuR, mixing binary- and text-based reading and writing leads to undefined behavior since the decoder may already have consumed some bytes. --- .../r/runtime/conn/ConnectionSupport.java | 2 +- .../r/runtime/conn/DelegateRConnection.java | 82 +++++++++++++------ .../runtime/conn/DelegateReadRConnection.java | 18 ++-- .../conn/DelegateReadWriteRConnection.java | 6 +- .../truffle/r/runtime/conn/RConnection.java | 15 +++- 5 files changed, 90 insertions(+), 33 deletions(-) diff --git a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/conn/ConnectionSupport.java b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/conn/ConnectionSupport.java index b772be488a..3e0941dce2 100644 --- a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/conn/ConnectionSupport.java +++ b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/conn/ConnectionSupport.java @@ -1061,7 +1061,7 @@ public class ConnectionSupport { if (conn instanceof BaseRConnection) { return (BaseRConnection) conn; } else if (conn instanceof DelegateReadRConnection) { - return ((DelegateReadRConnection) conn).base; + return ((DelegateRConnection) conn).base; } else { throw RInternalError.shouldNotReachHere(); } diff --git a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/conn/DelegateRConnection.java b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/conn/DelegateRConnection.java index 91359d131f..0139c2e9a7 100644 --- a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/conn/DelegateRConnection.java +++ b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/conn/DelegateRConnection.java @@ -30,6 +30,8 @@ import java.nio.channels.ReadableByteChannel; import java.nio.channels.SeekableByteChannel; import java.nio.channels.WritableByteChannel; import java.nio.charset.Charset; +import java.nio.charset.CharsetDecoder; +import java.nio.charset.CodingErrorAction; import java.util.ArrayList; import java.util.Objects; @@ -52,6 +54,7 @@ import sun.nio.cs.StreamDecoder; */ abstract class DelegateRConnection implements RConnection { protected final BaseRConnection base; + private StreamDecoder decoder; DelegateRConnection(BaseRConnection base) { this.base = Objects.requireNonNull(base); @@ -275,34 +278,54 @@ abstract class DelegateRConnection implements RConnection { return new String(chars, 0, j); } - public static String readCharHelper(int nchars, ReadableByteChannel channel, boolean useBytes) throws IOException { - if (useBytes) { - ByteBuffer buf = ByteBuffer.allocate(nchars); - channel.read(buf); - int j = 0; - for (; j < buf.position(); j++) { - // strings end at 0 - if (buf.get(j) == 0) { - break; - } + /** + * Reads a specified number of single-byte characters.<br> + * <p> + * This method is meant to be used if R's function {@code readChar} is called with parameter + * {@code useBytes=TRUE}. + * </p> + * + * @param nchars The number of single-byte characters to read. + * @param channel The channel to read from (must not be {@code null}). + * @throws IOException + */ + public static String readCharHelper(int nchars, ReadableByteChannel channel) throws IOException { + ByteBuffer buf = ByteBuffer.allocate(nchars); + channel.read(buf); + int j = 0; + for (; j < buf.position(); j++) { + // strings end at 0 + if (buf.get(j) == 0) { + break; } + } - return new String(buf.array(), 0, j); - } else { - // we need a decoder - StreamDecoder decoder = StreamDecoder.forDecoder(channel, Charset.defaultCharset().newDecoder(), nchars); - char[] chars = new char[nchars]; - decoder.read(chars); - int j = 0; - for (; j < chars.length; j++) { - // strings end at 0 - if (chars[j] == 0) { - break; - } - } + return new String(buf.array(), 0, j); + } - return new String(chars, 0, j); + /** + * Reads a specified number of characters (not bytes).<br> + * <p> + * This method is meant to be used if R's function {@code readChar} is called with parameter + * {@code useBytes=FALSE}. + * </p> + * + * @param nchars The number of characters to read. + * @param decoder The stream decoder to use (must not be {@code null}). + * @throws IOException + */ + public static String readCharHelper(int nchars, StreamDecoder decoder) throws IOException { + // we need a decoder + char[] chars = new char[nchars]; + decoder.read(chars); + int j = 0; + for (; j < chars.length; j++) { + // strings end at 0 + if (chars[j] == 0) { + break; + } } + return new String(chars, 0, j); } /** @@ -355,4 +378,15 @@ abstract class DelegateRConnection implements RConnection { public void pushBack(RAbstractStringVector lines, boolean addNewLine) { throw RInternalError.shouldNotReachHere(); } + + /** + * Creates the stream decoder on demand and returns it. + */ + protected StreamDecoder getDecoder() throws IOException { + if (decoder == null) { + CharsetDecoder charsetEncoder = base.getEncoding().newDecoder().onMalformedInput(CodingErrorAction.REPLACE).onUnmappableCharacter(CodingErrorAction.REPLACE); + decoder = StreamDecoder.forDecoder(getChannel(), charsetEncoder, -1); + } + return decoder; + } } diff --git a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/conn/DelegateReadRConnection.java b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/conn/DelegateReadRConnection.java index d317847217..70a2bac8df 100644 --- a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/conn/DelegateReadRConnection.java +++ b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/conn/DelegateReadRConnection.java @@ -63,15 +63,23 @@ public abstract class DelegateReadRConnection extends DelegateRConnection { @Override public int getc() throws IOException { - tmp.clear(); - int nread = getChannel().read(tmp); - tmp.rewind(); - return nread > 0 ? tmp.get() : -1; + if (isTextMode()) { + return getDecoder().read(); + } else { + tmp.clear(); + int nread = getChannel().read(tmp); + tmp.rewind(); + return nread > 0 ? tmp.get() : -1; + } } @Override public String readChar(int nchars, boolean useBytes) throws IOException { - return DelegateRConnection.readCharHelper(nchars, getChannel(), useBytes); + if (useBytes) { + return DelegateRConnection.readCharHelper(nchars, getChannel()); + } else { + return DelegateRConnection.readCharHelper(nchars, getDecoder()); + } } @Override diff --git a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/conn/DelegateReadWriteRConnection.java b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/conn/DelegateReadWriteRConnection.java index 664f74183e..011cf49d1b 100644 --- a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/conn/DelegateReadWriteRConnection.java +++ b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/conn/DelegateReadWriteRConnection.java @@ -59,7 +59,11 @@ abstract class DelegateReadWriteRConnection extends DelegateRConnection { @Override public String readChar(int nchars, boolean useBytes) throws IOException { - return DelegateRConnection.readCharHelper(nchars, getChannel(), useBytes); + if (useBytes) { + return DelegateRConnection.readCharHelper(nchars, getChannel()); + } else { + return DelegateRConnection.readCharHelper(nchars, getDecoder()); + } } @Override diff --git a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/conn/RConnection.java b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/conn/RConnection.java index 761e543d6c..341d12e3ba 100644 --- a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/conn/RConnection.java +++ b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/conn/RConnection.java @@ -43,12 +43,23 @@ public interface RConnection extends AutoCloseable { } /** - * Return the underlying input stream (for internal use). + * Return the underlying input stream (for internal use).<br> + * <p> + * <b>NOTE:</b> The connection may do some caching internally! Therefore, the behavior is + * undefined if you mix using the input stream directly and using the read methods of the + * connection. + * </p> + * */ InputStream getInputStream() throws IOException; /** - * Return the underlying output stream (for internal use). + * Return the underlying output stream (for internal use).<br> + * <p> + * <b>NOTE:</b> The connection may do some caching internally! Therefore, the behavior is + * undefined if you mix using the output stream directly and using the write methods of the + * connection. + * </p> */ OutputStream getOutputStream() throws IOException; -- GitLab