From abbc94d159eaeb1e17b7676ec021a8e59db81183 Mon Sep 17 00:00:00 2001
From: Florian Angerer <florian.angerer@oracle.com>
Date: Wed, 1 Mar 2017 13:53:24 +0100
Subject: [PATCH] Fixed serious bug during decoding: Did not properly use the
 decoder when reading single 'elements' from a connection using method 'getc'.
 An element is either a character or a byte depending on the connection mode
 text or binary, respectively. Now, a stream decoder is created on demand and
 used for any subsequent character-based reading. As in GnuR, mixing binary-
 and text-based reading and writing leads to undefined behavior since the
 decoder may already have consumed some bytes.

---
 .../r/runtime/conn/ConnectionSupport.java     |  2 +-
 .../r/runtime/conn/DelegateRConnection.java   | 82 +++++++++++++------
 .../runtime/conn/DelegateReadRConnection.java | 18 ++--
 .../conn/DelegateReadWriteRConnection.java    |  6 +-
 .../truffle/r/runtime/conn/RConnection.java   | 15 +++-
 5 files changed, 90 insertions(+), 33 deletions(-)

diff --git a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/conn/ConnectionSupport.java b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/conn/ConnectionSupport.java
index b772be488a..3e0941dce2 100644
--- a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/conn/ConnectionSupport.java
+++ b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/conn/ConnectionSupport.java
@@ -1061,7 +1061,7 @@ public class ConnectionSupport {
         if (conn instanceof BaseRConnection) {
             return (BaseRConnection) conn;
         } else if (conn instanceof DelegateReadRConnection) {
-            return ((DelegateReadRConnection) conn).base;
+            return ((DelegateRConnection) conn).base;
         } else {
             throw RInternalError.shouldNotReachHere();
         }
diff --git a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/conn/DelegateRConnection.java b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/conn/DelegateRConnection.java
index 91359d131f..0139c2e9a7 100644
--- a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/conn/DelegateRConnection.java
+++ b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/conn/DelegateRConnection.java
@@ -30,6 +30,8 @@ import java.nio.channels.ReadableByteChannel;
 import java.nio.channels.SeekableByteChannel;
 import java.nio.channels.WritableByteChannel;
 import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CodingErrorAction;
 import java.util.ArrayList;
 import java.util.Objects;
 
@@ -52,6 +54,7 @@ import sun.nio.cs.StreamDecoder;
  */
 abstract class DelegateRConnection implements RConnection {
     protected final BaseRConnection base;
+    private StreamDecoder decoder;
 
     DelegateRConnection(BaseRConnection base) {
         this.base = Objects.requireNonNull(base);
@@ -275,34 +278,54 @@ abstract class DelegateRConnection implements RConnection {
         return new String(chars, 0, j);
     }
 
-    public static String readCharHelper(int nchars, ReadableByteChannel channel, boolean useBytes) throws IOException {
-        if (useBytes) {
-            ByteBuffer buf = ByteBuffer.allocate(nchars);
-            channel.read(buf);
-            int j = 0;
-            for (; j < buf.position(); j++) {
-                // strings end at 0
-                if (buf.get(j) == 0) {
-                    break;
-                }
+    /**
+     * Reads a specified number of single-byte characters.<br>
+     * <p>
+     * This method is meant to be used if R's function {@code readChar} is called with parameter
+     * {@code useBytes=TRUE}.
+     * </p>
+     *
+     * @param nchars The number of single-byte characters to read.
+     * @param channel The channel to read from (must not be {@code null}).
+     * @throws IOException
+     */
+    public static String readCharHelper(int nchars, ReadableByteChannel channel) throws IOException {
+        ByteBuffer buf = ByteBuffer.allocate(nchars);
+        channel.read(buf);
+        int j = 0;
+        for (; j < buf.position(); j++) {
+            // strings end at 0
+            if (buf.get(j) == 0) {
+                break;
             }
+        }
 
-            return new String(buf.array(), 0, j);
-        } else {
-            // we need a decoder
-            StreamDecoder decoder = StreamDecoder.forDecoder(channel, Charset.defaultCharset().newDecoder(), nchars);
-            char[] chars = new char[nchars];
-            decoder.read(chars);
-            int j = 0;
-            for (; j < chars.length; j++) {
-                // strings end at 0
-                if (chars[j] == 0) {
-                    break;
-                }
-            }
+        return new String(buf.array(), 0, j);
+    }
 
-            return new String(chars, 0, j);
+    /**
+     * Reads a specified number of characters (not bytes).<br>
+     * <p>
+     * This method is meant to be used if R's function {@code readChar} is called with parameter
+     * {@code useBytes=FALSE}.
+     * </p>
+     *
+     * @param nchars The number of characters to read.
+     * @param decoder The stream decoder to use (must not be {@code null}).
+     * @throws IOException
+     */
+    public static String readCharHelper(int nchars, StreamDecoder decoder) throws IOException {
+        // we need a decoder
+        char[] chars = new char[nchars];
+        decoder.read(chars);
+        int j = 0;
+        for (; j < chars.length; j++) {
+            // strings end at 0
+            if (chars[j] == 0) {
+                break;
+            }
         }
+        return new String(chars, 0, j);
     }
 
     /**
@@ -355,4 +378,15 @@ abstract class DelegateRConnection implements RConnection {
     public void pushBack(RAbstractStringVector lines, boolean addNewLine) {
         throw RInternalError.shouldNotReachHere();
     }
+
+    /**
+     * Creates the stream decoder on demand and returns it.
+     */
+    protected StreamDecoder getDecoder() throws IOException {
+        if (decoder == null) {
+            CharsetDecoder charsetEncoder = base.getEncoding().newDecoder().onMalformedInput(CodingErrorAction.REPLACE).onUnmappableCharacter(CodingErrorAction.REPLACE);
+            decoder = StreamDecoder.forDecoder(getChannel(), charsetEncoder, -1);
+        }
+        return decoder;
+    }
 }
diff --git a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/conn/DelegateReadRConnection.java b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/conn/DelegateReadRConnection.java
index d317847217..70a2bac8df 100644
--- a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/conn/DelegateReadRConnection.java
+++ b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/conn/DelegateReadRConnection.java
@@ -63,15 +63,23 @@ public abstract class DelegateReadRConnection extends DelegateRConnection {
 
     @Override
     public int getc() throws IOException {
-        tmp.clear();
-        int nread = getChannel().read(tmp);
-        tmp.rewind();
-        return nread > 0 ? tmp.get() : -1;
+        if (isTextMode()) {
+            return getDecoder().read();
+        } else {
+            tmp.clear();
+            int nread = getChannel().read(tmp);
+            tmp.rewind();
+            return nread > 0 ? tmp.get() : -1;
+        }
     }
 
     @Override
     public String readChar(int nchars, boolean useBytes) throws IOException {
-        return DelegateRConnection.readCharHelper(nchars, getChannel(), useBytes);
+        if (useBytes) {
+            return DelegateRConnection.readCharHelper(nchars, getChannel());
+        } else {
+            return DelegateRConnection.readCharHelper(nchars, getDecoder());
+        }
     }
 
     @Override
diff --git a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/conn/DelegateReadWriteRConnection.java b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/conn/DelegateReadWriteRConnection.java
index 664f74183e..011cf49d1b 100644
--- a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/conn/DelegateReadWriteRConnection.java
+++ b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/conn/DelegateReadWriteRConnection.java
@@ -59,7 +59,11 @@ abstract class DelegateReadWriteRConnection extends DelegateRConnection {
 
     @Override
     public String readChar(int nchars, boolean useBytes) throws IOException {
-        return DelegateRConnection.readCharHelper(nchars, getChannel(), useBytes);
+        if (useBytes) {
+            return DelegateRConnection.readCharHelper(nchars, getChannel());
+        } else {
+            return DelegateRConnection.readCharHelper(nchars, getDecoder());
+        }
     }
 
     @Override
diff --git a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/conn/RConnection.java b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/conn/RConnection.java
index 761e543d6c..341d12e3ba 100644
--- a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/conn/RConnection.java
+++ b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/conn/RConnection.java
@@ -43,12 +43,23 @@ public interface RConnection extends AutoCloseable {
     }
 
     /**
-     * Return the underlying input stream (for internal use).
+     * Return the underlying input stream (for internal use).<br>
+     * <p>
+     * <b>NOTE:</b> The connection may do some caching internally! Therefore, the behavior is
+     * undefined if you mix using the input stream directly and using the read methods of the
+     * connection.
+     * </p>
+     *
      */
     InputStream getInputStream() throws IOException;
 
     /**
-     * Return the underlying output stream (for internal use).
+     * Return the underlying output stream (for internal use).<br>
+     * <p>
+     * <b>NOTE:</b> The connection may do some caching internally! Therefore, the behavior is
+     * undefined if you mix using the output stream directly and using the write methods of the
+     * connection.
+     * </p>
      */
     OutputStream getOutputStream() throws IOException;
 
-- 
GitLab