Skip to content
Snippets Groups Projects
Commit abbc94d1 authored by Florian Angerer's avatar Florian Angerer
Browse files

Fixed serious bug during decoding: Did not properly use the decoder when...

Fixed serious bug during decoding: Did not properly use the decoder when reading single 'elements' from a connection using method 'getc'. An element is either a character or a byte depending on the connection mode text or binary, respectively.
Now, a stream decoder is created on demand and used for any subsequent character-based reading.
As in GnuR, mixing binary- and text-based reading and writing leads to undefined behavior since the decoder may already have consumed some bytes.
parent 14b00d34
No related branches found
No related tags found
No related merge requests found
......@@ -1061,7 +1061,7 @@ public class ConnectionSupport {
if (conn instanceof BaseRConnection) {
return (BaseRConnection) conn;
} else if (conn instanceof DelegateReadRConnection) {
return ((DelegateReadRConnection) conn).base;
return ((DelegateRConnection) conn).base;
} else {
throw RInternalError.shouldNotReachHere();
}
......
......@@ -30,6 +30,8 @@ import java.nio.channels.ReadableByteChannel;
import java.nio.channels.SeekableByteChannel;
import java.nio.channels.WritableByteChannel;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CodingErrorAction;
import java.util.ArrayList;
import java.util.Objects;
......@@ -52,6 +54,7 @@ import sun.nio.cs.StreamDecoder;
*/
abstract class DelegateRConnection implements RConnection {
protected final BaseRConnection base;
private StreamDecoder decoder;
DelegateRConnection(BaseRConnection base) {
this.base = Objects.requireNonNull(base);
......@@ -275,34 +278,54 @@ abstract class DelegateRConnection implements RConnection {
return new String(chars, 0, j);
}
public static String readCharHelper(int nchars, ReadableByteChannel channel, boolean useBytes) throws IOException {
if (useBytes) {
ByteBuffer buf = ByteBuffer.allocate(nchars);
channel.read(buf);
int j = 0;
for (; j < buf.position(); j++) {
// strings end at 0
if (buf.get(j) == 0) {
break;
}
/**
* Reads a specified number of single-byte characters.<br>
* <p>
* This method is meant to be used if R's function {@code readChar} is called with parameter
* {@code useBytes=TRUE}.
* </p>
*
* @param nchars The number of single-byte characters to read.
* @param channel The channel to read from (must not be {@code null}).
* @throws IOException
*/
public static String readCharHelper(int nchars, ReadableByteChannel channel) throws IOException {
ByteBuffer buf = ByteBuffer.allocate(nchars);
channel.read(buf);
int j = 0;
for (; j < buf.position(); j++) {
// strings end at 0
if (buf.get(j) == 0) {
break;
}
}
return new String(buf.array(), 0, j);
} else {
// we need a decoder
StreamDecoder decoder = StreamDecoder.forDecoder(channel, Charset.defaultCharset().newDecoder(), nchars);
char[] chars = new char[nchars];
decoder.read(chars);
int j = 0;
for (; j < chars.length; j++) {
// strings end at 0
if (chars[j] == 0) {
break;
}
}
return new String(buf.array(), 0, j);
}
return new String(chars, 0, j);
/**
* Reads a specified number of characters (not bytes).<br>
* <p>
* This method is meant to be used if R's function {@code readChar} is called with parameter
* {@code useBytes=FALSE}.
* </p>
*
* @param nchars The number of characters to read.
* @param decoder The stream decoder to use (must not be {@code null}).
* @throws IOException
*/
public static String readCharHelper(int nchars, StreamDecoder decoder) throws IOException {
// we need a decoder
char[] chars = new char[nchars];
decoder.read(chars);
int j = 0;
for (; j < chars.length; j++) {
// strings end at 0
if (chars[j] == 0) {
break;
}
}
return new String(chars, 0, j);
}
/**
......@@ -355,4 +378,15 @@ abstract class DelegateRConnection implements RConnection {
public void pushBack(RAbstractStringVector lines, boolean addNewLine) {
throw RInternalError.shouldNotReachHere();
}
/**
* Creates the stream decoder on demand and returns it.
*/
protected StreamDecoder getDecoder() throws IOException {
if (decoder == null) {
CharsetDecoder charsetEncoder = base.getEncoding().newDecoder().onMalformedInput(CodingErrorAction.REPLACE).onUnmappableCharacter(CodingErrorAction.REPLACE);
decoder = StreamDecoder.forDecoder(getChannel(), charsetEncoder, -1);
}
return decoder;
}
}
......@@ -63,15 +63,23 @@ public abstract class DelegateReadRConnection extends DelegateRConnection {
@Override
public int getc() throws IOException {
tmp.clear();
int nread = getChannel().read(tmp);
tmp.rewind();
return nread > 0 ? tmp.get() : -1;
if (isTextMode()) {
return getDecoder().read();
} else {
tmp.clear();
int nread = getChannel().read(tmp);
tmp.rewind();
return nread > 0 ? tmp.get() : -1;
}
}
@Override
public String readChar(int nchars, boolean useBytes) throws IOException {
return DelegateRConnection.readCharHelper(nchars, getChannel(), useBytes);
if (useBytes) {
return DelegateRConnection.readCharHelper(nchars, getChannel());
} else {
return DelegateRConnection.readCharHelper(nchars, getDecoder());
}
}
@Override
......
......@@ -59,7 +59,11 @@ abstract class DelegateReadWriteRConnection extends DelegateRConnection {
@Override
public String readChar(int nchars, boolean useBytes) throws IOException {
return DelegateRConnection.readCharHelper(nchars, getChannel(), useBytes);
if (useBytes) {
return DelegateRConnection.readCharHelper(nchars, getChannel());
} else {
return DelegateRConnection.readCharHelper(nchars, getDecoder());
}
}
@Override
......
......@@ -43,12 +43,23 @@ public interface RConnection extends AutoCloseable {
}
/**
* Return the underlying input stream (for internal use).
* Return the underlying input stream (for internal use).<br>
* <p>
* <b>NOTE:</b> The connection may do some caching internally! Therefore, the behavior is
* undefined if you mix using the input stream directly and using the read methods of the
* connection.
* </p>
*
*/
InputStream getInputStream() throws IOException;
/**
* Return the underlying output stream (for internal use).
* Return the underlying output stream (for internal use).<br>
* <p>
* <b>NOTE:</b> The connection may do some caching internally! Therefore, the behavior is
* undefined if you mix using the output stream directly and using the write methods of the
* connection.
* </p>
*/
OutputStream getOutputStream() throws IOException;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment