initial support for split internal

6d184801 · Michael Haupt · dfc82308 · 6d184801 · 6d184801
Commit 6d184801 authored 10 years ago by Michael Haupt
--- a/com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/R/split.R
+++ b/com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/R/split.R
+#  File src/library/base/R/split.R
+#  Part of the R package, http://www.R-project.org
+#
+#  Copyright (C) 1995-2013 The R Core Team
+#
+#  This program is free software; you can redistribute it and/or modify
+#  it under the terms of the GNU General Public License as published by
+#  the Free Software Foundation; either version 2 of the License, or
+#  (at your option) any later version.
+#
+#  This program is distributed in the hope that it will be useful,
+#  but WITHOUT ANY WARRANTY; without even the implied warranty of
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#  GNU General Public License for more details.
+#
+#  A copy of the GNU General Public License is available at
+#  http://www.r-project.org/Licenses/
+
+split <- function(x, f, drop = FALSE, ...) UseMethod("split")
+
+split.default <- function(x, f, drop = FALSE, sep = ".", ...)
+{
+    if(!missing(...)) .NotYetUsed(deparse(...), error = FALSE)
+    
+    if (is.list(f)) f <- interaction(f, drop = drop, sep = sep)
+    else if (!is.factor(f)) f <- as.factor(f) # docs say as.factor
+    else if (drop) f <- factor(f) # drop extraneous levels
+    storage.mode(f) <- "integer"  # some factors have had double in the past
+    if (is.null(attr(x, "class")))
+        return(.Internal(split(x, f)))
+    ## else
+    lf <- levels(f)
+    y <- vector("list", length(lf))
+    names(y) <- lf
+    ind <- .Internal(split(seq_along(x), f))
+    for(k in lf) y[[k]] <- x[ind[[k]]]
+    y
+}
+
+## This is documented to work for matrices too
+split.data.frame <- function(x, f, drop = FALSE, ...)
+    lapply(split(x = seq_len(nrow(x)), f = f, drop = drop, ...),
+            function(ind) x[ind, , drop = FALSE])
+
+#`split<-` <- function(x, f, drop = FALSE, ..., value) UseMethod("split<-")
+
+#`split<-.default` <- function(x, f, drop = FALSE, ..., value)
+#{
+#    ix <- split(seq_along(x), f, drop = drop, ...)
+#    n <- length(value)
+#    j <- 0
+#    for (i in ix) {
+#        j <- j %% n + 1
+#        x[i] <- value[[j]]
+#    }
+#    x
+#}
+
+## This is documented to work for matrices too
+#`split<-.data.frame` <- function(x, f, drop = FALSE, ..., value)
+#{
+#    ix <- split(seq_len(nrow(x)), f, drop = drop, ...)
+#    n <- length(value)
+#    j <- 0
+#    for (i in ix) {
+#        j <- j %% n + 1
+#        x[i,] <- value[[j]]
+#    }
+#    x
+#}
+
+#unsplit <- function (value, f, drop = FALSE)
+#{
+#    len <- length(if (is.list(f)) f[[1L]] else f)
+#    if (is.data.frame(value[[1L]])) {
+#        x <- value[[1L]][rep(NA, len),, drop = FALSE]
+#        rownames(x) <- unsplit(lapply(value, rownames), f, drop = drop)
+#    } else
+#        x <- value[[1L]][rep(NA, len)]
+#    split(x, f, drop = drop) <- value
+#    x
+#}
--- a/com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/Split.java
+++ b/com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/Split.java
+/*
+ * Copyright (c) 2014, 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package com.oracle.truffle.r.nodes.builtin.base;
+
+import static com.oracle.truffle.r.runtime.RBuiltinKind.*;
+
+import java.util.*;
+
+import com.oracle.truffle.api.CompilerDirectives.*;
+import com.oracle.truffle.api.dsl.*;
+import com.oracle.truffle.r.nodes.builtin.*;
+import com.oracle.truffle.r.runtime.*;
+import com.oracle.truffle.r.runtime.data.*;
+import com.oracle.truffle.r.runtime.data.model.*;
+
+@RBuiltin(name = "split", kind = INTERNAL, parameterNames = {"x", "f"})
+public abstract class Split extends RBuiltinNode {
+
+    private static final int INITIAL_SIZE = 5;
+    private static final int SCALE_FACTOR = 2;
+
+    @Specialization
+    protected RList split(RAbstractIntVector x, RFactor f) {
+        HashMap<Object, Integer> levelMapping = getLevelMapping(f);
+        final int nLevels = levelMapping.size();
+
+        // initialise result arrays
+        int[][] collectResults = new int[nLevels][];
+        int[] collectResultSize = new int[nLevels];
+        for (int i = 0; i < collectResults.length; ++i) {
+            collectResults[i] = new int[INITIAL_SIZE];
+        }
+
+        // perform split
+        for (int i = 0, fi = 0; i < x.getLength(); ++i, fi = Utils.incMod(fi, f.getLength())) {
+            int resultIndex = levelMapping.get(f.getDataAtAsObject(fi));
+            int[] collect = collectResults[resultIndex];
+            if (collect.length == collectResultSize[resultIndex]) {
+                collectResults[resultIndex] = Arrays.copyOf(collect, collect.length * SCALE_FACTOR);
+                collect = collectResults[resultIndex];
+            }
+            collect[collectResultSize[resultIndex]++] = x.getDataAt(i);
+        }
+
+        // assemble result vectors and level names
+        String[] names = new String[nLevels];
+        Object[] results = new Object[nLevels];
+        for (HashMap.Entry<Object, Integer> e : levelMapping.entrySet()) {
+            Object level = e.getKey();
+            int index = e.getValue();
+            names[index] = RRuntime.toString(level);
+            results[index] = RDataFactory.createIntVector(Arrays.copyOfRange(collectResults[index], 0, collectResultSize[index]), RDataFactory.COMPLETE_VECTOR);
+        }
+
+        return RDataFactory.createList(results, RDataFactory.createStringVector(names, RDataFactory.COMPLETE_VECTOR));
+    }
+
+    @Specialization
+    protected RList split(RAbstractDoubleVector x, RFactor f) {
+        HashMap<Object, Integer> levelMapping = getLevelMapping(f);
+        final int nLevels = levelMapping.size();
+
+        // initialise result arrays
+        double[][] collectResults = new double[nLevels][];
+        int[] collectResultSize = new int[nLevels];
+        for (int i = 0; i < collectResults.length; ++i) {
+            collectResults[i] = new double[INITIAL_SIZE];
+        }
+
+        // perform split
+        for (int i = 0, fi = 0; i < x.getLength(); ++i, fi = Utils.incMod(fi, f.getLength())) {
+            int resultIndex = levelMapping.get(f.getDataAtAsObject(fi));
+            double[] collect = collectResults[resultIndex];
+            if (collect.length == collectResultSize[resultIndex]) {
+                collectResults[resultIndex] = Arrays.copyOf(collect, collect.length * SCALE_FACTOR);
+                collect = collectResults[resultIndex];
+            }
+            collect[collectResultSize[resultIndex]++] = x.getDataAt(i);
+        }
+
+        // assemble result vectors and level names
+        String[] names = new String[nLevels];
+        Object[] results = new Object[nLevels];
+        for (HashMap.Entry<Object, Integer> e : levelMapping.entrySet()) {
+            Object level = e.getKey();
+            int index = e.getValue();
+            names[index] = RRuntime.toString(level);
+            results[index] = RDataFactory.createDoubleVector(Arrays.copyOfRange(collectResults[index], 0, collectResultSize[index]), RDataFactory.COMPLETE_VECTOR);
+        }
+
+        return RDataFactory.createList(results, RDataFactory.createStringVector(names, RDataFactory.COMPLETE_VECTOR));
+    }
+
+    /**
+     * Obtain a mapping from factor levels to integers, in the order of appearance of the levels in
+     * the factor.
+     */
+    @TruffleBoundary
+    private static HashMap<Object, Integer> getLevelMapping(RFactor f) {
+        HashMap<Object, Integer> map = new HashMap<>();
+        int lastIndex = 0;
+        for (int i = 0; i < f.getLength(); ++i) {
+            Object level = f.getDataAtAsObject(i);
+            if (!map.containsKey(level)) {
+                map.put(level, lastIndex++);
+            }
+        }
+        return map;
+    }
+
+}