Skip to content
Snippets Groups Projects
Commit 6d184801 authored by Michael Haupt's avatar Michael Haupt
Browse files

initial support for split internal

parent dfc82308
Branches
No related tags found
No related merge requests found
# File src/library/base/R/split.R
# Part of the R package, http://www.R-project.org
#
# Copyright (C) 1995-2013 The R Core Team
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# A copy of the GNU General Public License is available at
# http://www.r-project.org/Licenses/
split <- function(x, f, drop = FALSE, ...) UseMethod("split")
split.default <- function(x, f, drop = FALSE, sep = ".", ...)
{
if(!missing(...)) .NotYetUsed(deparse(...), error = FALSE)
if (is.list(f)) f <- interaction(f, drop = drop, sep = sep)
else if (!is.factor(f)) f <- as.factor(f) # docs say as.factor
else if (drop) f <- factor(f) # drop extraneous levels
storage.mode(f) <- "integer" # some factors have had double in the past
if (is.null(attr(x, "class")))
return(.Internal(split(x, f)))
## else
lf <- levels(f)
y <- vector("list", length(lf))
names(y) <- lf
ind <- .Internal(split(seq_along(x), f))
for(k in lf) y[[k]] <- x[ind[[k]]]
y
}
## This is documented to work for matrices too
split.data.frame <- function(x, f, drop = FALSE, ...)
lapply(split(x = seq_len(nrow(x)), f = f, drop = drop, ...),
function(ind) x[ind, , drop = FALSE])
#`split<-` <- function(x, f, drop = FALSE, ..., value) UseMethod("split<-")
#`split<-.default` <- function(x, f, drop = FALSE, ..., value)
#{
# ix <- split(seq_along(x), f, drop = drop, ...)
# n <- length(value)
# j <- 0
# for (i in ix) {
# j <- j %% n + 1
# x[i] <- value[[j]]
# }
# x
#}
## This is documented to work for matrices too
#`split<-.data.frame` <- function(x, f, drop = FALSE, ..., value)
#{
# ix <- split(seq_len(nrow(x)), f, drop = drop, ...)
# n <- length(value)
# j <- 0
# for (i in ix) {
# j <- j %% n + 1
# x[i,] <- value[[j]]
# }
# x
#}
#unsplit <- function (value, f, drop = FALSE)
#{
# len <- length(if (is.list(f)) f[[1L]] else f)
# if (is.data.frame(value[[1L]])) {
# x <- value[[1L]][rep(NA, len),, drop = FALSE]
# rownames(x) <- unsplit(lapply(value, rownames), f, drop = drop)
# } else
# x <- value[[1L]][rep(NA, len)]
# split(x, f, drop = drop) <- value
# x
#}
/*
* Copyright (c) 2014, 2014, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package com.oracle.truffle.r.nodes.builtin.base;
import static com.oracle.truffle.r.runtime.RBuiltinKind.*;
import java.util.*;
import com.oracle.truffle.api.CompilerDirectives.*;
import com.oracle.truffle.api.dsl.*;
import com.oracle.truffle.r.nodes.builtin.*;
import com.oracle.truffle.r.runtime.*;
import com.oracle.truffle.r.runtime.data.*;
import com.oracle.truffle.r.runtime.data.model.*;
@RBuiltin(name = "split", kind = INTERNAL, parameterNames = {"x", "f"})
public abstract class Split extends RBuiltinNode {
private static final int INITIAL_SIZE = 5;
private static final int SCALE_FACTOR = 2;
@Specialization
protected RList split(RAbstractIntVector x, RFactor f) {
HashMap<Object, Integer> levelMapping = getLevelMapping(f);
final int nLevels = levelMapping.size();
// initialise result arrays
int[][] collectResults = new int[nLevels][];
int[] collectResultSize = new int[nLevels];
for (int i = 0; i < collectResults.length; ++i) {
collectResults[i] = new int[INITIAL_SIZE];
}
// perform split
for (int i = 0, fi = 0; i < x.getLength(); ++i, fi = Utils.incMod(fi, f.getLength())) {
int resultIndex = levelMapping.get(f.getDataAtAsObject(fi));
int[] collect = collectResults[resultIndex];
if (collect.length == collectResultSize[resultIndex]) {
collectResults[resultIndex] = Arrays.copyOf(collect, collect.length * SCALE_FACTOR);
collect = collectResults[resultIndex];
}
collect[collectResultSize[resultIndex]++] = x.getDataAt(i);
}
// assemble result vectors and level names
String[] names = new String[nLevels];
Object[] results = new Object[nLevels];
for (HashMap.Entry<Object, Integer> e : levelMapping.entrySet()) {
Object level = e.getKey();
int index = e.getValue();
names[index] = RRuntime.toString(level);
results[index] = RDataFactory.createIntVector(Arrays.copyOfRange(collectResults[index], 0, collectResultSize[index]), RDataFactory.COMPLETE_VECTOR);
}
return RDataFactory.createList(results, RDataFactory.createStringVector(names, RDataFactory.COMPLETE_VECTOR));
}
@Specialization
protected RList split(RAbstractDoubleVector x, RFactor f) {
HashMap<Object, Integer> levelMapping = getLevelMapping(f);
final int nLevels = levelMapping.size();
// initialise result arrays
double[][] collectResults = new double[nLevels][];
int[] collectResultSize = new int[nLevels];
for (int i = 0; i < collectResults.length; ++i) {
collectResults[i] = new double[INITIAL_SIZE];
}
// perform split
for (int i = 0, fi = 0; i < x.getLength(); ++i, fi = Utils.incMod(fi, f.getLength())) {
int resultIndex = levelMapping.get(f.getDataAtAsObject(fi));
double[] collect = collectResults[resultIndex];
if (collect.length == collectResultSize[resultIndex]) {
collectResults[resultIndex] = Arrays.copyOf(collect, collect.length * SCALE_FACTOR);
collect = collectResults[resultIndex];
}
collect[collectResultSize[resultIndex]++] = x.getDataAt(i);
}
// assemble result vectors and level names
String[] names = new String[nLevels];
Object[] results = new Object[nLevels];
for (HashMap.Entry<Object, Integer> e : levelMapping.entrySet()) {
Object level = e.getKey();
int index = e.getValue();
names[index] = RRuntime.toString(level);
results[index] = RDataFactory.createDoubleVector(Arrays.copyOfRange(collectResults[index], 0, collectResultSize[index]), RDataFactory.COMPLETE_VECTOR);
}
return RDataFactory.createList(results, RDataFactory.createStringVector(names, RDataFactory.COMPLETE_VECTOR));
}
/**
* Obtain a mapping from factor levels to integers, in the order of appearance of the levels in
* the factor.
*/
@TruffleBoundary
private static HashMap<Object, Integer> getLevelMapping(RFactor f) {
HashMap<Object, Integer> map = new HashMap<>();
int lastIndex = 0;
for (int i = 0; i < f.getLength(); ++i) {
Object level = f.getDataAtAsObject(i);
if (!map.containsKey(level)) {
map.put(level, lastIndex++);
}
}
return map;
}
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment