diff --git a/com.oracle.truffle.r.library/src/com/oracle/truffle/r/library/stats/Covcor.java b/com.oracle.truffle.r.library/src/com/oracle/truffle/r/library/stats/Covcor.java index 7e9d9c2c63e33d6457db83931b5fd92591c2e6bf..283a18303f840f13a145e68646bccbb85737f8c6 100644 --- a/com.oracle.truffle.r.library/src/com/oracle/truffle/r/library/stats/Covcor.java +++ b/com.oracle.truffle.r.library/src/com/oracle/truffle/r/library/stats/Covcor.java @@ -12,745 +12,901 @@ package com.oracle.truffle.r.library.stats; import static com.oracle.truffle.r.nodes.builtin.CastBuilder.Predef.nullValue; -import static com.oracle.truffle.r.nodes.builtin.CastBuilder.Predef.eq; import static com.oracle.truffle.r.nodes.builtin.CastBuilder.Predef.toBoolean; -import java.util.Arrays; - +import com.oracle.truffle.api.CompilerDirectives; +import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary; import com.oracle.truffle.api.dsl.Specialization; import com.oracle.truffle.api.profiles.BranchProfile; import com.oracle.truffle.api.profiles.ConditionProfile; -import com.oracle.truffle.api.profiles.LoopConditionProfile; import com.oracle.truffle.r.nodes.attributes.SpecialAttributesFunctions.GetDimAttributeNode; +import com.oracle.truffle.r.nodes.attributes.SpecialAttributesFunctions.GetDimNamesAttributeNode; +import com.oracle.truffle.r.nodes.attributes.SpecialAttributesFunctions.SetDimNamesAttributeNode; +import com.oracle.truffle.r.nodes.attributes.SpecialAttributesFunctionsFactory.SetDimNamesAttributeNodeGen; import com.oracle.truffle.r.nodes.builtin.RExternalBuiltinNode; +import com.oracle.truffle.r.nodes.unary.IsFactorNode; import com.oracle.truffle.r.runtime.RError; import com.oracle.truffle.r.runtime.RError.Message; import com.oracle.truffle.r.runtime.RRuntime; import com.oracle.truffle.r.runtime.data.RDataFactory; import com.oracle.truffle.r.runtime.data.RDoubleVector; -import com.oracle.truffle.r.runtime.data.RIntVector; +import com.oracle.truffle.r.runtime.data.RList; import com.oracle.truffle.r.runtime.data.RNull; import com.oracle.truffle.r.runtime.data.model.RAbstractDoubleVector; -import com.oracle.truffle.r.runtime.nodes.RBaseNode; -import com.oracle.truffle.r.runtime.ops.na.NACheck; +import com.oracle.truffle.r.runtime.nmath.RMath; /* * Logic derived from GNU-R, library/stats/src/cov.c */ public abstract class Covcor extends RExternalBuiltinNode.Arg4 { + // Checkstyle: stop method name check - private final boolean isCor; - - public Covcor(boolean isCor) { - this.isCor = isCor; + private static RuntimeException error(String message) { + CompilerDirectives.transferToInterpreter(); + throw RError.error(RError.SHOW_CALLER, Message.GENERIC, message); } - static { - Casts casts = new Casts(Covcor.class); - casts.arg(0).mustNotBeMissing().mustBe(nullValue().not(), Message.IS_NULL, "x").asDoubleVector(); - casts.arg(1).mustNotBeMissing().asDoubleVector(); - casts.arg(2).asIntegerVector().findFirst().mustBe(eq(4), Message.NYI, "covcor: other method than 4 not implemented."); - casts.arg(3).asLogicalVector().findFirst().map(toBoolean()); + private static double ANS(double[] ans, int ncx, int i, int j) { + return ans[i + j * ncx]; } - @Specialization - public Object call(RAbstractDoubleVector x, @SuppressWarnings("unused") RNull y, int method, boolean iskendall) { - return corcov(x.materialize(), null, method, iskendall, this); + private static void ANS(double[] ans, int ncx, int i, int j, double value) { + ans[i + j * ncx] = value; } - @Specialization - public Object call(RAbstractDoubleVector x, RAbstractDoubleVector y, int method, boolean iskendall) { - return corcov(x.materialize(), y.materialize(), method, iskendall, this); + private static double CLAMP(double X) { + return (X >= 1 ? 1 : (X <= -1 ? -1 : X)); } - private final NACheck check = NACheck.create(); - - private final ConditionProfile noNAXProfile = ConditionProfile.createBinaryProfile(); - private final ConditionProfile noNAYProfile = ConditionProfile.createBinaryProfile(); - private final ConditionProfile xCompleteProfile = ConditionProfile.createBinaryProfile(); - private final ConditionProfile yCompleteProfile = ConditionProfile.createBinaryProfile(); - private final ConditionProfile bothZeroProfile = ConditionProfile.createBinaryProfile(); - private final BranchProfile tooManyMissing = BranchProfile.create(); - private final BranchProfile naInRes = BranchProfile.create(); - private final BranchProfile error = BranchProfile.create(); - private final BranchProfile warning = BranchProfile.create(); - - @Child private GetDimAttributeNode getDimsNode = GetDimAttributeNode.create(); - - private final LoopConditionProfile loopLength = LoopConditionProfile.createCountingProfile(); - - public RDoubleVector corcov(RDoubleVector x, RDoubleVector y, @SuppressWarnings("unused") int method, boolean iskendall, RBaseNode invokingNode) throws RError { - - boolean ansmat = getDimsNode.isMatrix(x); - int n; - int ncx; - if (ansmat) { - n = nrows(x); - ncx = ncols(x); - } else { - n = x.getLength(); - ncx = 1; - } + private static boolean ISNAN(double v) { + return Double.isNaN(v); + } - int ncy; - if (y == null) { - ncy = ncx; - } else if (getDimsNode.isMatrix(y)) { - if (nrows(y) != n) { - error.enter(); - throw error("incompatible dimensions"); + /* + * Note that "if (kendall)" and "if (cor)" are used inside a double for() loop; which makes the + * code better readable -- and is hopefully dealt with by a smartly optimizing compiler + */ + + /** + * Compute Cov(xx[], yy[]) or Cor(.,.) with n = length(xx) + */ + private static void COV_PAIRWISE_BODY(double[] ans, int n, int ncx, int i, int j, double[] x, double[] y, int xx, int yy, boolean[] sd_0, boolean cor, boolean kendall) { + double xmean = 0, ymean = 0; + int nobs = 0; + if (!kendall) { + for (int k = 0; k < n; k++) { + if (!(ISNAN(x[xx + k]) || ISNAN(y[yy + k]))) { + nobs++; + xmean += x[xx + k]; + ymean += y[yy + k]; + } } - ncy = ncols(y); - ansmat = true; - } else { - if (y.getLength() != n) { - error.enter(); - throw error("incompatible dimensions"); + } else /* kendall */ + for (int k = 0; k < n; k++) { + if (!(ISNAN(x[xx + k]) || ISNAN(y[yy + k]))) { + nobs++; + } } - ncy = 1; - } - // TODO adopt full use semantics + if (nobs >= 2) { + int n1 = -1; + double xsd = 0, ysd = 0, sum = 0; + if (!kendall) { + xmean /= nobs; + ymean /= nobs; + n1 = nobs - 1; + } + for (int k = 0; k < n; k++) { + if (!(ISNAN(x[xx + k]) || ISNAN(y[yy + k]))) { + if (!kendall) { + double xm = x[xx + k] - xmean; + double ym = y[yy + k] - ymean; + + sum += xm * ym; + if (cor) { + xsd += xm * xm; + ysd += ym * ym; + } + } - /* "default: complete" (easier for -Wall) */ - boolean naFail = false; - boolean everything = false; - boolean emptyErr = true; + else { /* Kendall's tau */ + for (n1 = 0; n1 < k; n1++) { + if (!(ISNAN(x[xx + n1]) || ISNAN(y[yy + n1]))) { + double xm = RMath.sign(x[xx + k] - x[xx + n1]); + double ym = RMath.sign(y[yy + k] - y[yy + n1]); - // case 4: /* "everything": NAs are propagated */ - everything = true; - emptyErr = false; + sum += xm * ym; + if (cor) { + xsd += xm * xm; + ysd += ym * ym; + } + } + } + } + } + } + if (cor) { + if (xsd == 0 || ysd == 0) { + sd_0[0] = true; + sum = RRuntime.DOUBLE_NA; + } else { + if (!kendall) { + xsd /= n1; + ysd /= n1; + sum /= n1; + } + sum /= (Math.sqrt(xsd) * Math.sqrt(ysd)); + sum = CLAMP(sum); + } + } else if (!kendall) { + sum /= n1; + } - if (emptyErr && x.getLength() == 0) { - error.enter(); - throw error("'x' is empty"); + ANS(ans, ncx, i, j, sum); + } else { + ANS(ans, ncx, i, j, RRuntime.DOUBLE_NA); } + } - double[] answerData = new double[ncx * ncy]; + private static void cov_pairwise1(int n, int ncx, double[] x, double[] ans, boolean[] sd_0, boolean cor, boolean kendall) { + for (int i = 0; i < ncx; i++) { + int xx = i * n; + for (int j = 0; j <= i; j++) { + int yy = j * n; - double[] xm = new double[ncx]; - boolean sd0; - if (y == null) { - if (everything) { - sd0 = covNA1(n, ncx, x, xm, answerData, isCor, iskendall); - } else { - RIntVector ind = RDataFactory.createIntVector(n); - complete1(n, ncx, x, ind, naFail); - sd0 = covComplete1(n, ncx, x, xm, ind, answerData, isCor, iskendall); - } - } else { - double[] ym = new double[ncy]; - if (everything) { - sd0 = covNA2(n, ncx, ncy, x, y, xm, ym, answerData, isCor, iskendall); - } else { - RIntVector ind = RDataFactory.createIntVector(n); - complete2(n, ncx, ncy, x, y, ind, naFail); - sd0 = covComplete2(n, ncx, ncy, x, y, xm, ym, ind, answerData, isCor, iskendall); + COV_PAIRWISE_BODY(ans, n, ncx, i, j, x, x, xx, yy, sd_0, cor, kendall); + + ANS(ans, ncx, j, i, ANS(ans, ncx, i, j)); } } + } - if (sd0) { /* only in cor() */ - warning.enter(); - RError.warning(invokingNode, RError.Message.SD_ZERO); - } + private static void cov_pairwise2(int n, int ncx, int ncy, double[] x, double[] y, double[] ans, boolean[] sd_0, boolean cor, boolean kendall) { + for (int i = 0; i < ncx; i++) { + int xx = i * n; + for (int j = 0; j < ncy; j++) { + int yy = j * n; - boolean seenNA = false; - for (int i = 0; i < answerData.length; i++) { - if (RRuntime.isNA(answerData[i])) { - naInRes.enter(); - seenNA = true; - break; + COV_PAIRWISE_BODY(ans, n, ncx, i, j, x, y, xx, yy, sd_0, cor, kendall); } } - - RDoubleVector ans = null; - if (getDimsNode.isMatrix(x)) { - ans = RDataFactory.createDoubleVector(answerData, !seenNA, new int[]{ncx, ncy}); - } else { - ans = RDataFactory.createDoubleVector(answerData, !seenNA); - } - return ans; } - private int ncols(RDoubleVector x) { - assert x.isMatrix(); - return getDimsNode.getDimensions(x)[1]; - } - - private int nrows(RDoubleVector x) { - assert x.isMatrix(); - return getDimsNode.getDimensions(x)[0]; - } + /* + * method = "complete" or "all.obs" (only difference: na_fail): -------- ------- + */ - private void complete1(int n, int ncx, RDoubleVector x, RIntVector ind, boolean naFail) { - int i; - int j; - for (i = 0; i < n; i++) { - ind.updateDataAt(i, 1, check); - } - for (j = 0; j < ncx; j++) { - // z = &x[j * n]; - for (i = 0; i < n; i++) { - if (Double.isNaN(x.getDataAt(j * n + i))) { - if (naFail) { - throw error("missing observations in cov/cor"); - } else { - ind.updateDataAt(i, 0, check); - } + /* This uses two passes for better accuracy */ + private static void MEAN(int n, int ncx, double[] x, double[] xm, boolean[] ind, int nobs) { + /* variable means */ + for (int i = 0; i < ncx; i++) { + int xx = i * n; + double sum = 0; + for (int k = 0; k < n; k++) { + if (ind[k]) { + sum += x[xx + k]; } } - } - } - - private void complete2(int n, int ncx, int ncy, RDoubleVector x, RDoubleVector y, RIntVector ind, boolean naFail) { - int i; - int j; - for (i = 0; i < n; i++) { - ind.updateDataAt(i, 1, check); - } - for (j = 0; j < ncx; j++) { - // z = &x[j * n]; - for (i = 0; i < n; i++) { - if (Double.isNaN(x.getDataAt(j * n + i))) { - if (naFail) { - throw error("missing observations in cov/cor"); - } else { - ind.updateDataAt(i, 0, check); + double tmp = sum / nobs; + if (Double.isFinite(tmp)) { + sum = 0; + for (int k = 0; k < n; k++) { + if (ind[k]) { + sum += (x[xx + k] - tmp); } } + tmp = tmp + sum / nobs; } + xm[i] = tmp; } + } - for (j = 0; j < ncy; j++) { - // z = &y[j * n]; - for (i = 0; i < n; i++) { - if (Double.isNaN(y.getDataAt(j * n + i))) { - if (naFail) { - throw error("missing observations in cov/cor"); - } else { - ind.updateDataAt(i, 0, check); + /* This uses two passes for better accuracy */ + private static void MEAN_(int n, int ncx, double[] x, double[] xm, boolean[] has_na) { + /* variable means (has_na) */ + for (int i = 0; i < ncx; i++) { + double tmp; + if (has_na[i]) { + tmp = RRuntime.DOUBLE_NA; + } else { + int xx = i * n; + double sum = 0; + for (int k = 0; k < n; k++) { + sum += x[xx + k]; + } + tmp = sum / n; + if (Double.isFinite(tmp)) { + sum = 0; + for (int k = 0; k < n; k++) { + sum += (x[xx + k] - tmp); } + tmp = tmp + sum / n; } } + xm[i] = tmp; } } - private static boolean covComplete1(int n, int ncx, RDoubleVector x, double[] xm, RIntVector indInput, double[] ans, boolean cor, boolean kendall) { + private static void cov_complete1(int n, int ncx, double[] x, double[] xm, boolean[] ind, double[] ans, boolean[] sd_0, boolean cor, boolean kendall) { int n1 = -1; - int nobs; - boolean isSd0 = false; /* total number of complete observations */ - nobs = 0; + int nobs = 0; for (int k = 0; k < n; k++) { - if (indInput.getDataAt(k) != 0) { + if (ind[k]) { nobs++; } } - if (nobs <= 1) { /* too many missing */ - for (int i = 0; i < ans.length; i++) { - ans[i] = RRuntime.DOUBLE_NA; + if (nobs <= 1) {/* too many missing */ + for (int i = 0; i < ncx; i++) { + for (int j = 0; j < ncx; j++) { + ANS(ans, ncx, i, j, RRuntime.DOUBLE_NA); + } } - return isSd0; - } - - RIntVector ind = indInput; - if (nobs == ind.getLength()) { - // No values of ind are zeroed. - ind = null; + return; } if (!kendall) { - mean(x, xm, ind, n, ncx, nobs); + MEAN(n, ncx, x, xm, ind, nobs); /* -> xm[] */ n1 = nobs - 1; } for (int i = 0; i < ncx; i++) { + int xx = i * n; + if (!kendall) { double xxm = xm[i]; for (int j = 0; j <= i; j++) { + int yy = j * n; double yym = xm[j]; - double sum = 0.0; + double sum = 0; for (int k = 0; k < n; k++) { - if (ind == null || ind.getDataAt(k) != 0) { - sum += (x.getDataAt(i * n + k) - xxm) * (x.getDataAt(j * n + k) - yym); + if (ind[k]) { + sum += (x[xx + k] - xxm) * (x[yy + k] - yym); } } - double r = sum / n1; - ans[i + j * ncx] = r; - ans[j + i * ncx] = r; + double result = sum / n1; + ANS(ans, ncx, j, i, result); + ANS(ans, ncx, i, j, result); } } else { /* Kendall's tau */ - throw new UnsupportedOperationException("kendall's unsupported"); + for (int j = 0; j <= i; j++) { + int yy = j * n; + double sum = 0; + for (int k = 0; k < n; k++) { + if (ind[k]) { + for (n1 = 0; n1 < n; n1++) { + if (ind[n1]) { + sum += RMath.sign(x[xx + k] - x[xx + n1]) * RMath.sign(x[yy + k] - x[yy + n1]); + } + } + } + } + ANS(ans, ncx, j, i, sum); + ANS(ans, ncx, i, j, sum); + } } } if (cor) { for (int i = 0; i < ncx; i++) { - xm[i] = Math.sqrt(ans[i + i * ncx]); + xm[i] = Math.sqrt(ANS(ans, ncx, i, i)); } for (int i = 0; i < ncx; i++) { for (int j = 0; j < i; j++) { + double result; if (xm[i] == 0 || xm[j] == 0) { - isSd0 = true; - ans[i + j * ncx] = RRuntime.DOUBLE_NA; - ans[j + i * ncx] = RRuntime.DOUBLE_NA; + sd_0[0] = true; + result = RRuntime.DOUBLE_NA; } else { - double sum = ans[i + j * ncx] / (xm[i] * xm[j]); - if (sum > 1.0) { - sum = 1.0; + double current = ANS(ans, ncx, i, j); + if (RRuntime.isNA(current)) { + result = RRuntime.DOUBLE_NA; + } else { + result = CLAMP(current / (xm[i] * xm[j])); } - ans[i + j * ncx] = sum; - ans[j + i * ncx] = sum; } + ANS(ans, ncx, j, i, result); + ANS(ans, ncx, i, j, result); } - ans[i + i * ncx] = 1.0; + ANS(ans, ncx, i, i, 1); } } - - return isSd0; } - private static boolean covComplete2(int n, int ncx, int ncy, RDoubleVector x, RDoubleVector y, double[] xm, double[] ym, RIntVector indInput, double[] ans, boolean cor, boolean kendall) { + private static void cov_na_1(int n, int ncx, double[] x, double[] xm, boolean[] has_na, double[] ans, boolean[] sd_0, boolean cor, boolean kendall) { int n1 = -1; - int nobs; - boolean isSd0 = false; - - /* total number of complete observations */ - nobs = 0; - for (int k = 0; k < n; k++) { - if (indInput.getDataAt(k) != 0) { - nobs++; - } - } - if (nobs <= 1) { /* too many missing */ - for (int i = 0; i < ans.length; i++) { - ans[i] = RRuntime.DOUBLE_NA; + if (n <= 1) { /* too many missing */ + for (int i = 0; i < ncx; i++) { + for (int j = 0; j < ncx; j++) { + ANS(ans, ncx, i, j, RRuntime.DOUBLE_NA); + } } - return isSd0; - } - - RIntVector ind = indInput; - if (nobs == ind.getLength()) { - // No values of ind are zeroed. - ind = null; + return; } if (!kendall) { - mean(x, xm, ind, n, ncx, nobs); - mean(y, ym, ind, n, ncy, nobs); - n1 = nobs - 1; + MEAN_(n, ncx, x, xm, has_na);/* -> xm[] */ + n1 = n - 1; } for (int i = 0; i < ncx; i++) { - if (!kendall) { - double xxm = xm[i]; - for (int j = 0; j < ncy; j++) { - double yym = ym[j]; - double sum = 0; - for (int k = 0; k < n; k++) { - if (ind == null || ind.getDataAt(k) != 0) { - sum += (x.getDataAt(i * n + k) - xxm) * (y.getDataAt(j * n + k) - yym); + if (has_na[i]) { + for (int j = 0; j <= i; j++) { + ANS(ans, ncx, j, i, RRuntime.DOUBLE_NA); + ANS(ans, ncx, i, j, RRuntime.DOUBLE_NA); + } + } else { + int xx = i * n; + + if (!kendall) { + double xxm = xm[i]; + for (int j = 0; j <= i; j++) { + if (has_na[j]) { + ANS(ans, ncx, j, i, RRuntime.DOUBLE_NA); + ANS(ans, ncx, i, j, RRuntime.DOUBLE_NA); + } else { + int yy = j * n; + double yym = xm[j]; + double sum = 0; + for (int k = 0; k < n; k++) { + sum += (x[xx + k] - xxm) * (x[yy + k] - yym); + } + double result = sum / n1; + ANS(ans, ncx, j, i, result); + ANS(ans, ncx, i, j, result); + } + } + } else { /* Kendall's tau */ + for (int j = 0; j <= i; j++) { + if (has_na[j]) { + ANS(ans, ncx, j, i, RRuntime.DOUBLE_NA); + ANS(ans, ncx, i, j, RRuntime.DOUBLE_NA); + } else { + int yy = j * n; + double sum = 0; + for (int k = 0; k < n; k++) { + for (n1 = 0; n1 < n; n1++) { + sum += RMath.sign(x[xx + k] - x[xx + n1]) * RMath.sign(x[yy + k] - x[yy + n1]); + } + } + ANS(ans, ncx, j, i, sum); + ANS(ans, ncx, i, j, sum); } } - ans[i + j * ncx] = (sum / n1); } - } else { /* Kendall's tau */ - throw new UnsupportedOperationException("kendall's unsupported"); } } if (cor) { - covsdev(x, xm, ind, n, ncx, n1, kendall); /* -> xm[.] */ - covsdev(y, ym, ind, n, ncy, n1, kendall); /* -> ym[.] */ - for (int i = 0; i < ncx; i++) { - for (int j = 0; j < ncy; j++) { - double divisor = (xm[i] * ym[j]); - if (divisor == 0.0) { - isSd0 = true; - ans[i + j * ncx] = RRuntime.DOUBLE_NA; - } else { - double value = ans[i + j * ncx] / divisor; - if (value > 1) { - value = 1; + if (!has_na[i]) { + xm[i] = Math.sqrt(ANS(ans, ncx, i, i)); + } + } + for (int i = 0; i < ncx; i++) { + if (!has_na[i]) { + for (int j = 0; j < i; j++) { + double result; + if (xm[i] == 0 || xm[j] == 0) { + sd_0[0] = true; + result = RRuntime.DOUBLE_NA; + } else { + double current = ANS(ans, ncx, i, j); + if (RRuntime.isNA(current)) { + result = RRuntime.DOUBLE_NA; + } else { + result = CLAMP(current / (xm[i] * xm[j])); + } } - ans[i + j * ncx] = value; + ANS(ans, ncx, j, i, result); + ANS(ans, ncx, i, j, result); } } + ANS(ans, ncx, i, i, 1); } } - return isSd0; } - private static void covsdev(RDoubleVector vector, double[] vectorM, RIntVector ind, int n, int len, int n1, boolean kendall) { - for (int i = 0; i < len; i++) { + private static void COV_SDEV1(int n, int n1, int nc, double[] array, double[] m, boolean[] ind, boolean kendall) { + for (int i = 0; i < nc; i++) { /* Var(X[i]) */ + int xx = i * n; double sum = 0; if (!kendall) { - double xxm = vectorM[i]; + double xxm = m[i]; for (int k = 0; k < n; k++) { - if (ind == null || ind.getDataAt(k) != 0) { - double value = vector.getDataAt(i * n + k); - sum += (value - xxm) * (value - xxm); + if (ind[k]) { + sum += (array[xx + k] - xxm) * (array[xx + k] - xxm); } } sum /= n1; } else { /* Kendall's tau */ - throw new UnsupportedOperationException("kendall's unsupported"); - } - vectorM[i] = Math.sqrt(sum); - } - } - - private static void mean(RDoubleVector vector, double[] vectorM, RIntVector ind, int n, int len, int nobs) { - /* variable means */ - for (int i = 0; i < len; i++) { - double sum = 0.0; - for (int k = 0; k < n; k++) { - if (ind == null || ind.getDataAt(k) != 0) { - sum += vector.getDataAt(i * n + k); - } - } - double tmp = sum / nobs; - if (!Double.isInfinite(tmp)) { - sum = 0.0; for (int k = 0; k < n; k++) { - if (ind == null || ind.getDataAt(k) != 0) { - sum += (vector.getDataAt(i * n + k) - tmp); + if (ind[k]) { + for (int n1_ = 0; n1_ < n; n1_++) { + if (ind[n1_] && array[xx + k] != array[xx + n1_]) { + sum++; /* = sign(. - .)^2 */ + } + } } } - tmp = tmp + sum / nobs; - } - vectorM[i] = tmp; - } - } - - private static boolean[] findNAs(int n, int nc, RDoubleVector v) { - boolean[] hasNA = new boolean[nc]; - double[] data = v.getDataWithoutCopying(); - for (int j = 0; j < nc; j++) { - for (int i = 0; i < n; i++) { - if (Double.isNaN(data[j * n + i])) { - hasNA[j] = true; - break; - } } + m[i] = Math.sqrt(sum); } - return hasNA; } - private boolean covNA1(int n, int ncx, RDoubleVector x, double[] xm, double[] ans, boolean cor, boolean iskendall) { - double sum; - double xxm; - double yym; + private static void cov_complete2(int n, int ncx, int ncy, double[] x, double[] y, double[] xm, double[] ym, boolean[] ind, double[] ans, boolean[] sd_0, boolean cor, boolean kendall) { int n1 = -1; - boolean sd0 = false; - double[] xData = x.getDataWithoutCopying(); - boolean[] hasNAx = findNAs(n, ncx, x); - - if (n <= 1) { /* too many missing */ - tooManyMissing.enter(); - Arrays.fill(ans, RRuntime.DOUBLE_NA); - return sd0; + /* total number of complete observations */ + int nobs = 0; + for (int k = 0; k < n; k++) { + if (ind[k]) { + nobs++; + } } - - if (!iskendall) { - if (xCompleteProfile.profile(x.isComplete())) { - meanNoNA(n, ncx, xData, xm, hasNAx); - } else { - mean(n, ncx, xData, xm, hasNAx); + if (nobs <= 1) {/* too many missing */ + for (int i = 0; i < ncx; i++) { + for (int j = 0; j < ncy; j++) { + ANS(ans, ncx, i, j, RRuntime.DOUBLE_NA); + } } - n1 = n - 1; + return; } + if (!kendall) { + MEAN(n, ncx, x, xm, ind, nobs);/* -> xm[] */ + MEAN(n, ncy, y, ym, ind, nobs);/* -> ym[] */ + n1 = nobs - 1; + } for (int i = 0; i < ncx; i++) { - double[] temp = new double[n]; - if (noNAXProfile.profile(!hasNAx[i])) { - if (!iskendall) { - xxm = xm[i]; - for (int j = 0; j <= i; j++) { - double r; - if (noNAXProfile.profile(!hasNAx[j])) { - yym = xm[j]; - if (checkNAs(xxm, yym)) { - r = RRuntime.DOUBLE_NA; - } else { - sum = 0.0; - loopLength.profileCounted(n); - for (int k = 0; loopLength.inject(k < n); k++) { - double u = xData[i * n + k]; - double v = xData[j * n + k]; - temp[k] = (u - xxm) * (v - yym); - } - for (int k = 0; loopLength.inject(k < n); k++) { - sum += temp[k]; + int xx = i * n; + if (!kendall) { + double xxm = xm[i]; + for (int j = 0; j < ncy; j++) { + int yy = j * n; + double yym = ym[j]; + double sum = 0; + for (int k = 0; k < n; k++) { + if (ind[k]) { + sum += (x[xx + k] - xxm) * (y[yy + k] - yym); + } + } + ANS(ans, ncx, i, j, sum / n1); + } + } else { /* Kendall's tau */ + for (int j = 0; j < ncy; j++) { + int yy = j * n; + double sum = 0; + for (int k = 0; k < n; k++) { + if (ind[k]) { + for (n1 = 0; n1 < n; n1++) { + if (ind[n1]) { + sum += RMath.sign(x[xx + k] - x[xx + n1]) * RMath.sign(y[yy + k] - y[yy + n1]); } - r = checkNAs(sum) ? RRuntime.DOUBLE_NA : sum / n1; } - } else { - r = RRuntime.DOUBLE_NA; } - ans[j + i * ncx] = r; - ans[i + j * ncx] = r; } - } else { /* Kendall's tau */ - throw new UnsupportedOperationException("kendall's unsupported"); - } - } else { - for (int j = 0; j <= i; j++) { - ans[j + i * ncx] = RRuntime.DOUBLE_NA; - ans[i + j * ncx] = RRuntime.DOUBLE_NA; + ANS(ans, ncx, i, j, sum); } } } if (cor) { + + COV_SDEV1(n, n1, ncx, x, xm, ind, kendall); /* -> xm[.] */ + COV_SDEV1(n, n1, ncy, y, ym, ind, kendall); /* -> ym[.] */ + for (int i = 0; i < ncx; i++) { - if (noNAXProfile.profile(!hasNAx[i])) { - double u = ans[i + i * ncx]; - xm[i] = checkNAs(u) ? RRuntime.DOUBLE_NA : Math.sqrt(u); - } - } - for (int i = 0; i < ncx; i++) { - if (noNAXProfile.profile(!hasNAx[i])) { - for (int j = 0; j < i; j++) { - if (bothZeroProfile.profile(xm[i] == 0 || xm[j] == 0)) { - sd0 = true; - ans[j + i * ncx] = RRuntime.DOUBLE_NA; - ans[i + j * ncx] = RRuntime.DOUBLE_NA; + for (int j = 0; j < ncy; j++) { + double result; + if (xm[i] == 0 || ym[j] == 0) { + sd_0[0] = true; + result = RRuntime.DOUBLE_NA; + } else { + double current = ANS(ans, ncx, i, j); + if (RRuntime.isNA(current)) { + result = RRuntime.DOUBLE_NA; } else { - double u = ans[i + j * ncx]; - double v = xm[i]; - double w = xm[j]; - sum = checkNAs(u, v, w) ? RRuntime.DOUBLE_NA : u / (v * w); - if (sum > 1.0) { - sum = 1.0; - } - ans[j + i * ncx] = sum; - ans[i + j * ncx] = sum; + result = CLAMP(current / (xm[i] * ym[j])); } } + ANS(ans, ncx, i, j, result); } - ans[i + i * ncx] = 1.0; } } - - return sd0; } - private static void meanNoNA(int n, int ncx, double[] x, double[] xm, boolean[] hasNA) { - double sum; - double tmp; - /* variable means (has_na) */ - for (int i = 0; i < ncx; i++) { - if (hasNA[i]) { - tmp = RRuntime.DOUBLE_NA; - } else { - sum = 0.0; - for (int k = 0; k < n; k++) { - double u = x[i * n + k]; - sum += u; - } - tmp = sum / n; - if (RRuntime.isFinite(tmp)) { - sum = 0.0; + private static void COV_SDEV2(int n, int n1, int nc, double[] array, double[] m, boolean[] has_na, boolean kendall) { + for (int i = 0; i < nc; i++) { + if (!has_na[i]) { /* Var(X[j]) */ + int xx = i * n; + double sum = 0; + if (!kendall) { + double xxm = m[i]; for (int k = 0; k < n; k++) { - double u = x[i * n + k]; - sum += u - tmp; + sum += (array[xx + k] - xxm) * (array[xx + k] - xxm); } - tmp += sum / n; - } - } - xm[i] = tmp; - } - } - - private void mean(int n, int ncx, double[] x, double[] xm, boolean[] hasNA) { - double sum; - double tmp; - /* variable means (has_na) */ - for (int i = 0; i < ncx; i++) { - if (hasNA[i]) { - tmp = RRuntime.DOUBLE_NA; - } else { - sum = 0.0; - for (int k = 0; k < n; k++) { - double u = x[i * n + k]; - if (checkNAs(u)) { - sum = RRuntime.DOUBLE_NA; - break; - } - sum += u; - } - tmp = checkNAs(sum) ? RRuntime.DOUBLE_NA : sum / n; - if (RRuntime.isFinite(tmp)) { - sum = 0.0; + sum /= n1; + } else { /* Kendall's tau */ for (int k = 0; k < n; k++) { - double u = x[i * n + k]; - if (checkNAs(u)) { - sum = RRuntime.DOUBLE_NA; - break; + for (int n1_ = 0; n1_ < n; n1_++) { + if (array[xx + k] != array[xx + n1_]) { + sum++; /* = sign(. - .)^2 */ + } } - sum += u - tmp; - } - if (checkNAs(sum)) { - tmp = RRuntime.DOUBLE_NA; - } else { - tmp += sum / n; } } + m[i] = Math.sqrt(sum); } - xm[i] = tmp; } } - private boolean covNA2(int n, int ncx, int ncy, RDoubleVector x, RDoubleVector y, double[] xm, double[] ym, double[] ans, boolean cor, boolean iskendall) { - double sum; - double xxm; - double yym; + private static void cov_na_2(int n, int ncx, int ncy, double[] x, double[] y, double[] xm, double[] ym, boolean[] has_na_x, boolean[] has_na_y, double[] ans, boolean[] sd_0, boolean cor, + boolean kendall) { int n1 = -1; - boolean sd0 = false; - - double[] xData = x.getDataWithoutCopying(); - double[] yData = y.getDataWithoutCopying(); - boolean[] hasNAx = findNAs(n, ncx, x); - boolean[] hasNAy = findNAs(n, ncy, y); - - if (n <= 1) { /* too many missing */ - tooManyMissing.enter(); + if (n <= 1) {/* too many missing */ for (int i = 0; i < ncx; i++) { for (int j = 0; j < ncy; j++) { - ans[i + j * ncx] = RRuntime.DOUBLE_NA; + ANS(ans, ncx, i, j, RRuntime.DOUBLE_NA); } } - return sd0; + return; } - if (!iskendall) { - if (xCompleteProfile.profile(x.isComplete())) { - meanNoNA(n, ncx, xData, xm, hasNAx); - } else { - mean(n, ncx, xData, xm, hasNAx); - } - if (yCompleteProfile.profile(y.isComplete())) { - meanNoNA(n, ncy, yData, ym, hasNAy); - } else { - mean(n, ncy, yData, ym, hasNAy); - } + if (!kendall) { + MEAN_(n, ncx, x, xm, has_na_x);/* -> xm[] */ + MEAN_(n, ncy, y, ym, has_na_y);/* -> ym[] */ n1 = n - 1; } - for (int i = 0; i < ncx; i++) { - if (noNAXProfile.profile(!hasNAx[i])) { - if (!iskendall) { - xxm = xm[i]; + if (has_na_x[i]) { + for (int j = 0; j < ncy; j++) { + ANS(ans, ncx, i, j, RRuntime.DOUBLE_NA); + } + } else { + int xx = i * n; + if (!kendall) { + double xxm = xm[i]; for (int j = 0; j < ncy; j++) { - double r; - if (noNAYProfile.profile(!hasNAy[j])) { - yym = ym[j]; - if (checkNAs(xxm, yym)) { - r = RRuntime.DOUBLE_NA; - } else { - sum = 0.0; - for (int k = 0; k < n; k++) { - double u = xData[i * n + k]; - double v = yData[j * n + k]; - sum += (u - xxm) * (v - yym); - } - r = checkNAs(sum) ? RRuntime.DOUBLE_NA : sum / n1; - } + if (has_na_y[j]) { + ANS(ans, ncx, i, j, RRuntime.DOUBLE_NA); } else { - r = RRuntime.DOUBLE_NA; + int yy = j * n; + double yym = ym[j]; + double sum = 0; + for (int k = 0; k < n; k++) { + sum += (x[xx + k] - xxm) * (y[yy + k] - yym); + } + ANS(ans, ncx, i, j, sum / n1); } - ans[i + j * ncx] = r; } } else { /* Kendall's tau */ - throw new UnsupportedOperationException("kendall's unsupported"); - } - } else { - for (int j = 0; j < ncy; j++) { - ans[i + j * ncx] = RRuntime.DOUBLE_NA; + for (int j = 0; j < ncy; j++) { + if (has_na_y[j]) { + ANS(ans, ncx, i, j, RRuntime.DOUBLE_NA); + } else { + int yy = j * n; + double sum = 0; + for (int k = 0; k < n; k++) { + for (n1 = 0; n1 < n; n1++) { + sum += RMath.sign(x[xx + k] - x[xx + n1]) * RMath.sign(y[yy + k] - y[yy + n1]); + } + } + ANS(ans, ncx, i, j, sum); + } + } } } } if (cor) { - covsdev(n, n1, ncx, x, hasNAx, xm, iskendall); - covsdev(n, n1, ncy, y, hasNAy, ym, iskendall); + + COV_SDEV2(n, n1, ncx, x, xm, has_na_x, kendall); /* -> xm[.] */ + COV_SDEV2(n, n1, ncy, y, ym, has_na_y, kendall); /* -> ym[.] */ for (int i = 0; i < ncx; i++) { - if (noNAXProfile.profile(!hasNAx[i])) { + if (!has_na_x[i]) { for (int j = 0; j < ncy; j++) { - if (noNAYProfile.profile(!hasNAy[j])) { - if (xm[i] == 0.0 || ym[j] == 0.0) { - sd0 = true; - ans[i + j * ncx] = RRuntime.DOUBLE_NA; + if (!has_na_y[j]) { + double result; + if (xm[i] == 0 || ym[j] == 0) { + sd_0[0] = true; + result = RRuntime.DOUBLE_NA; } else { - double u = xm[i]; - double v = ym[j]; - if (checkNAs(u, v)) { - ans[i + j * ncx] = RRuntime.DOUBLE_NA; + double current = ANS(ans, ncx, i, j); + if (RRuntime.isNA(current)) { + result = RRuntime.DOUBLE_NA; } else { - ans[i + j * ncx] /= u * v; - } - if (ans[i + j * ncx] > 1.0) { - ans[i + j * ncx] = 1.0; + result = CLAMP(current / (xm[i] * ym[j])); } } + ANS(ans, ncx, i, j, result); } } } } } + } + + /* + * complete[12]() returns indicator vector ind[] of complete.cases(), or -------------- + * if(na_fail) signals error if any NA/NaN is encountered + */ + + /* + * This might look slightly inefficient, but it is designed to optimise paging in virtual memory + * systems ... (or at least that's my story, and I'm sticking to it.) + */ + private static void NA_LOOP(int n, int z, double[] x, boolean[] ind, boolean na_fail) { + for (int i = 0; i < n; i++) { + if (ISNAN(x[z + i])) { + if (na_fail) { + error("missing observations in cov/cor"); + } else { + ind[i] = false; + } + } + } + } - return sd0; + private static void complete1(int n, int ncx, double[] x, boolean[] ind, boolean na_fail) { + for (int i = 0; i < n; i++) { + ind[i] = true; + } + for (int j = 0; j < ncx; j++) { + int z = j * n; + NA_LOOP(n, z, x, ind, na_fail); + } } - private void covsdev(int n, int n1, int ncx, RDoubleVector x, boolean[] hasNA, double[] xm, boolean iskendall) { - for (int i = 0; i < ncx; i++) { - if (!hasNA[i]) { /* Var(X[j]) */ - double sum = 0.0; - if (!iskendall) { - double xxm = xm[i]; - if (checkNAs(xxm)) { - sum = RRuntime.DOUBLE_NA; - } else { - for (int k = 0; k < n; k++) { - double u = x.getDataAt(i * n + k); - double v = x.getDataAt(i * n + k); - if (checkNAs(u, v)) { - sum = RRuntime.DOUBLE_NA; - break; - } - sum += (u - xxm) * (v - xxm); + static void complete2(int n, int ncx, int ncy, double[] x, double[] y, boolean[] ind, boolean na_fail) { + complete1(n, ncx, x, ind, na_fail); + + for (int j = 0; j < ncy; j++) { + int z = j * n; + NA_LOOP(n, z, y, ind, na_fail); + } + } + + static void find_na_1(int n, int ncx, double[] x, boolean[] has_na) { + for (int j = 0; j < ncx; j++) { + int z = j * n; + has_na[j] = false; + for (int i = 0; i < n; i++) { + if (ISNAN(x[z + i])) { + has_na[j] = true; + break; + } + } + } + } + + static void find_na_2(int n, int ncx, int ncy, double[] x, double[] y, boolean[] has_na_x, boolean[] has_na_y) { + find_na_1(n, ncx, x, has_na_x); + find_na_1(n, ncy, y, has_na_y); + } + + /* + * co[vr](x, y, use = { 1, 2, 3, 4, 5 } "all.obs", "complete.obs", "pairwise.complete", + * "everything", "na.or.complete" kendall = TRUE/FALSE) + */ + public RDoubleVector corcov(RDoubleVector x, RDoubleVector y, int method, boolean kendall) throws RError { + int n, ncx, ncy; + + /* Arg.1: x */ + if (isFactorX.executeIsFactor(x)) { + error("'x' is a factor"); + // maybe only warning: "Calling var(x) on a factor x is deprecated and will become an + // error.\n Use something like 'all(duplicated(x)[-1L])' to test for a constant vector." + } + /* length check of x -- only if(empty_err) --> below */ + int[] xDims = getDimsXNode.getDimensions(x); + boolean ansmat = matrixProfile.profile(GetDimAttributeNode.isMatrix(xDims)); + if ((ansmat)) { + n = xDims[0]; + ncx = xDims[1]; + } else { + n = x.getLength(); + ncx = 1; + } + /* Arg.2: y */ + if (y == null) {/* y = x : var() */ + ncy = ncx; + } else { + if (isFactorY.executeIsFactor(y)) { + error("'y' is a factor"); + // maybe only warning: "Calling var(x) on a factor x is deprecated and will become + // an error.\n Use something like 'all(duplicated(x)[-1L])' to test for a constant + // vector." + } + int[] yDims = getDimsYNode.getDimensions(y); + if (GetDimAttributeNode.isMatrix(yDims)) { + if (yDims[0] != n) { + error("incompatible dimensions"); + } + ncy = yDims[1]; + ansmat = true; + } else { + if (y.getLength() != n) { + error("incompatible dimensions"); + } + ncy = 1; + } + } + + /* "default: complete" */ + boolean na_fail = false; + boolean everything = false; + boolean empty_err = true; + boolean pair = false; + switch (method) { + case 1: /* use all : no NAs */ + na_fail = true; + break; + case 2: /* complete */ + /* did na.omit in R */ + if (x.getLength() == 0) { + error("no complete element pairs"); + } + break; + case 3: /* pairwise.complete */ + pair = true; + break; + case 4: /* "everything": NAs are propagated */ + everything = true; + empty_err = false; + break; + case 5: /* "na.or.complete": NAs are propagated */ + empty_err = false; + break; + default: + error("invalid 'use' (computational method)"); + } + if (empty_err && x.getLength() == 0) { + error("'x' is empty"); + } + + double[] xData = x.getDataWithoutCopying(); + double[] ans = new double[ncx * ncy]; + boolean[] sd_0 = new boolean[1]; + + evaluate(y, kendall, isCor, n, ncx, ncy, na_fail, everything, empty_err, pair, xData, ans, sd_0); + + if (sd_0[0]) { /* only in cor() */ + warning(RError.Message.SD_ZERO); + } + + boolean seenNA = false; + for (int i = 0; i < ans.length; i++) { + if (RRuntime.isNA(ans[i])) { + naInRes.enter(); + seenNA = true; + break; + } + } + + if (ansmat) { /* set dimnames() when applicable */ + RList newDimNames = null; + if (y == null) { + RList dimNames = getDimsNamesXNode.getDimNames(x); + if (dimNames != null) { + Object names = dimNames.getDataAt(1); + if (names != RNull.instance) { + newDimNames = RDataFactory.createList(new Object[]{names, names}); + } + } + } else { + RList dimNamesX = getDimsNamesXNode.getDimNames(x); + RList dimNamesY = getDimsNamesYNode.getDimNames(y); + Object namesX = dimNamesX.getLength() >= 2 ? dimNamesX.getDataAt(1) : RNull.instance; + Object namesY = dimNamesY.getLength() >= 2 ? dimNamesY.getDataAt(1) : RNull.instance; + if (namesX != RNull.instance || namesY != RNull.instance) { + newDimNames = RDataFactory.createList(new Object[]{namesX, namesY}); + } + } + RDoubleVector result = RDataFactory.createDoubleVector(ans, !seenNA, new int[]{ncx, ncy}); + if (newDimNames != null) { + setDimNamesNode.setDimNames(result, newDimNames); + } + return result; + } else { + return RDataFactory.createDoubleVector(ans, !seenNA); + } + } + + @TruffleBoundary + private static void evaluate(RDoubleVector y, boolean kendall, boolean cor, int n, int ncx, int ncy, boolean na_fail, boolean everything, boolean empty_err, boolean pair, double[] xData, + double[] ans, boolean[] sd_0) { + if (y == null) { + if (everything) { /* NA's are propagated */ + double[] xm = new double[ncx]; + boolean[] ind = new boolean[ncx]; + find_na_1(n, ncx, xData, /* --> has_na[] = */ ind); + cov_na_1(n, ncx, xData, xm, ind, ans, sd_0, cor, kendall); + } else if (!pair) { /* all | complete "var" */ + double[] xm = new double[ncx]; + boolean[] ind = new boolean[n]; + complete1(n, ncx, xData, ind, na_fail); + cov_complete1(n, ncx, xData, xm, ind, ans, sd_0, cor, kendall); + if (empty_err) { + boolean indany = false; + for (int i = 0; i < n; i++) { + if (ind[i]) { + indany = true; + break; } } - if (!checkNAs(sum)) { - sum /= n1; + if (!indany) { + error("no complete element pairs"); } - } else { /* Kendall's tau */ - throw new UnsupportedOperationException("kendall's unsupported"); } - xm[i] = checkNAs(sum) ? RRuntime.DOUBLE_NA : Math.sqrt(sum); + } else { /* pairwise "var" */ + cov_pairwise1(n, ncx, xData, ans, sd_0, cor, kendall); + } + } else { /* Co[vr] (x, y) */ + double[] yData = y.getDataWithoutCopying(); + if (everything) { + double[] xm = new double[ncx]; + double[] ym = new double[ncy]; + boolean[] ind = new boolean[ncx]; + boolean[] has_na_y = new boolean[ncy]; + find_na_2(n, ncx, ncy, xData, yData, ind, has_na_y); + cov_na_2(n, ncx, ncy, xData, yData, xm, ym, ind, has_na_y, ans, sd_0, cor, kendall); + } else if (!pair) { /* all | complete */ + double[] xm = new double[ncx]; + double[] ym = new double[ncy]; + boolean[] ind = new boolean[n]; + complete2(n, ncx, ncy, xData, yData, ind, na_fail); + cov_complete2(n, ncx, ncy, xData, yData, xm, ym, ind, ans, sd_0, cor, kendall); + if (empty_err) { + boolean indany = false; + for (int i = 0; i < n; i++) { + if (ind[i]) { + indany = true; + break; + } + } + if (!indany) { + error("no complete element pairs"); + } + } + } else { /* pairwise */ + cov_pairwise2(n, ncx, ncy, xData, yData, ans, sd_0, cor, kendall); } } } - private RuntimeException error(String message) { - throw error(Message.GENERIC, message); + private final boolean isCor; + + public Covcor(boolean isCor) { + this.isCor = isCor; } - private boolean checkNAs(double... xs) { - for (double x : xs) { - check.enable(x); - if (check.check(x)) { - return true; - } - } - return false; + static { + Casts casts = new Casts(Covcor.class); + casts.arg(0).mustNotBeMissing().mustBe(nullValue().not(), Message.IS_NULL, "x").asDoubleVector(); + casts.arg(1).mustNotBeMissing().asDoubleVector(); + casts.arg(2).asIntegerVector().findFirst(); + casts.arg(3).asLogicalVector().findFirst().map(toBoolean()); } - private boolean checkNAs(double x) { - check.enable(x); - return check.check(x); + @Specialization + public Object call(RAbstractDoubleVector x, @SuppressWarnings("unused") RNull y, int method, boolean iskendall) { + return corcov(x.materialize(), null, method, iskendall); } + + @Specialization + public Object call(RAbstractDoubleVector x, RAbstractDoubleVector y, int method, boolean iskendall) { + return corcov(x.materialize(), y.materialize(), method, iskendall); + } + + private final BranchProfile naInRes = BranchProfile.create(); + private final ConditionProfile matrixProfile = ConditionProfile.createBinaryProfile(); + + @Child private GetDimAttributeNode getDimsXNode = GetDimAttributeNode.create(); + @Child private GetDimAttributeNode getDimsYNode = GetDimAttributeNode.create(); + @Child private GetDimNamesAttributeNode getDimsNamesXNode = GetDimNamesAttributeNode.create(); + @Child private GetDimNamesAttributeNode getDimsNamesYNode = GetDimNamesAttributeNode.create(); + @Child private SetDimNamesAttributeNode setDimNamesNode = SetDimNamesAttributeNodeGen.create(); + @Child private IsFactorNode isFactorX = new IsFactorNode(); + @Child private IsFactorNode isFactorY = new IsFactorNode(); } diff --git a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/nmath/RMath.java b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/nmath/RMath.java index 22dadebc92a20f82539d9fb72d3a4dd438ae4672..5db1157cc3a5b0c284dab5db488af80a56d9b158 100644 --- a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/nmath/RMath.java +++ b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/nmath/RMath.java @@ -34,6 +34,13 @@ import com.oracle.truffle.r.runtime.nmath.RMathError.MLError; */ public final class RMath { + public static double sign(double x) { + if (Double.isNaN(x)) { + return x; + } + return ((x > 0) ? 1 : ((x == 0) ? 0 : -1)); + } + public static boolean mlValid(double d) { return !Double.isNaN(d); } diff --git a/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/ExpectedTestOutput.test b/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/ExpectedTestOutput.test index b9fc93b7a528fad0e1531d6ebae220d1761a1ab3..35e626cff237e38f505e8f81d7783ab4d59a2614 100644 --- a/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/ExpectedTestOutput.test +++ b/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/ExpectedTestOutput.test @@ -148129,6 +148129,330 @@ Error: argument 'files' must be character Error in stats::complete.cases(data.frame(col1 = c(1, 2, NA), col2 = c(1, : not all arguments have the same length +##com.oracle.truffle.r.test.library.stats.TestExternal_covcor.testCombinations# +#cor(1:4, c(1,7,1,-4), use='a', method='k') +[1] -0.5477226 + +##com.oracle.truffle.r.test.library.stats.TestExternal_covcor.testCombinations# +#cor(1:4, c(1,7,1,-4), use='a', method='p') +[1] -0.6024641 + +##com.oracle.truffle.r.test.library.stats.TestExternal_covcor.testCombinations# +#cor(1:4, c(1,7,1,-4), use='a', method='s') +[1] -0.6324555 + +##com.oracle.truffle.r.test.library.stats.TestExternal_covcor.testCombinations# +#cor(1:4, c(1,7,1,-4), use='c', method='k') +[1] -0.5477226 + +##com.oracle.truffle.r.test.library.stats.TestExternal_covcor.testCombinations# +#cor(1:4, c(1,7,1,-4), use='c', method='p') +[1] -0.6024641 + +##com.oracle.truffle.r.test.library.stats.TestExternal_covcor.testCombinations# +#cor(1:4, c(1,7,1,-4), use='c', method='s') +[1] -0.6324555 + +##com.oracle.truffle.r.test.library.stats.TestExternal_covcor.testCombinations# +#cor(1:4, c(1,7,1,-4), use='e', method='k') +[1] -0.5477226 + +##com.oracle.truffle.r.test.library.stats.TestExternal_covcor.testCombinations# +#cor(1:4, c(1,7,1,-4), use='e', method='p') +[1] -0.6024641 + +##com.oracle.truffle.r.test.library.stats.TestExternal_covcor.testCombinations# +#cor(1:4, c(1,7,1,-4), use='e', method='s') +[1] -0.6324555 + +##com.oracle.truffle.r.test.library.stats.TestExternal_covcor.testCombinations# +#cor(1:4, c(1,7,1,-4), use='n', method='k') +[1] -0.5477226 + +##com.oracle.truffle.r.test.library.stats.TestExternal_covcor.testCombinations# +#cor(1:4, c(1,7,1,-4), use='n', method='p') +[1] -0.6024641 + +##com.oracle.truffle.r.test.library.stats.TestExternal_covcor.testCombinations# +#cor(1:4, c(1,7,1,-4), use='n', method='s') +[1] -0.6324555 + +##com.oracle.truffle.r.test.library.stats.TestExternal_covcor.testCombinations# +#cor(1:4, c(1,7,1,-4), use='p', method='k') +[1] -0.5477226 + +##com.oracle.truffle.r.test.library.stats.TestExternal_covcor.testCombinations# +#cor(1:4, c(1,7,1,-4), use='p', method='p') +[1] -0.6024641 + +##com.oracle.truffle.r.test.library.stats.TestExternal_covcor.testCombinations# +#cor(1:4, c(1,7,1,-4), use='p', method='s') +[1] -0.6324555 + +##com.oracle.truffle.r.test.library.stats.TestExternal_covcor.testCombinations# +#cor(mtcars[,1:4], use='a', method='k') + mpg cyl disp hp +mpg 1.0000000 -0.7953134 -0.7681311 -0.7428125 +cyl -0.7953134 1.0000000 0.8144263 0.7851865 +disp -0.7681311 0.8144263 1.0000000 0.6659987 +hp -0.7428125 0.7851865 0.6659987 1.0000000 + +##com.oracle.truffle.r.test.library.stats.TestExternal_covcor.testCombinations# +#cor(mtcars[,1:4], use='a', method='p') + mpg cyl disp hp +mpg 1.0000000 -0.8521620 -0.8475514 -0.7761684 +cyl -0.8521620 1.0000000 0.9020329 0.8324475 +disp -0.8475514 0.9020329 1.0000000 0.7909486 +hp -0.7761684 0.8324475 0.7909486 1.0000000 + +##com.oracle.truffle.r.test.library.stats.TestExternal_covcor.testCombinations# +#cor(mtcars[,1:4], use='a', method='s') + mpg cyl disp hp +mpg 1.0000000 -0.9108013 -0.9088824 -0.8946646 +cyl -0.9108013 1.0000000 0.9276516 0.9017909 +disp -0.9088824 0.9276516 1.0000000 0.8510426 +hp -0.8946646 0.9017909 0.8510426 1.0000000 + +##com.oracle.truffle.r.test.library.stats.TestExternal_covcor.testCombinations# +#cor(mtcars[,1:4], use='c', method='k') + mpg cyl disp hp +mpg 1.0000000 -0.7953134 -0.7681311 -0.7428125 +cyl -0.7953134 1.0000000 0.8144263 0.7851865 +disp -0.7681311 0.8144263 1.0000000 0.6659987 +hp -0.7428125 0.7851865 0.6659987 1.0000000 + +##com.oracle.truffle.r.test.library.stats.TestExternal_covcor.testCombinations# +#cor(mtcars[,1:4], use='c', method='p') + mpg cyl disp hp +mpg 1.0000000 -0.8521620 -0.8475514 -0.7761684 +cyl -0.8521620 1.0000000 0.9020329 0.8324475 +disp -0.8475514 0.9020329 1.0000000 0.7909486 +hp -0.7761684 0.8324475 0.7909486 1.0000000 + +##com.oracle.truffle.r.test.library.stats.TestExternal_covcor.testCombinations# +#cor(mtcars[,1:4], use='c', method='s') + mpg cyl disp hp +mpg 1.0000000 -0.9108013 -0.9088824 -0.8946646 +cyl -0.9108013 1.0000000 0.9276516 0.9017909 +disp -0.9088824 0.9276516 1.0000000 0.8510426 +hp -0.8946646 0.9017909 0.8510426 1.0000000 + +##com.oracle.truffle.r.test.library.stats.TestExternal_covcor.testCombinations# +#cor(mtcars[,1:4], use='e', method='k') + mpg cyl disp hp +mpg 1.0000000 -0.7953134 -0.7681311 -0.7428125 +cyl -0.7953134 1.0000000 0.8144263 0.7851865 +disp -0.7681311 0.8144263 1.0000000 0.6659987 +hp -0.7428125 0.7851865 0.6659987 1.0000000 + +##com.oracle.truffle.r.test.library.stats.TestExternal_covcor.testCombinations# +#cor(mtcars[,1:4], use='e', method='p') + mpg cyl disp hp +mpg 1.0000000 -0.8521620 -0.8475514 -0.7761684 +cyl -0.8521620 1.0000000 0.9020329 0.8324475 +disp -0.8475514 0.9020329 1.0000000 0.7909486 +hp -0.7761684 0.8324475 0.7909486 1.0000000 + +##com.oracle.truffle.r.test.library.stats.TestExternal_covcor.testCombinations# +#cor(mtcars[,1:4], use='e', method='s') + mpg cyl disp hp +mpg 1.0000000 -0.9108013 -0.9088824 -0.8946646 +cyl -0.9108013 1.0000000 0.9276516 0.9017909 +disp -0.9088824 0.9276516 1.0000000 0.8510426 +hp -0.8946646 0.9017909 0.8510426 1.0000000 + +##com.oracle.truffle.r.test.library.stats.TestExternal_covcor.testCombinations# +#cor(mtcars[,1:4], use='n', method='k') + mpg cyl disp hp +mpg 1.0000000 -0.7953134 -0.7681311 -0.7428125 +cyl -0.7953134 1.0000000 0.8144263 0.7851865 +disp -0.7681311 0.8144263 1.0000000 0.6659987 +hp -0.7428125 0.7851865 0.6659987 1.0000000 + +##com.oracle.truffle.r.test.library.stats.TestExternal_covcor.testCombinations# +#cor(mtcars[,1:4], use='n', method='p') + mpg cyl disp hp +mpg 1.0000000 -0.8521620 -0.8475514 -0.7761684 +cyl -0.8521620 1.0000000 0.9020329 0.8324475 +disp -0.8475514 0.9020329 1.0000000 0.7909486 +hp -0.7761684 0.8324475 0.7909486 1.0000000 + +##com.oracle.truffle.r.test.library.stats.TestExternal_covcor.testCombinations# +#cor(mtcars[,1:4], use='n', method='s') + mpg cyl disp hp +mpg 1.0000000 -0.9108013 -0.9088824 -0.8946646 +cyl -0.9108013 1.0000000 0.9276516 0.9017909 +disp -0.9088824 0.9276516 1.0000000 0.8510426 +hp -0.8946646 0.9017909 0.8510426 1.0000000 + +##com.oracle.truffle.r.test.library.stats.TestExternal_covcor.testCombinations# +#cor(mtcars[,1:4], use='p', method='k') + mpg cyl disp hp +mpg 1.0000000 -0.7953134 -0.7681311 -0.7428125 +cyl -0.7953134 1.0000000 0.8144263 0.7851865 +disp -0.7681311 0.8144263 1.0000000 0.6659987 +hp -0.7428125 0.7851865 0.6659987 1.0000000 + +##com.oracle.truffle.r.test.library.stats.TestExternal_covcor.testCombinations# +#cor(mtcars[,1:4], use='p', method='p') + mpg cyl disp hp +mpg 1.0000000 -0.8521620 -0.8475514 -0.7761684 +cyl -0.8521620 1.0000000 0.9020329 0.8324475 +disp -0.8475514 0.9020329 1.0000000 0.7909486 +hp -0.7761684 0.8324475 0.7909486 1.0000000 + +##com.oracle.truffle.r.test.library.stats.TestExternal_covcor.testCombinations# +#cor(mtcars[,1:4], use='p', method='s') + mpg cyl disp hp +mpg 1.0000000 -0.9108013 -0.9088824 -0.8946646 +cyl -0.9108013 1.0000000 0.9276516 0.9017909 +disp -0.9088824 0.9276516 1.0000000 0.8510426 +hp -0.8946646 0.9017909 0.8510426 1.0000000 + +##com.oracle.truffle.r.test.library.stats.TestExternal_covcor.testCombinations# +#cov(1:4, c(1,7,1,-4), use='a', method='k') +[1] -6 + +##com.oracle.truffle.r.test.library.stats.TestExternal_covcor.testCombinations# +#cov(1:4, c(1,7,1,-4), use='a', method='p') +[1] -3.5 + +##com.oracle.truffle.r.test.library.stats.TestExternal_covcor.testCombinations# +#cov(1:4, c(1,7,1,-4), use='a', method='s') +[1] -1 + +##com.oracle.truffle.r.test.library.stats.TestExternal_covcor.testCombinations# +#cov(1:4, c(1,7,1,-4), use='c', method='k') +[1] -6 + +##com.oracle.truffle.r.test.library.stats.TestExternal_covcor.testCombinations# +#cov(1:4, c(1,7,1,-4), use='c', method='p') +[1] -3.5 + +##com.oracle.truffle.r.test.library.stats.TestExternal_covcor.testCombinations# +#cov(1:4, c(1,7,1,-4), use='c', method='s') +[1] -1 + +##com.oracle.truffle.r.test.library.stats.TestExternal_covcor.testCombinations# +#cov(1:4, c(1,7,1,-4), use='e', method='k') +[1] -6 + +##com.oracle.truffle.r.test.library.stats.TestExternal_covcor.testCombinations# +#cov(1:4, c(1,7,1,-4), use='e', method='p') +[1] -3.5 + +##com.oracle.truffle.r.test.library.stats.TestExternal_covcor.testCombinations# +#cov(1:4, c(1,7,1,-4), use='e', method='s') +[1] -1 + +##com.oracle.truffle.r.test.library.stats.TestExternal_covcor.testCombinations# +#cov(1:4, c(1,7,1,-4), use='n', method='k') +[1] -6 + +##com.oracle.truffle.r.test.library.stats.TestExternal_covcor.testCombinations# +#cov(1:4, c(1,7,1,-4), use='n', method='p') +[1] -3.5 + +##com.oracle.truffle.r.test.library.stats.TestExternal_covcor.testCombinations# +#cov(1:4, c(1,7,1,-4), use='n', method='s') +[1] -1 + +##com.oracle.truffle.r.test.library.stats.TestExternal_covcor.testCombinations# +#cov(mtcars[,1:4], use='a', method='k') + mpg cyl disp hp +mpg 978 -638 -752 -722 +cyl -638 658 654 626 +disp -752 654 980 648 +hp -722 626 648 966 + +##com.oracle.truffle.r.test.library.stats.TestExternal_covcor.testCombinations# +#cov(mtcars[,1:4], use='a', method='p') + mpg cyl disp hp +mpg 36.324103 -9.172379 -633.0972 -320.7321 +cyl -9.172379 3.189516 199.6603 101.9315 +disp -633.097208 199.660282 15360.7998 6721.1587 +hp -320.732056 101.931452 6721.1587 4700.8669 + +##com.oracle.truffle.r.test.library.stats.TestExternal_covcor.testCombinations# +#cov(mtcars[,1:4], use='a', method='s') + mpg cyl disp hp +mpg 87.88710 -74.54032 -79.87903 -78.56452 +cyl -74.54032 76.20968 75.91935 73.74194 +disp -79.87903 75.91935 87.88710 74.73387 +hp -78.56452 73.74194 74.73387 87.74194 + +##com.oracle.truffle.r.test.library.stats.TestExternal_covcor.testCombinations# +#cov(mtcars[,1:4], use='c', method='k') + mpg cyl disp hp +mpg 978 -638 -752 -722 +cyl -638 658 654 626 +disp -752 654 980 648 +hp -722 626 648 966 + +##com.oracle.truffle.r.test.library.stats.TestExternal_covcor.testCombinations# +#cov(mtcars[,1:4], use='c', method='p') + mpg cyl disp hp +mpg 36.324103 -9.172379 -633.0972 -320.7321 +cyl -9.172379 3.189516 199.6603 101.9315 +disp -633.097208 199.660282 15360.7998 6721.1587 +hp -320.732056 101.931452 6721.1587 4700.8669 + +##com.oracle.truffle.r.test.library.stats.TestExternal_covcor.testCombinations# +#cov(mtcars[,1:4], use='c', method='s') + mpg cyl disp hp +mpg 87.88710 -74.54032 -79.87903 -78.56452 +cyl -74.54032 76.20968 75.91935 73.74194 +disp -79.87903 75.91935 87.88710 74.73387 +hp -78.56452 73.74194 74.73387 87.74194 + +##com.oracle.truffle.r.test.library.stats.TestExternal_covcor.testCombinations# +#cov(mtcars[,1:4], use='e', method='k') + mpg cyl disp hp +mpg 978 -638 -752 -722 +cyl -638 658 654 626 +disp -752 654 980 648 +hp -722 626 648 966 + +##com.oracle.truffle.r.test.library.stats.TestExternal_covcor.testCombinations# +#cov(mtcars[,1:4], use='e', method='p') + mpg cyl disp hp +mpg 36.324103 -9.172379 -633.0972 -320.7321 +cyl -9.172379 3.189516 199.6603 101.9315 +disp -633.097208 199.660282 15360.7998 6721.1587 +hp -320.732056 101.931452 6721.1587 4700.8669 + +##com.oracle.truffle.r.test.library.stats.TestExternal_covcor.testCombinations# +#cov(mtcars[,1:4], use='e', method='s') + mpg cyl disp hp +mpg 87.88710 -74.54032 -79.87903 -78.56452 +cyl -74.54032 76.20968 75.91935 73.74194 +disp -79.87903 75.91935 87.88710 74.73387 +hp -78.56452 73.74194 74.73387 87.74194 + +##com.oracle.truffle.r.test.library.stats.TestExternal_covcor.testCombinations# +#cov(mtcars[,1:4], use='n', method='k') + mpg cyl disp hp +mpg 978 -638 -752 -722 +cyl -638 658 654 626 +disp -752 654 980 648 +hp -722 626 648 966 + +##com.oracle.truffle.r.test.library.stats.TestExternal_covcor.testCombinations# +#cov(mtcars[,1:4], use='n', method='p') + mpg cyl disp hp +mpg 36.324103 -9.172379 -633.0972 -320.7321 +cyl -9.172379 3.189516 199.6603 101.9315 +disp -633.097208 199.660282 15360.7998 6721.1587 +hp -320.732056 101.931452 6721.1587 4700.8669 + +##com.oracle.truffle.r.test.library.stats.TestExternal_covcor.testCombinations# +#cov(mtcars[,1:4], use='n', method='s') + mpg cyl disp hp +mpg 87.88710 -74.54032 -79.87903 -78.56452 +cyl -74.54032 76.20968 75.91935 73.74194 +disp -79.87903 75.91935 87.88710 74.73387 +hp -78.56452 73.74194 74.73387 87.74194 + ##com.oracle.truffle.r.test.library.stats.TestExternal_covcor.testCovcor# #.Call(stats:::C_cov, 1:5, 1:5, 4, FALSE) [1] 2.5 diff --git a/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/library/stats/TestExternal_covcor.java b/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/library/stats/TestExternal_covcor.java index 5f548a2355f67085dc227c71caed04103bece6e7..fe08b63966fe6a4eac773b268a55517cbedc9367 100644 --- a/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/library/stats/TestExternal_covcor.java +++ b/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/library/stats/TestExternal_covcor.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016, 2017, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -40,4 +40,15 @@ public class TestExternal_covcor extends TestBase { assertEval(".Call(stats:::C_cov, NULL, 1:5, 4, FALSE)"); assertEval(".Call(stats:::C_cov, 1:3, 1:5, 4, FALSE)"); } + + @Test + public void testCombinations() { + String[] useCor = new String[]{"e", "a", "c", "n", "p"}; + String[] useCov = new String[]{"e", "a", "c", "n"}; + String[] methods = new String[]{"p", "k", "s"}; + assertEval(template("cor(mtcars[,1:4], use='%0', method='%1')", useCor, methods)); + assertEval(template("cor(1:4, c(1,7,1,-4), use='%0', method='%1')", useCor, methods)); + assertEval(template("cov(mtcars[,1:4], use='%0', method='%1')", useCov, methods)); + assertEval(template("cov(1:4, c(1,7,1,-4), use='%0', method='%1')", useCov, methods)); + } }