/*-
 *******************************************************************************
 * Copyright (c) 2011, 2016 Diamond Light Source Ltd.
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/epl-v10.html
 *
 * Contributors:
 *    Peter Chang - initial API and implementation and/or initial documentation
 *******************************************************************************/

package org.eclipse.january.dataset;

import java.lang.ref.SoftReference;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;

import org.apache.commons.math3.complex.Complex;
import org.apache.commons.math3.stat.descriptive.moment.Kurtosis;
import org.apache.commons.math3.stat.descriptive.moment.Skewness;
import org.eclipse.january.metadata.Dirtiable;
import org.eclipse.january.metadata.MetadataType;

/**
 * Statistics class
 *
 * TODO Where is mode? http://en.wikipedia.org/wiki/Mode_(statistics)
 */
public class Stats {

	private static class ReferencedDataset extends SoftReference<Dataset> {
		public ReferencedDataset(Dataset d) {
			super(d);
		}
	}
	private static class QStatisticsImpl<T> implements MetadataType {
		private static final long serialVersionUID = -3296671666463190388L;
		final static Double Q1 = 0.25;
		final static Double Q2 = 0.5;
		final static Double Q3 = 0.75;
		Map<Double, T> qmap = new HashMap<Double, T>();
		transient Map<Integer, Map<Double, ReferencedDataset>> aqmap = new HashMap<Integer, Map<Double, ReferencedDataset>>();
		transient ReferencedDataset s; // store 0th element
		transient Map<Integer, ReferencedDataset> smap = new HashMap<>();

		@Dirtiable
		private boolean isDirty = true;

		@Override
		public QStatisticsImpl<T> clone() {
			return new QStatisticsImpl<T>(this);
		}

		public QStatisticsImpl() {
		}

		private QStatisticsImpl(QStatisticsImpl<T> qstats) {
			if (qstats.s != null && qstats.s.get() != null) {
				s = new ReferencedDataset(qstats.s.get().getView(false));
			}
			qmap.putAll(qstats.qmap);
			for (Integer i : qstats.aqmap.keySet()) {
				aqmap.put(i, new HashMap<>(qstats.aqmap.get(i)));
			}
			smap.putAll(qstats.smap);
			isDirty = qstats.isDirty;
		}

		public void setQuantile(double q, T v) {
			qmap.put(q, v);
		}

		public T getQuantile(double q) {
			return qmap.get(q);
		}

		private Map<Double, ReferencedDataset> getMap(int axis) {
			Map<Double, ReferencedDataset> qm = aqmap.get(axis);
			if (qm == null) {
				qm = new HashMap<>();
				aqmap.put(axis, qm);
			}
			return qm;
		}

		public void setQuantile(int axis, double q, Dataset v) {
			Map<Double, ReferencedDataset> qm = getMap(axis);
			qm.put(q, new ReferencedDataset(v));
		}

		public Dataset getQuantile(int axis, double q) {
			Map<Double, ReferencedDataset> qm = getMap(axis);
			ReferencedDataset rd = qm.get(q);
			return rd == null ? null : rd.get();
		}

		Dataset getSortedDataset(int axis) {
			return smap.containsKey(axis) ? smap.get(axis).get() : null;
		}

		void setSortedDataset(int axis, Dataset v) {
			smap.put(axis, new ReferencedDataset(v));
		}
	}
	// calculates statistics and returns sorted dataset (0th element if compound)
	private static QStatisticsImpl<?> calcQuartileStats(final Dataset a) {
		Dataset s = null;
		final int is = a.getElementsPerItem();

		if (is == 1) {
			s = DatasetUtils.sort(a);

			QStatisticsImpl<Double> qstats = new QStatisticsImpl<Double>();

			qstats.setQuantile(QStatisticsImpl.Q1, pQuantile(s, QStatisticsImpl.Q1));
			qstats.setQuantile(QStatisticsImpl.Q2, pQuantile(s, QStatisticsImpl.Q2));
			qstats.setQuantile(QStatisticsImpl.Q3, pQuantile(s, QStatisticsImpl.Q3));
			qstats.s = new ReferencedDataset(s);
			return qstats;
		}

		QStatisticsImpl<double[]> qstats = new QStatisticsImpl<double[]>();

		Dataset w = DatasetFactory.zeros(1, a.getClass(), a.getShapeRef());
		double[] q1 = new double[is];
		double[] q2 = new double[is];
		double[] q3 = new double[is];
		qstats.setQuantile(QStatisticsImpl.Q1, q1);
		qstats.setQuantile(QStatisticsImpl.Q2, q2);
		qstats.setQuantile(QStatisticsImpl.Q3, q3);
		for (int j = 0; j < is; j++) {
			((CompoundDataset) a).copyElements(w, j);
			w.sort(null);

			q1[j] = pQuantile(w, QStatisticsImpl.Q1);
			q2[j] = pQuantile(w, QStatisticsImpl.Q2);
			q3[j] = pQuantile(w, QStatisticsImpl.Q3);
			if (j == 0)
				s = w.clone();
		}
		qstats.s = new ReferencedDataset(s);

		return qstats;
	}

	static private QStatisticsImpl<?> getQStatistics(final Dataset a) {
		QStatisticsImpl<?> m = a.getFirstMetadata(QStatisticsImpl.class);
		if (m == null || m.isDirty) {
			m = calcQuartileStats(a);
			a.setMetadata(m);
		}
		return m;
	}

	static private QStatisticsImpl<?> getQStatistics(final Dataset a, final int axis) {
		final int is = a.getElementsPerItem();
		QStatisticsImpl<?> qstats = a.getFirstMetadata(QStatisticsImpl.class);

		if (qstats == null || qstats.isDirty) {
			if (is == 1) {
				qstats = new QStatisticsImpl<Double>();
			} else {
				qstats = new QStatisticsImpl<double[]>();
			}
			a.setMetadata(qstats);
		}

		if (qstats.getQuantile(axis, QStatisticsImpl.Q2) == null) {
			if (is == 1) {
				Dataset s = DatasetUtils.sort(a, axis);

				qstats.setQuantile(axis, QStatisticsImpl.Q1, pQuantile(s, axis, QStatisticsImpl.Q1));
				qstats.setQuantile(axis, QStatisticsImpl.Q2, pQuantile(s, axis, QStatisticsImpl.Q2));
				qstats.setQuantile(axis, QStatisticsImpl.Q3, pQuantile(s, axis, QStatisticsImpl.Q3));
				qstats.setSortedDataset(axis, s);
			} else {
				Dataset w = DatasetFactory.zeros(1, a.getClass(), a.getShapeRef());
				CompoundDoubleDataset q1 = null, q2 = null, q3 = null;
				for (int j = 0; j < is; j++) {
					((CompoundDataset) a).copyElements(w, j);
					w.sort(axis);

					final Dataset c = pQuantile(w, axis, QStatisticsImpl.Q1);
					if (j == 0) {
						q1 = DatasetFactory.zeros(is, CompoundDoubleDataset.class, c.getShapeRef());
						q2 = DatasetFactory.zeros(is, CompoundDoubleDataset.class, c.getShapeRef());
						q3 = DatasetFactory.zeros(is, CompoundDoubleDataset.class, c.getShapeRef());
					}
					q1.setElements(c, j);

					q2.setElements(pQuantile(w, axis, QStatisticsImpl.Q2), j);

					q3.setElements(pQuantile(w, axis, QStatisticsImpl.Q3), j);
				}
				qstats.setQuantile(axis, QStatisticsImpl.Q1, q1);
				qstats.setQuantile(axis, QStatisticsImpl.Q2, q2);
				qstats.setQuantile(axis, QStatisticsImpl.Q3, q3);
			}
		}

		return qstats;
	}

	// process a sorted dataset
	private static double pQuantile(final Dataset s, final double q) {
		double f = (s.getSize() - 1) * q; // fraction of sample number
		if (f < 0)
			return Double.NaN;
		int qpt = (int) Math.floor(f); // quantile point
		f -= qpt;

		double quantile = s.getElementDoubleAbs(qpt);
		if (f > 0) {
			quantile = (1 - f) * quantile + f * s.getElementDoubleAbs(qpt + 1);
		}
		return quantile;
	}
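
	// Worked example of the interpolation above: for a sorted dataset of size 10 and
	// q = 0.25, f = 9 * 0.25 = 2.25 so qpt = 2, and the result is the linear blend
	// 0.75 * s[2] + 0.25 * s[3].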

	private static Dataset zeros(int is, int[] shape) {
		return is == 1 ? DatasetFactory.zeros(DoubleDataset.class, shape) : DatasetFactory.zeros(is, CompoundDoubleDataset.class, shape);
	}

	// process a sorted dataset and return a double or compound double dataset
	private static Dataset pQuantile(final Dataset s, final int axis, final double q) {
		final int rank = s.getRank();
		final int is = s.getElementsPerItem();

		int[] oshape = s.getShape();

		double f = (oshape[axis] - 1) * q; // fraction of sample number
		int qpt = (int) Math.floor(f); // quantile point
		f -= qpt;

		oshape[axis] = 1;
		int[] qshape = ShapeUtils.squeezeShape(oshape, false);
		Dataset qds = zeros(is, qshape);

		IndexIterator qiter = qds.getIterator(true);
		int[] qpos = qiter.getPos();
		int[] spos = oshape; // reuse oshape array as the source position

		while (qiter.hasNext()) {
			int i = 0;
			for (; i < axis; i++) {
				spos[i] = qpos[i];
			}
			spos[i++] = qpt;
			for (; i < rank; i++) {
				spos[i] = qpos[i - 1];
			}

			Object obj = s.getObject(spos);
			qds.set(obj, qpos);
		}

		if (f > 0) {
			qiter = qds.getIterator(true);
			qpos = qiter.getPos();
			qpt++;
			Dataset rds = zeros(is, qshape);

			while (qiter.hasNext()) {
				int i = 0;
				for (; i < axis; i++) {
					spos[i] = qpos[i];
				}
				spos[i++] = qpt;
				for (; i < rank; i++) {
					spos[i] = qpos[i - 1];
				}

				Object obj = s.getObject(spos);
				rds.set(obj, qpos);
			}
			rds.imultiply(f);
			qds.imultiply(1 - f);
			qds.iadd(rds);
		}

		return qds;
	}

	/**
	 * Calculate quantile of dataset which is defined as the inverse of the cumulative distribution function (CDF)
	 * @param a dataset
	 * @param q quantile
	 * @return point at which CDF has value q
	 */
	@SuppressWarnings("unchecked")
	public static double quantile(final Dataset a, final double q) {
		if (q < 0 || q > 1) {
			throw new IllegalArgumentException("Quantile requested is outside [0,1]");
		}
		QStatisticsImpl<Double> qs = (QStatisticsImpl<Double>) getQStatistics(a);
		Double qv = qs.getQuantile(q);
		if (qv == null) {
			qv = pQuantile(qs.s.get(), q);
			qs.setQuantile(q, qv);
		}
		return qv;
	}
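
	// Example (illustrative sketch; assumes a double dataset created with DatasetFactory):
	//     Dataset a = DatasetFactory.createFromObject(new double[] {3, 1, 4, 1, 5, 9, 2, 6});
	//     double lq = Stats.quantile(a, 0.25); // lower quartile, linearly interpolated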

	/**
	 * Calculate quantiles of dataset which are defined as the inverse of the cumulative distribution function (CDF)
	 * @param a dataset
	 * @param values quantiles
	 * @return points at which CDF has given values
	 */
	@SuppressWarnings("unchecked")
	public static double[] quantile(final Dataset a, final double... values) {
		final double[] points = new double[values.length];
		QStatisticsImpl<Double> qs = (QStatisticsImpl<Double>) getQStatistics(a);
		for (int i = 0; i < points.length; i++) {
			final double q = values[i];
			if (q < 0 || q > 1) {
				throw new IllegalArgumentException("Quantile requested is outside [0,1]");
			}
			Double qv = qs.getQuantile(q);
			if (qv == null) {
				qv = pQuantile(qs.s.get(), q);
				qs.setQuantile(q, qv);
			}
			points[i] = qv;
		}

		return points;
	}

	/**
	 * Calculate quantiles of dataset which are defined as the inverse of the cumulative distribution function (CDF)
	 * @param a dataset
	 * @param axis to reduce along
	 * @param values quantiles
	 * @return points at which CDF has given values
	 */
	@SuppressWarnings({ "unchecked" })
	public static Dataset[] quantile(final Dataset a, int axis, final double... values) {
		final Dataset[] points = new Dataset[values.length];
		final int is = a.getElementsPerItem();
		axis = a.checkAxis(axis);

		if (is == 1) {
			QStatisticsImpl<Double> qs = (QStatisticsImpl<Double>) getQStatistics(a, axis);
			for (int i = 0; i < points.length; i++) {
				final double q = values[i];
				if (q < 0 || q > 1) {
					throw new IllegalArgumentException("Quantile requested is outside [0,1]");
				}
				Dataset qv = qs.getQuantile(axis, q);
				if (qv == null) {
					qv = pQuantile(qs.getSortedDataset(axis), axis, q);
					qs.setQuantile(axis, q, qv);
				}
				points[i] = qv;
			}
		} else {
			QStatisticsImpl<double[]> qs = (QStatisticsImpl<double[]>) getQStatistics(a);
			Dataset w = DatasetFactory.zeros(1, a.getClass(), a.getShapeRef());
			for (int j = 0; j < is; j++) {
				boolean copied = false;

				for (int i = 0; i < points.length; i++) {
					final double q = values[i];
					if (q < 0 || q > 1) {
						throw new IllegalArgumentException("Quantile requested is outside [0,1]");
					}
					Dataset qv = qs.getQuantile(axis, q);
					if (qv == null) {
						if (!copied) {
							copied = true;
							((CompoundDataset) a).copyElements(w, j);
							w.sort(axis);
						}
						qv = pQuantile(w, axis, q);
						qs.setQuantile(axis, q, qv);
						if (j == 0) {
							points[i] = DatasetFactory.zeros(is, qv.getClass(), qv.getShapeRef());
						}
						((CompoundDoubleDataset) points[i]).setElements(qv, j);
					}
				}
			}
		}

		return points;
	}
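
	// Example (illustrative sketch): per-column quartiles of a 2-D dataset, reducing along axis 0:
	//     Dataset m = DatasetFactory.createRange(DoubleDataset.class, 12).reshape(3, 4);
	//     Dataset[] qs = Stats.quantile(m, 0, 0.25, 0.5, 0.75); // three datasets of shape [4]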

	/**
	 * @param a dataset
	 * @param axis to reduce along
	 * @return median
	 */
	public static Dataset median(final Dataset a, int axis) {
		axis = a.checkAxis(axis);
		return getQStatistics(a, axis).getQuantile(axis, QStatisticsImpl.Q2);
	}

	/**
	 * @param a dataset
	 * @return median
	 */
	public static Object median(final Dataset a) {
		return getQStatistics(a).getQuantile(QStatisticsImpl.Q2);
	}

	/**
	 * Interquartile range: Q3 - Q1
	 * @param a dataset
	 * @return range
	 */
	@SuppressWarnings("unchecked")
	public static Object iqr(final Dataset a) {
		final int is = a.getElementsPerItem();
		if (is == 1) {
			QStatisticsImpl<Double> qs = (QStatisticsImpl<Double>) getQStatistics(a);
			return qs.getQuantile(QStatisticsImpl.Q3) - qs.getQuantile(QStatisticsImpl.Q1);
		}

		QStatisticsImpl<double[]> qs = (QStatisticsImpl<double[]>) getQStatistics(a);
		double[] q1 = (double[]) qs.getQuantile(QStatisticsImpl.Q1);
		double[] q3 = ((double[]) qs.getQuantile(QStatisticsImpl.Q3)).clone();
		for (int j = 0; j < is; j++) {
			q3[j] -= q1[j];
		}
		return q3;
	}

	/**
	 * Interquartile range: Q3 - Q1
	 * @param a dataset
	 * @param axis to reduce along
	 * @return range
	 */
	public static Dataset iqr(final Dataset a, int axis) {
		axis = a.checkAxis(axis);
		QStatisticsImpl<?> qs = getQStatistics(a, axis);
		Dataset q3 = qs.getQuantile(axis, QStatisticsImpl.Q3);

		return Maths.subtract(q3, qs.getQuantile(axis, QStatisticsImpl.Q1));
	}
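
	// Example (illustrative sketch):
	//     Dataset a = DatasetFactory.createFromObject(new double[] {1, 2, 3, 4, 100});
	//     Object med = Stats.median(a); // 3.0
	//     Object r = Stats.iqr(a);      // 2.0: Q3 - Q1 is robust to the outlier 100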

	static private HigherStatisticsImpl<?> getHigherStatistic(final Dataset a, boolean[] ignoreInvalids) {
		boolean ignoreNaNs = false;
		boolean ignoreInfs = false;
		if (a.hasFloatingPointElements()) {
			ignoreNaNs = ignoreInvalids != null && ignoreInvalids.length > 0 ? ignoreInvalids[0] : false;
			ignoreInfs = ignoreInvalids != null && ignoreInvalids.length > 1 ? ignoreInvalids[1] : ignoreNaNs;
		}

		HigherStatisticsImpl<?> stats = a.getFirstMetadata(HigherStatisticsImpl.class);
		if (stats == null || stats.isDirty) {
			stats = calculateHigherMoments(a, ignoreNaNs, ignoreInfs);
			a.setMetadata(stats);
		}

		return stats;
	}

	static private HigherStatisticsImpl<?> getHigherStatistic(final Dataset a, final boolean[] ignoreInvalids, final int axis) {
		boolean ignoreNaNs = false;
		boolean ignoreInfs = false;
		if (a.hasFloatingPointElements()) {
			ignoreNaNs = ignoreInvalids != null && ignoreInvalids.length > 0 ? ignoreInvalids[0] : false;
			ignoreInfs = ignoreInvalids != null && ignoreInvalids.length > 1 ? ignoreInvalids[1] : ignoreNaNs;
		}

		HigherStatisticsImpl<?> stats = a.getFirstMetadata(HigherStatisticsImpl.class);
		if (stats == null || stats.getSkewness(axis) == null || stats.isDirty) {
			stats = calculateHigherMoments(a, ignoreNaNs, ignoreInfs, axis);
			a.setMetadata(stats);
		}

		return stats;
	}

	private static class HigherStatisticsImpl<T> implements MetadataType {
		private static final long serialVersionUID = -6587974784104116792L;
		T skewness;
		T kurtosis;
		transient Map<Integer, ReferencedDataset> smap = new HashMap<>();
		transient Map<Integer, ReferencedDataset> kmap = new HashMap<>();

		@Dirtiable
		private boolean isDirty = true;

		@Override
		public HigherStatisticsImpl<T> clone() {
			return new HigherStatisticsImpl<>(this);
		}

		public HigherStatisticsImpl() {
		}

		private HigherStatisticsImpl(HigherStatisticsImpl<T> hstats) {
			skewness = hstats.skewness;
			kurtosis = hstats.kurtosis;
			smap.putAll(hstats.smap);
			kmap.putAll(hstats.kmap);
			isDirty = hstats.isDirty;
		}

//		public void setSkewness(T skewness) {
//			this.skewness = skewness;
//		}
//
//		public void setKurtosis(T kurtosis) {
//			this.kurtosis = kurtosis;
//		}
//
//		public T getSkewness() {
//			return skewness;
//		}
//
//		public T getKurtosis() {
//			return kurtosis;
//		}

		public Dataset getSkewness(int axis) {
			ReferencedDataset rd = smap.get(axis);
			return rd == null ? null : rd.get();
		}

		public Dataset getKurtosis(int axis) {
			ReferencedDataset rd = kmap.get(axis);
			return rd == null ? null : rd.get();
		}

		public void setSkewness(int axis, Dataset s) {
			smap.put(axis, new ReferencedDataset(s));
		}

		public void setKurtosis(int axis, Dataset k) {
			kmap.put(axis, new ReferencedDataset(k));
		}
	}

	static private HigherStatisticsImpl<?> calculateHigherMoments(final Dataset a, final boolean ignoreNaNs, final boolean ignoreInfs) {
		final int is = a.getElementsPerItem();
		final IndexIterator iter = a.getIterator();

		if (is == 1) {
			Skewness s = new Skewness();
			Kurtosis k = new Kurtosis();
			if (ignoreNaNs) {
				while (iter.hasNext()) {
					final double x = a.getElementDoubleAbs(iter.index);
					if (Double.isNaN(x))
						continue;
					s.increment(x);
					k.increment(x);
				}
			} else {
				while (iter.hasNext()) {
					final double x = a.getElementDoubleAbs(iter.index);
					s.increment(x);
					k.increment(x);
				}
			}

			HigherStatisticsImpl<Double> stats = new HigherStatisticsImpl<Double>();
			stats.skewness = s.getResult();
			stats.kurtosis = k.getResult();
			return stats;
		}
		final Skewness[] s = new Skewness[is];
		final Kurtosis[] k = new Kurtosis[is];

		for (int j = 0; j < is; j++) {
			s[j] = new Skewness();
			k[j] = new Kurtosis();
		}
		if (ignoreNaNs) {
			while (iter.hasNext()) {
				boolean skip = false;
				for (int j = 0; j < is; j++) {
					if (Double.isNaN(a.getElementDoubleAbs(iter.index + j))) {
						skip = true;
						break;
					}
				}
				if (!skip)
					for (int j = 0; j < is; j++) {
						final double val = a.getElementDoubleAbs(iter.index + j);
						s[j].increment(val);
						k[j].increment(val);
					}
			}
		} else {
			while (iter.hasNext()) {
				for (int j = 0; j < is; j++) {
					final double val = a.getElementDoubleAbs(iter.index + j);
					s[j].increment(val);
					k[j].increment(val);
				}
			}
		}
		final double[] ts = new double[is];
		final double[] tk = new double[is];
		for (int j = 0; j < is; j++) {
			ts[j] = s[j].getResult();
			tk[j] = k[j].getResult();
		}

		HigherStatisticsImpl<double[]> stats = new HigherStatisticsImpl<double[]>();
		stats.skewness = ts;
		stats.kurtosis = tk;
		return stats;
	}

	static private HigherStatisticsImpl<?> calculateHigherMoments(final Dataset a, final boolean ignoreNaNs, final boolean ignoreInfs, final int axis) {
		final int rank = a.getRank();
		final int is = a.getElementsPerItem();
		final int[] oshape = a.getShape();
		final int alen = oshape[axis];
		oshape[axis] = 1;

		final int[] nshape = ShapeUtils.squeezeShape(oshape, false);
		final Dataset sk;
		final Dataset ku;
		HigherStatisticsImpl<?> stats = null;

		if (is == 1) {
			if (stats == null) {
				stats = new HigherStatisticsImpl<Double>();
				a.setMetadata(stats);
			}
			sk = DatasetFactory.zeros(DoubleDataset.class, nshape);
			ku = DatasetFactory.zeros(DoubleDataset.class, nshape);
			final IndexIterator qiter = sk.getIterator(true);
			final int[] qpos = qiter.getPos();
			final int[] spos = oshape;

			while (qiter.hasNext()) {
				int i = 0;
				for (; i < axis; i++) {
					spos[i] = qpos[i];
				}
				spos[i++] = 0;
				for (; i < rank; i++) {
					spos[i] = qpos[i - 1];
				}

				Skewness s = new Skewness();
				Kurtosis k = new Kurtosis();
				if (ignoreNaNs) {
					for (int j = 0; j < alen; j++) {
						spos[axis] = j;
						final double val = a.getDouble(spos);
						if (Double.isNaN(val))
							continue;

						s.increment(val);
						k.increment(val);
					}
				} else {
					for (int j = 0; j < alen; j++) {
						spos[axis] = j;
						final double val = a.getDouble(spos);
						s.increment(val);
						k.increment(val);
					}
				}
				sk.set(s.getResult(), qpos);
				ku.set(k.getResult(), qpos);
			}
		} else {
			if (stats == null) {
				stats = new HigherStatisticsImpl<double[]>();
				a.setMetadata(stats);
			}
			sk = zeros(is, nshape);
			ku = zeros(is, nshape);
			final IndexIterator qiter = sk.getIterator(true);
			final int[] qpos = qiter.getPos();
			final int[] spos = oshape;
			final Skewness[] s = new Skewness[is];
			final Kurtosis[] k = new Kurtosis[is];
			final double[] ts = new double[is];
			final double[] tk = new double[is];

			for (int j = 0; j < is; j++) {
				s[j] = new Skewness();
				k[j] = new Kurtosis();
			}
			while (qiter.hasNext()) {
				int i = 0;
				for (; i < axis; i++) {
					spos[i] = qpos[i];
				}
				spos[i++] = 0;
				for (; i < rank; i++) {
					spos[i] = qpos[i - 1];
				}

				for (int j = 0; j < is; j++) {
					s[j].clear();
					k[j].clear();
				}
				// accumulate over every position along the reduced axis
				// (mirrors the single-element branch above)
				for (int l = 0; l < alen; l++) {
					spos[axis] = l;
					final int index = a.get1DIndex(spos);
					if (ignoreNaNs) {
						boolean skip = false;
						for (int j = 0; j < is; j++) {
							if (Double.isNaN(a.getElementDoubleAbs(index + j))) {
								skip = true;
								break;
							}
						}
						if (!skip)
							for (int j = 0; j < is; j++) {
								final double val = a.getElementDoubleAbs(index + j);
								s[j].increment(val);
								k[j].increment(val);
							}
					} else {
						for (int j = 0; j < is; j++) {
							final double val = a.getElementDoubleAbs(index + j);
							s[j].increment(val);
							k[j].increment(val);
						}
					}
				}
				for (int j = 0; j < is; j++) {
					ts[j] = s[j].getResult();
					tk[j] = k[j].getResult();
				}
				sk.set(ts, qpos);
				ku.set(tk, qpos);
			}
		}

		stats.setSkewness(axis, sk);
		stats.setKurtosis(axis, ku);
		return stats;
	}

	/**
	 * @param a dataset
	 * @param ignoreInvalids see {@link IDataset#max(boolean...)}
	 * @return skewness
	 * @since 2.0
	 */
	public static Object skewness(final Dataset a, final boolean... ignoreInvalids) {
		return getHigherStatistic(a, ignoreInvalids).skewness;
	}

	/**
	 * @param a dataset
	 * @param ignoreInvalids see {@link IDataset#max(boolean...)}
	 * @return kurtosis
	 * @since 2.0
	 */
	public static Object kurtosis(final Dataset a, final boolean... ignoreInvalids) {
		return getHigherStatistic(a, ignoreInvalids).kurtosis;
	}

	/**
	 * @param a dataset
	 * @param axis to reduce along
	 * @param ignoreInvalids see {@link Dataset#max(int, boolean...)}
	 * @return skewness along axis in dataset
	 * @since 2.0
	 */
	public static Dataset skewness(final Dataset a, int axis, final boolean... ignoreInvalids) {
		axis = a.checkAxis(axis);
		return getHigherStatistic(a, ignoreInvalids, axis).getSkewness(axis);
	}

	/**
	 * @param a dataset
	 * @param axis to reduce along
	 * @param ignoreInvalids see {@link Dataset#max(int, boolean...)}
	 * @return kurtosis along axis in dataset
	 * @since 2.0
	 */
	public static Dataset kurtosis(final Dataset a, int axis, final boolean... ignoreInvalids) {
		axis = a.checkAxis(axis);
		return getHigherStatistic(a, ignoreInvalids, axis).getKurtosis(axis);
	}
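
	// Example (illustrative sketch; Skewness and Kurtosis are the Apache Commons Math
	// sample statistics):
	//     Dataset a = DatasetFactory.createFromObject(new double[] {2, 2, 3, 9});
	//     Object sk = Stats.skewness(a); // positive for this right-tailed sample
	//     Object ku = Stats.kurtosis(a);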

	/**
	 * @param a dataset
	 * @param ignoreInvalids see {@link IDataset#max(boolean...)}
	 * @return sum of dataset
	 * @since 2.0
	 */
	public static Object sum(final Dataset a, final boolean... ignoreInvalids) {
		return a.sum(ignoreInvalids);
	}

	/**
	 * @param clazz dataset class
	 * @param a dataset
	 * @param ignoreInvalids see {@link IDataset#max(boolean...)}
	 * @return typed sum of dataset
	 * @since 2.3
	 */
	public static Object typedSum(final Class<? extends Dataset> clazz, final Dataset a, final boolean... ignoreInvalids) {
		if (a.isComplex()) {
			Complex m = (Complex) a.sum(ignoreInvalids);
			return m;
		} else if (a instanceof CompoundDataset) {
			return InterfaceUtils.fromDoublesToBiggestPrimitives(clazz, (double[]) a.sum(ignoreInvalids));
		}
		return InterfaceUtils.fromDoubleToBiggestNumber(clazz, DTypeUtils.toReal(a.sum(ignoreInvalids)));
	}

	/**
	 * @param a dataset
	 * @param dtype dataset type
	 * @param ignoreInvalids see {@link IDataset#max(boolean...)}
	 * @return typed sum of dataset
	 * @since 2.0
	 * @deprecated Please use the class-based method {@link #typedSum(Class, Dataset, boolean...)}
	 */
	@Deprecated
	public static Object typedSum(final Dataset a, int dtype, final boolean... ignoreInvalids) {
		return typedSum(DTypeUtils.getInterface(dtype), a, ignoreInvalids);
	}

	/**
	 * @param <T> dataset sub-interface
	 * @param clazz dataset sub-interface
	 * @param a dataset
	 * @param axis to reduce along
	 * @param ignoreInvalids see {@link Dataset#max(int, boolean...)}
	 * @return typed sum of items along axis in dataset
	 * @since 2.3
	 */
	public static <T extends Dataset> T typedSum(final Class<T> clazz, final Dataset a, int axis, final boolean... ignoreInvalids) {
		return a.sum(axis, ignoreInvalids).cast(clazz);
	}

	/**
	 * @param <T> dataset sub-interface
	 * @param clazz dataset sub-interface
	 * @param a dataset
	 * @param axes to reduce over
	 * @param ignoreInvalids see {@link Dataset#max(int, boolean...)}
	 * @return typed sum of items along axes in dataset
	 * @since 2.3
	 */
	public static <T extends Dataset> T typedSum(final Class<T> clazz, final Dataset a, int[] axes, final boolean... ignoreInvalids) {
		return a.sum(axes, ignoreInvalids).cast(clazz);
	}

	/**
	 * @param a dataset
	 * @param dtype dataset type
	 * @param axis to reduce along
	 * @param ignoreInvalids see {@link Dataset#max(int, boolean...)}
	 * @return typed sum of items along axis in dataset
	 * @since 2.0
	 * @deprecated Please use the class-based method {@link #typedSum(Class, Dataset, int, boolean...)}
	 */
	@Deprecated
	public static Dataset typedSum(final Dataset a, int dtype, int axis, final boolean... ignoreInvalids) {
		return DatasetUtils.cast(a.sum(axis, ignoreInvalids), dtype);
	}
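
	// Example (illustrative sketch): widening an integer sum to avoid overflow;
	// assumes an IntegerDataset created with DatasetFactory:
	//     Dataset ints = DatasetFactory.createFromObject(new int[] {1, 2, 3});
	//     Object total = Stats.typedSum(LongDataset.class, ints); // sum as a widened (long-backed) number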

	/**
	 * @param a dataset
	 * @param ignoreInvalids see {@link IDataset#max(boolean...)}
	 * @return product of all items in dataset
	 * @since 2.0
	 */
	public static Object product(final Dataset a, final boolean... ignoreInvalids) {
		return typedProduct(a.getClass(), a, ignoreInvalids);
	}

	/**
	 * @param a dataset
	 * @param axis to reduce along
	 * @param ignoreInvalids see {@link Dataset#max(int, boolean...)}
	 * @return product of items along axis in dataset
	 * @since 2.0
	 */
	public static Dataset product(final Dataset a, final int axis, final boolean... ignoreInvalids) {
		return typedProduct(a.getClass(), a, axis, ignoreInvalids);
	}

	/**
	 * @param a dataset
	 * @param axes to reduce over
	 * @param ignoreInvalids see {@link Dataset#max(int, boolean...)}
	 * @return product of items along axes in dataset
	 * @since 2.2
	 */
	public static Dataset product(final Dataset a, final int[] axes, final boolean... ignoreInvalids) {
		return typedProduct(a.getClass(), a, axes, ignoreInvalids);
	}

	/**
	 * @param a dataset
	 * @param dtype dataset type
	 * @param ignoreInvalids see {@link IDataset#max(boolean...)}
	 * @return typed product of all items in dataset
	 * @since 2.0
	 * @deprecated Please use the class-based method {@link #typedProduct(Class, Dataset, boolean...)}
	 */
	@Deprecated
	public static Object typedProduct(final Dataset a, final int dtype, final boolean... ignoreInvalids) {
		return typedProduct(DTypeUtils.getInterface(dtype), a, ignoreInvalids);
	}

	/**
	 * @param clazz dataset class
	 * @param a dataset
	 * @param ignoreInvalids see {@link IDataset#max(boolean...)}
	 * @return typed product of all items in dataset
	 * @since 2.3
	 */
	public static Object typedProduct(final Class<? extends Dataset> clazz, final Dataset a, final boolean... ignoreInvalids) {
		final boolean ignoreNaNs;
		final boolean ignoreInfs;
		if (a.hasFloatingPointElements()) {
			ignoreNaNs = ignoreInvalids != null && ignoreInvalids.length > 0 ? ignoreInvalids[0] : false;
			ignoreInfs = ignoreInvalids != null && ignoreInvalids.length > 1 ? ignoreInvalids[1] : ignoreNaNs;
		} else {
			ignoreNaNs = false;
			ignoreInfs = false;
		}

		if (a.isComplex()) {
			IndexIterator it = a.getIterator();
			double rv = 1, iv = 0;

			while (it.hasNext()) {
				final double r1 = a.getElementDoubleAbs(it.index);
				final double i1 = a.getElementDoubleAbs(it.index + 1);
				if (ignoreNaNs && (Double.isNaN(r1) || Double.isNaN(i1))) {
					continue;
				}
				if (ignoreInfs && (Double.isInfinite(r1) || Double.isInfinite(i1))) {
					continue;
				}
				final double tv = r1 * rv - i1 * iv;
				iv = r1 * iv + i1 * rv;
				rv = tv;
				if (Double.isNaN(rv) && Double.isNaN(iv)) {
					break;
				}
			}

			return new Complex(rv, iv);
		}

		IndexIterator it = a.getIterator();
		final int is;
		final long[] lresults;
		final double[] dresults;

		if (BooleanDataset.class.isAssignableFrom(clazz) || ByteDataset.class.isAssignableFrom(clazz) || ShortDataset.class.isAssignableFrom(clazz) || IntegerDataset.class.isAssignableFrom(clazz) || LongDataset.class.isAssignableFrom(clazz)) {
			long lresult = 1;
			while (it.hasNext()) {
				lresult *= a.getElementLongAbs(it.index);
			}
			return Long.valueOf(lresult);
		} else if (CompoundByteDataset.class.isAssignableFrom(clazz) || CompoundShortDataset.class.isAssignableFrom(clazz) || CompoundIntegerDataset.class.isAssignableFrom(clazz) || CompoundLongDataset.class.isAssignableFrom(clazz)) {
			is = a.getElementsPerItem();
			lresults = new long[is];
			for (int j = 0; j < is; j++) {
				lresults[j] = 1;
			}
			while (it.hasNext()) {
				for (int j = 0; j < is; j++)
					lresults[j] *= a.getElementLongAbs(it.index + j);
			}
			return lresults;
		} else if (FloatDataset.class.isAssignableFrom(clazz) || DoubleDataset.class.isAssignableFrom(clazz)) {
			double dresult = 1.;
			while (it.hasNext()) {
				final double x = a.getElementDoubleAbs(it.index);
				if (ignoreNaNs && Double.isNaN(x)) {
					continue;
				}
				if (ignoreInfs && Double.isInfinite(x)) {
					continue;
				}
				dresult *= x;
				if (Double.isNaN(dresult)) {
					break;
				}
			}
			return Double.valueOf(dresult);
		} else if (CompoundFloatDataset.class.isAssignableFrom(clazz) || CompoundDoubleDataset.class.isAssignableFrom(clazz)) {
			is = a.getElementsPerItem();
			double[] vals = new double[is];
			dresults = new double[is];
			for (int j = 0; j < is; j++) {
				dresults[j] = 1.;
			}
			while (it.hasNext()) {
				boolean okay = true;
				for (int j = 0; j < is; j++) {
					final double val = a.getElementDoubleAbs(it.index + j);
					if (ignoreNaNs && Double.isNaN(val)) {
						okay = false;
						break;
					}
					if (ignoreInfs && Double.isInfinite(val)) {
						okay = false;
						break;
					}
					vals[j] = val;
				}
				if (okay) {
					for (int j = 0; j < is; j++) {
						double val = vals[j];
						dresults[j] *= val;
					}
				}
			}
			return dresults;
		}

		return null;
	}
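
	// Example (illustrative sketch):
	//     Dataset a = DatasetFactory.createFromObject(new double[] {1., 2., 3., 4.});
	//     Object p = Stats.product(a); // Double(24.0)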

	/**
	 * @param a dataset
	 * @param dtype dataset type
	 * @param axes to reduce over
	 * @param ignoreInvalids see {@link IDataset#max(boolean...)}
	 * @return typed product of items in axes of dataset
	 * @since 2.2
	 * @deprecated Please use the class-based method {@link #typedProduct(Class, Dataset, int[], boolean...)}
	 */
	@Deprecated
	public static Dataset typedProduct(final Dataset a, final int dtype, int[] axes, final boolean... ignoreInvalids) {
		return typedProduct(DTypeUtils.getInterface(dtype), a, axes, ignoreInvalids);
	}

	/**
	 * @param a dataset
	 * @param dtype dataset type
	 * @param axis to reduce along
	 * @param ignoreInvalids see {@link IDataset#max(boolean...)}
	 * @return typed product of items along axis in dataset
	 * @since 2.0
	 * @deprecated Please use the class-based method {@link #typedProduct(Class, Dataset, int, boolean...)}
	 */
	@Deprecated
	public static Dataset typedProduct(final Dataset a, final int dtype, int axis, final boolean... ignoreInvalids) {
		return typedProduct(DTypeUtils.getInterface(dtype), a, new int[] {axis}, ignoreInvalids);
	}

	/**
	 * @param <T> dataset sub-interface
	 * @param clazz dataset sub-interface
	 * @param a dataset
	 * @param axis to reduce along
	 * @param ignoreInvalids see {@link IDataset#max(boolean...)}
	 * @return typed product of items along axis in dataset
	 * @since 2.3
	 */
	public static <T extends Dataset> T typedProduct(final Class<T> clazz, final Dataset a, int axis, final boolean... ignoreInvalids) {
		return typedProduct(clazz, a, new int[] {axis}, ignoreInvalids);
	}

	/**
	 * @param <T> dataset sub-interface
	 * @param clazz dataset sub-interface
	 * @param a dataset
	 * @param axes to reduce over
	 * @param ignoreInvalids see {@link IDataset#max(boolean...)}
	 * @return typed product of items in axes of dataset
	 * @since 2.3
	 */
	public static <T extends Dataset> T typedProduct(final Class<T> clazz, final Dataset a, int[] axes, final boolean... ignoreInvalids) {
		axes = ShapeUtils.checkAxes(a.getRank(), axes);
		SliceNDIterator siter = new SliceNDIterator(new SliceND(a.getShapeRef()), axes);

		int[] nshape = siter.getUsedSlice().getSourceShape();
		final int is = a.getElementsPerItem();

		final boolean ignoreNaNs;
		final boolean ignoreInfs;
		if (a.hasFloatingPointElements()) {
			ignoreNaNs = ignoreInvalids != null && ignoreInvalids.length > 0 ? ignoreInvalids[0] : false;
			ignoreInfs = ignoreInvalids != null && ignoreInvalids.length > 1 ? ignoreInvalids[1] : ignoreNaNs;
		} else {
			ignoreNaNs = false;
			ignoreInfs = false;
		}

		T result = DatasetFactory.zeros(is, clazz, nshape);

		int[] spos = siter.getUsedPos();

		// TODO add getLongArray(long[], int...) to CompoundDataset
		final boolean isComplex = a.isComplex();
		while (siter.hasNext()) {
			IndexIterator iter = a.getSliceIterator(siter.getCurrentSlice());
			final int[] pos = iter.getPos();

			if (isComplex) {
				double rv = 1, iv = 0;
				if (a instanceof ComplexFloatDataset) {
					ComplexFloatDataset af = (ComplexFloatDataset) a;
					while (iter.hasNext()) {
						final float r1 = af.getReal(pos);
						final float i1 = af.getImag(pos);
						if (ignoreNaNs && (Float.isNaN(r1) || Float.isNaN(i1))) {
							continue;
						}
						if (ignoreInfs && (Float.isInfinite(r1) || Float.isInfinite(i1))) {
							continue;
						}
						final double tv = r1 * rv - i1 * iv;
						iv = r1 * iv + i1 * rv;
						rv = tv;
						if (Double.isNaN(rv) && Double.isNaN(iv)) {
							break;
						}
					}
				} else if (a instanceof ComplexDoubleDataset) {
					ComplexDoubleDataset ad = (ComplexDoubleDataset) a;
					while (iter.hasNext()) {
						final double r1 = ad.getReal(pos);
						final double i1 = ad.getImag(pos);
						if (ignoreNaNs && (Double.isNaN(r1) || Double.isNaN(i1))) {
							continue;
						}
						if (ignoreInfs && (Double.isInfinite(r1) || Double.isInfinite(i1))) {
							continue;
						}
						final double tv = r1 * rv - i1 * iv;
						iv = r1 * iv + i1 * rv;
						rv = tv;
						if (Double.isNaN(rv) && Double.isNaN(iv)) {
							break;
						}
					}
				}

				result.set(new Complex(rv, iv), spos);
			} else {
				final long[] lresults;
				final double[] dresults;

				if (a instanceof BooleanDataset || a instanceof ByteDataset || a instanceof ShortDataset || a instanceof IntegerDataset || a instanceof LongDataset) {
					long lresult = 1;
					while (iter.hasNext()) {
						lresult *= a.getLong(pos);
					}
					result.set(lresult, spos);
				} else if (a instanceof CompoundByteDataset || a instanceof CompoundShortDataset || a instanceof CompoundIntegerDataset || a instanceof CompoundLongDataset) {
					CompoundDataset ca = (CompoundDataset) a;
					lresults = new long[is];
					for (int k = 0; k < is; k++) {
						lresults[k] = 1;
					}
					while (iter.hasNext()) {
						int l = iter.index;
						for (int k = 0; k < is; k++) {
							lresults[k] *= ca.getElementLongAbs(l++);
						}
					}
					result.set(lresults, spos);
				} else if (a instanceof FloatDataset || a instanceof DoubleDataset) {
					double dresult = 1.;
					while (iter.hasNext()) {
						final double x = a.getElementDoubleAbs(iter.index);
						if (ignoreNaNs && Double.isNaN(x)) {
							continue;
						}
						if (ignoreInfs && Double.isInfinite(x)) {
							continue;
						}
						dresult *= x;
						if (Double.isNaN(dresult)) {
							break;
						}
					}
					result.set(dresult, spos);
				} else if (a instanceof CompoundFloatDataset || a instanceof CompoundDoubleDataset) {
					CompoundDataset da = (CompoundDataset) a;
					double[] dvalues = new double[is];
					dresults = new double[is];
					for (int k = 0; k < is; k++) {
						dresults[k] = 1.;
					}
					while (iter.hasNext()) {
						da.getDoubleArrayAbs(iter.index, dvalues);
						boolean okay = true;
						for (int k = 0; k < is; k++) {
							final double val = dvalues[k];
							if (ignoreNaNs && Double.isNaN(val)) {
								okay = false;
								break;
							}
							if (ignoreInfs && Double.isInfinite(val)) {
								okay = false;
								break;
							}
						}
						if (okay) {
							for (int k = 0; k < is; k++) {
								dresults[k] *= dvalues[k];
							}
						}
					}
					result.set(dresults, spos);
				}
			}
		}

		return result;
	}

	/**
	 * @param a dataset
	 * @param ignoreInvalids see {@link IDataset#max(boolean...)}
	 * @return cumulative product of items in flattened dataset
	 * @since 2.0
	 */
	public static Dataset cumulativeProduct(final Dataset a, final boolean... ignoreInvalids) {
		return cumulativeProduct(a.flatten(), 0, ignoreInvalids);
	}

	/**
	 * @param a dataset
	 * @param axis to reduce along
	 * @param ignoreInvalids see {@link Dataset#max(int, boolean...)}
	 * @return cumulative product of items along axis in dataset
	 * @since 2.0
	 */
	public static Dataset cumulativeProduct(final Dataset a, int axis, final boolean... ignoreInvalids) {
		axis = a.checkAxis(axis);
		int[] oshape = a.getShape();
		int alen = oshape[axis];
		oshape[axis] = 1;

		final boolean ignoreNaNs;
		final boolean ignoreInfs;
		if (a.hasFloatingPointElements()) {
			ignoreNaNs = ignoreInvalids != null && ignoreInvalids.length > 0 ? ignoreInvalids[0] : false;
			ignoreInfs = ignoreInvalids != null && ignoreInvalids.length > 1 ? ignoreInvalids[1] : ignoreNaNs;
		} else {
			ignoreNaNs = false;
			ignoreInfs = false;
		}
		Dataset result = DatasetFactory.zeros(a);
		PositionIterator pi = result.getPositionIterator(axis);

		int[] pos = pi.getPos();

		while (pi.hasNext()) {
			if (a.isComplex()) {
				double rv = 1, iv = 0;
				if (a instanceof ComplexFloatDataset) {
					ComplexFloatDataset af = (ComplexFloatDataset) a;
					ComplexFloatDataset rf = (ComplexFloatDataset) result;
					for (int j = 0; j < alen; j++) {
						if (!Double.isNaN(rv) || !Double.isNaN(iv)) {
							pos[axis] = j;
							final float r1 = af.getReal(pos);
							final float i1 = af.getImag(pos);
							if (ignoreNaNs && (Float.isNaN(r1) || Float.isNaN(i1))) {
								continue;
							}
							if (ignoreInfs && (Float.isInfinite(r1) || Float.isInfinite(i1))) {
								continue;
							}
							final double tv = r1 * rv - i1 * iv;
							iv = r1 * iv + i1 * rv;
							rv = tv;
						}
						rf.set((float) rv, (float) iv, pos);
					}
				} else if (a instanceof ComplexDoubleDataset) {
					ComplexDoubleDataset ad = (ComplexDoubleDataset) a;
					ComplexDoubleDataset rd = (ComplexDoubleDataset) result;
					for (int j = 0; j < alen; j++) {
						if (!Double.isNaN(rv) || !Double.isNaN(iv)) {
							pos[axis] = j;
							final double r1 = ad.getReal(pos);
							final double i1 = ad.getImag(pos);
							if (ignoreNaNs && (Double.isNaN(r1) || Double.isNaN(i1))) {
								continue;
							}
							if (ignoreInfs && (Double.isInfinite(r1) || Double.isInfinite(i1))) {
								continue;
							}
							final double tv = r1 * rv - i1 * iv;
							iv = r1 * iv + i1 * rv;
							rv = tv;
						}
						rd.set(rv, iv, pos);
					}
				}
			} else {
				final int is;
				final long[] lresults;
				final double[] dresults;

				if (a instanceof BooleanDataset || a instanceof ByteDataset || a instanceof ShortDataset || a instanceof IntegerDataset || a instanceof LongDataset) {
					long lresult = 1;
					for (int j = 0; j < alen; j++) {
						pos[axis] = j;
						lresult *= a.getLong(pos);
						result.set(lresult, pos);
					}
				} else if (a instanceof CompoundByteDataset || a instanceof CompoundShortDataset || a instanceof CompoundIntegerDataset || a instanceof CompoundLongDataset) {
					is = a.getElementsPerItem();
					CompoundDataset ca = (CompoundDataset) a;
					lresults = new long[is];
					for (int k = 0; k < is; k++) {
						lresults[k] = 1;
					}
					for (int j = 0; j < alen; j++) {
						pos[axis] = j;
						int l = a.get1DIndex(pos);
						for (int k = 0; k < is; k++) {
							lresults[k] *= ca.getElementLongAbs(l++);
						}
						result.set(lresults, pos);
					}
				} else if (a instanceof FloatDataset || a instanceof DoubleDataset) {
					double dresult = 1.;
					for (int j = 0; j < alen; j++) {
						if (!Double.isNaN(dresult)) {
							pos[axis] = j;
							final double x = a.getDouble(pos);
							if (ignoreNaNs && Double.isNaN(x)) {
								continue;
							}
							if (ignoreInfs && Double.isInfinite(x)) {
								continue;
							}
							dresult *= x;
						}
						result.set(dresult, pos);
					}
				} else if (a instanceof CompoundFloatDataset || a instanceof CompoundDoubleDataset) {
					is = a.getElementsPerItem();
					CompoundDataset da = (CompoundDataset) a;
					double[] dvalues = new double[is];
					dresults = new double[is];
					for (int k = 0; k < is; k++) {
						dresults[k] = 1.;
					}
					for (int j = 0; j < alen; j++) {
						pos[axis] = j;
						da.getDoubleArray(dvalues, pos);
						boolean okay = true;
						for (int k = 0; k < is; k++) {
							final double val = dvalues[k];
							if (ignoreNaNs && Double.isNaN(val)) {
								okay = false;
								break;
							}
							if (ignoreInfs && Double.isInfinite(val)) {
								okay = false;
								break;
							}
						}
						if (okay) {
							for (int k = 0; k < is; k++) {
								dresults[k] *= dvalues[k];
							}
						}
						result.set(dresults, pos);
					}
				}
			}
		}

		return result;
	}
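
	// Example (illustrative sketch):
	//     Dataset a = DatasetFactory.createFromObject(new double[] {1., 2., 3., 4.});
	//     Dataset cp = Stats.cumulativeProduct(a); // [1, 2, 6, 24]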

	/**
	 * @param a dataset
	 * @param ignoreInvalids see {@link IDataset#max(boolean...)}
	 * @return cumulative sum of items in flattened dataset
	 * @since 2.0
	 */
	public static Dataset cumulativeSum(final Dataset a, final boolean... ignoreInvalids) {
		return cumulativeSum(a.flatten(), 0, ignoreInvalids);
	}

	/**
	 * @param a dataset
	 * @param axis to reduce along
	 * @param ignoreInvalids see {@link Dataset#max(int, boolean...)}
	 * @return cumulative sum of items along axis in dataset
	 * @since 2.0
	 */
	public static Dataset cumulativeSum(final Dataset a, int axis, final boolean... ignoreInvalids) {
		axis = a.checkAxis(axis);
		int[] oshape = a.getShape();
		int alen = oshape[axis];
		oshape[axis] = 1;

		final boolean ignoreNaNs;
		final boolean ignoreInfs;
		if (a.hasFloatingPointElements()) {
			ignoreNaNs = ignoreInvalids != null && ignoreInvalids.length > 0 ? ignoreInvalids[0] : false;
			ignoreInfs = ignoreInvalids != null && ignoreInvalids.length > 1 ? ignoreInvalids[1] : ignoreNaNs;
		} else {
			ignoreNaNs = false;
			ignoreInfs = false;
		}
		Dataset result = DatasetFactory.zeros(a);
		PositionIterator pi = result.getPositionIterator(axis);

		int[] pos = pi.getPos();

		while (pi.hasNext()) {

			if (a.isComplex()) {
				double rv = 0, iv = 0;
				if (a instanceof ComplexFloatDataset) {
					ComplexFloatDataset af = (ComplexFloatDataset) a;
					ComplexFloatDataset rf = (ComplexFloatDataset) result;
					for (int j = 0; j < alen; j++) {
						if (!Double.isNaN(rv) || !Double.isNaN(iv)) {
							pos[axis] = j;
							final float r1 = af.getReal(pos);
							final float i1 = af.getImag(pos);
							if (ignoreNaNs && (Float.isNaN(r1) || Float.isNaN(i1))) {
								continue;
							}
							if (ignoreInfs && (Float.isInfinite(r1) || Float.isInfinite(i1))) {
								continue;
							}
							rv += r1;
							iv += i1;
						}
						rf.set((float) rv, (float) iv, pos);
					}
				} else if (a instanceof ComplexDoubleDataset) {
					ComplexDoubleDataset ad = (ComplexDoubleDataset) a;
					ComplexDoubleDataset rd = (ComplexDoubleDataset) result;
					for (int j = 0; j < alen; j++) {
						if (!Double.isNaN(rv) || !Double.isNaN(iv)) {
							pos[axis] = j;
							final double r1 = ad.getReal(pos);
							final double i1 = ad.getImag(pos);
							if (ignoreNaNs && (Double.isNaN(r1) || Double.isNaN(i1))) {
								continue;
							}
							if (ignoreInfs && (Double.isInfinite(r1) || Double.isInfinite(i1))) {
								continue;
							}
							rv += r1;
							iv += i1;
						}
						rd.set(rv, iv, pos);
					}
				}
			} else {
				final int is;
				final long[] lresults;
				final double[] dresults;

				if (a instanceof BooleanDataset || a instanceof ByteDataset || a instanceof ShortDataset || a instanceof IntegerDataset || a instanceof LongDataset) {
					long lresult = 0;
					for (int j = 0; j < alen; j++) {
						pos[axis] = j;
						lresult += a.getLong(pos);
						result.set(lresult, pos);
					}
				} else if (a instanceof CompoundByteDataset) {
					is = a.getElementsPerItem();
					lresults = new long[is];
					for (int j = 0; j < alen; j++) {
						pos[axis] = j;
						final byte[] va = (byte[]) a.getObject(pos);
						for (int k = 0; k < is; k++) {
							lresults[k] += va[k];
						}
						result.set(lresults, pos);
					}
				} else if (a instanceof CompoundShortDataset) {
					is = a.getElementsPerItem();
					lresults = new long[is];
					for (int j = 0; j < alen; j++) {
						pos[axis] = j;
						final short[] va = (short[]) a.getObject(pos);
						for (int k = 0; k < is; k++) {
							lresults[k] += va[k];
						}
						result.set(lresults, pos);
					}
				} else if (a instanceof CompoundIntegerDataset) {
					is = a.getElementsPerItem();
					lresults = new long[is];
					for (int j = 0; j < alen; j++) {
						pos[axis] = j;
						final int[] va = (int[]) a.getObject(pos);
						for (int k = 0; k < is; k++) {
							lresults[k] += va[k];
						}
						result.set(lresults, pos);
					}
				} else if (a instanceof CompoundLongDataset) {
					is = a.getElementsPerItem();
					lresults = new long[is];
					for (int j = 0; j < alen; j++) {
						pos[axis] = j;
						final long[] va = (long[]) a.getObject(pos);
						for (int k = 0; k < is; k++) {
							lresults[k] += va[k];
						}
						result.set(lresults, pos);
					}
				} else if (a instanceof FloatDataset || a instanceof DoubleDataset) {
					double dresult = 0.;
					for (int j = 0; j < alen; j++) {
						if (!Double.isNaN(dresult)) {
							pos[axis] = j;
							final double x = a.getDouble(pos);
							if (ignoreNaNs && Double.isNaN(x)) {
								continue;
							}
							if (ignoreInfs && Double.isInfinite(x)) {
								continue;
							}
							dresult += x;
						}
						result.set(dresult, pos);
					}
				} else if (a instanceof CompoundFloatDataset || a instanceof CompoundDoubleDataset) {
					is = a.getElementsPerItem();
					CompoundDataset da = (CompoundDataset) a;
					double[] dvalues = new double[is];
					dresults = new double[is];
					for (int j = 0; j < alen; j++) {
						pos[axis] = j;
						da.getDoubleArray(dvalues, pos);
						boolean okay = true;
						for (int k = 0; k < is; k++) {
							final double val = dvalues[k];
							if (ignoreNaNs && Double.isNaN(val)) {
								okay = false;
								break;
							}
							if (ignoreInfs && Double.isInfinite(val)) {
								okay = false;
								break;
							}
						}
						if (okay) {
							for (int k = 0; k < is; k++) {
								dresults[k] += dvalues[k];
							}
						}
						result.set(dresults, pos);
					}
				}
			}
		}

		return result;
	}

	/**
	 * @param a dataset
	 * @return average deviation value of all items in the dataset
	 */
	public static Object averageDeviation(final Dataset a) {
		final IndexIterator it = a.getIterator();
		final int is = a.getElementsPerItem();

		if (is == 1) {
			double mean = (Double) a.mean();
			double sum = 0.0;

			while (it.hasNext()) {
				sum += Math.abs(a.getElementDoubleAbs(it.index) - mean);
			}

			return sum / a.getSize();
		}

		double[] means = (double[]) a.mean();
		double[] sums = new double[is];

		while (it.hasNext()) {
			for (int j = 0; j < is; j++)
				sums[j] += Math.abs(a.getElementDoubleAbs(it.index + j) - means[j]);
		}

		double n = a.getSize();
		for (int j = 0; j < is; j++)
			sums[j] /= n;

		return sums;
	}
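
	// Example (illustrative sketch): mean absolute deviation about the mean:
	//     Dataset a = DatasetFactory.createFromObject(new double[] {1., 2., 3., 4.});
	//     Object ad = Stats.averageDeviation(a); // mean of |x - 2.5| = 1.0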

	/**
	 * The residual is the sum of squared differences
	 * @param a first dataset
	 * @param b second dataset
	 * @return residual value
	 */
	public static double residual(final Dataset a, final Dataset b) {
		return a.residual(b);
	}

	/**
	 * The residual is the sum of squared differences
	 * @param a first dataset
	 * @param b second dataset
	 * @param w weight dataset
	 * @return residual value
	 */
	public static double weightedResidual(final Dataset a, final Dataset b, final Dataset w) {
		return a.residual(b, w, false);
	}

	/**
	 * Calculate approximate outlier values. These are defined as the values in the dataset
	 * that are approximately below and above the given thresholds - in terms of percentages
	 * of dataset size.
	 * <p>
	 * It approximates by limiting the number of items (given by length) used internally by
	 * data structures - the larger this is, the more accurate those outlier values become.
	 * The actual thresholds used are returned in the array.
	 * <p>
	 * Also, the low and high values will be made distinct if possible by adjusting the thresholds
	 * @param a dataset
	 * @param lo percentage threshold for lower limit
	 * @param hi percentage threshold for higher limit
	 * @param length maximum number of items used internally; if negative, then unlimited
	 * @return double array with low and high values, and low and high percentage thresholds
	 */
	public static double[] outlierValues(final Dataset a, double lo, double hi, final int length) {
		return outlierValues(a, null, true, lo, hi, length);
	}

	/**
	 * Calculate approximate outlier values. These are defined as the values in the dataset
	 * that are approximately below and above the given thresholds - in terms of percentages
	 * of dataset size.
	 * <p>
	 * It approximates by limiting the number of items (given by length) used internally by
	 * data structures - the larger this is, the more accurate those outlier values become.
	 * The actual thresholds used are returned in the array.
	 * <p>
	 * Also, the low and high values will be made distinct if possible by adjusting the thresholds
	 * @param a dataset
	 * @param mask can be null
	 * @param value value of mask to match to include for calculation
	 * @param lo percentage threshold for lower limit
	 * @param hi percentage threshold for higher limit
	 * @param length maximum number of items used internally; if negative, then unlimited
	 * @return double array with low and high values, and low and high percentage thresholds
	 * @since 2.1
	 */
	public static double[] outlierValues(final Dataset a, final Dataset mask, final boolean value, double lo, double hi, final int length) {
		if (lo <= 0 || hi <= 0 || lo >= hi || hi >= 100 || Double.isNaN(lo) || Double.isNaN(hi)) {
			throw new IllegalArgumentException("Thresholds must be between (0,100) and in order");
		}
		final int size = a.getSize();
		int nl = Math.max((int) ((lo * size) / 100), 1);
		if (length > 0 && nl > length)
			nl = length;
		int nh = Math.max((int) (((100 - hi) * size) / 100), 1);
		if (length > 0 && nh > length)
			nh = length;

		IndexIterator it = mask == null ? a.getIterator() : a.getBooleanIterator(mask, value);
		double[] results = Math.max(nl, nh) > 640 ? outlierValuesMap(a, it, nl, nh) : outlierValuesList(a, it, nl, nh);

		results[2] = results[2] * 100. / size;
		results[3] = 100. - results[3] * 100. / size;
		return results;
	}
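
	// Example (illustrative sketch): approximate 1% and 99% outlier values:
	//     Dataset a = DatasetFactory.createRange(DoubleDataset.class, 1000);
	//     double[] r = Stats.outlierValues(a, 1, 99, 100);
	//     // r[0], r[1] are the low/high values; r[2], r[3] the percentages actually used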

	static double[] outlierValuesMap(final Dataset a, final IndexIterator it, int nl, int nh) {
		final TreeMap<Double, Integer> lMap = new TreeMap<Double, Integer>();
		final TreeMap<Double, Integer> hMap = new TreeMap<Double, Integer>();

		int ml = 0;
		int mh = 0;
		while (it.hasNext()) {
			Double x = a.getElementDoubleAbs(it.index);
			if (Double.isNaN(x)) {
				continue;
			}
			Integer i;
			if (ml == nl) {
				Double k = lMap.lastKey();
				if (x < k) {
					i = lMap.get(k) - 1;
					if (i == 0) {
						lMap.remove(k);
					} else {
						lMap.put(k, i);
					}
					i = lMap.get(x);
					if (i == null) {
						lMap.put(x, 1);
					} else {
						lMap.put(x, i + 1);
					}
				}
			} else {
				i = lMap.get(x);
				if (i == null) {
					lMap.put(x, 1);
				} else {
					lMap.put(x, i + 1);
				}
				ml++;
			}

			if (mh == nh) {
				Double k = hMap.firstKey();
				if (x > k) {
					i = hMap.get(k) - 1;
					if (i == 0) {
						hMap.remove(k);
					} else {
						hMap.put(k, i);
					}
					i = hMap.get(x);
					if (i == null) {
						hMap.put(x, 1);
					} else {
						hMap.put(x, i + 1);
					}
				}
			} else {
				i = hMap.get(x);
				if (i == null) {
					hMap.put(x, 1);
				} else {
					hMap.put(x, i + 1);
				}
				mh++;
			}
		}

		// Attempt to make values distinct
		double lx = lMap.lastKey();
		double hx = hMap.firstKey();
		if (lx >= hx) {
			Double h = hMap.higherKey(lx);
			if (h != null) {
				hx = h;
				mh--;
			} else {
				Double l = lMap.lowerKey(hx);
				if (l != null) {
					lx = l;
					ml--;
				}
			}
		}
		return new double[] {lx, hx, ml, mh};
	}

	static double[] outlierValuesList(final Dataset a, final IndexIterator it, int nl, int nh) {
		final List<Double> lList = new ArrayList<Double>(nl);
		final List<Double> hList = new ArrayList<Double>(nh);

		double lx = Double.POSITIVE_INFINITY;
		double hx = Double.NEGATIVE_INFINITY;

		while (it.hasNext()) {
			double x = a.getElementDoubleAbs(it.index);
			if (Double.isNaN(x)) {
				continue;
			}
			if (x < lx) {
				if (lList.size() == nl) {
					lList.remove(lx);
				}
				lList.add(x);
				lx = Collections.max(lList);
			} else if (x == lx) {
				if (lList.size() < nl) {
					lList.add(x);
				}
			}

			if (x > hx) {
				if (hList.size() == nh) {
					hList.remove(hx);
				}
				hList.add(x);
				hx = Collections.min(hList);
			} else if (x == hx) {
				if (hList.size() < nh) {
					hList.add(x);
				}
			}
		}

		nl = lList.size();
		nh = hList.size();

		// Attempt to make values distinct
		if (lx >= hx) {
			Collections.sort(hList);
			for (double h : hList) {
				if (h > hx) {
					hx = h;
					break;
				}
				nh--;
			}
			if (lx >= hx) {
				Collections.sort(lList);
				Collections.reverse(lList);
				for (double l : lList) {
					if (l < lx) {
						lx = l;
						break;
					}
					nl--;
				}
			}
		}
		return new double[] {lx, hx, nl, nh};
	}

	/**
	 * See {@link #covariance(Dataset a, Dataset b, boolean rowvar, boolean bias, Integer ddof)}
	 * with b = null, rowvar = true, bias = false and ddof = null.
	 * @param a dataset containing multiple variables and observations. Each row represents a variable, each column an observation.
	 * @return covariance array of a
	 */
	public static Dataset covariance(final Dataset a) {
		return covariance(a, true, false, null);
	}

	/**
	 * See {@link #covariance(Dataset a, Dataset b, boolean rowvar, boolean bias, Integer ddof)}
	 * with b = null.
	 * @param a dataset containing multiple variables and observations. Each row represents a variable, each column an observation.
	 * @param rowvar When true (default), each row is a variable; when false each column is a variable.
	 * @param bias Default normalisation is (N - 1) - N is number of observations. If set true, normalisation is (N).
	 * @param ddof Default normalisation is (N - 1). If ddof is set, then normalisation is (N - ddof).
	 * @return covariance array of a
	 * @since 2.0
	 */
	public static Dataset covariance(final Dataset a, boolean rowvar, boolean bias, Integer ddof) {
		return covariance(a, null, rowvar, bias, ddof);
	}

	/**
	 * See {@link #covariance(Dataset a, Dataset b, boolean rowvar, boolean bias, Integer ddof)}
	 * with b = null, rowvar = true, bias = false and ddof = null
	 * @param a dataset containing multiple variables and observations. Each row represents a variable, each column an observation.
	 * @param b An extra set of variables and observations. Must be of same type as a and have a compatible shape.
	 * @return covariance array of a concatenated with b
	 */
	public static Dataset covariance(final Dataset a, final Dataset b) {
		return covariance(a, b, true, false, null);
	}

	/**
	 * Calculate the covariance matrix (array) of a concatenated with b. This method is
	 * directly based on the implementation in numpy (cov).
	 * @param a dataset containing multiple variables and observations. Each row represents a variable, each column an observation.
	 * @param b An extra set of variables and observations. Must be of same type as a and have a compatible shape.
	 * @param rowvar When true (default), each row is a variable; when false each column is a variable.
	 * @param bias Default normalisation is (N - 1) - N is number of observations. If set true, normalisation is (N).
	 * @param ddof Default normalisation is (N - 1). If ddof is set, then normalisation is (N - ddof).
	 * @return covariance array of a concatenated with b
	 * @since 2.0
	 */
	public static Dataset covariance(final Dataset a, final Dataset b, boolean rowvar, boolean bias, Integer ddof) {

		//Create a working copy of the dataset & check its rank.
		Dataset vars = a.clone();
		if (a.getRank() == 1) {
			vars.setShape(1, a.getShapeRef()[0]);
		}

		//1D of variables, so consider rows as variables
		if (vars.getShapeRef()[0] == 1) {
			rowvar = true;
		}

		//nr is the number of records; axis is index
		int nr, axis;
		if (rowvar) {
			nr = vars.getShapeRef()[1];
			axis = 0;
		} else {
			nr = vars.getShapeRef()[0];
			axis = 1;
		}

		//Set the reduced degrees of freedom & normalisation factor
		if (ddof == null) {
			if (!bias) {
				ddof = 1;
			} else {
				ddof = 0;
			}
		}
		double norm_fact = nr - ddof;
		if (norm_fact <= 0.) {
			//TODO Some sort of warning here?
			norm_fact = 0.;
		}

		//Concatenate additional set of variables with main set
		if (b != null) {
			//Create a working copy of the dataset & check its rank.
			Dataset extraVars = b.clone();
			if (b.getRank() == 1) {
				extraVars.setShape(1, b.getShapeRef()[0]);
			}
			vars = DatasetUtils.concatenate(new Dataset[] {vars, extraVars}, axis);
		}

		//Calculate deviations & covariance matrix
		Dataset varsMean = vars.mean(1 - axis, false);
		// vars & varsMean need same shape (this is a hack!)
		int[] meanShape = vars.getShape();
		meanShape[1 - axis] = 1;
		varsMean.setShape(meanShape);
		vars.isubtract(varsMean);
		Dataset cov;
		if (rowvar) {
			cov = Maths.divide(LinearAlgebra.dotProduct(vars, Maths.conjugate(vars.transpose())), norm_fact).squeeze();
		} else {
			cov = Maths.divide(LinearAlgebra.dotProduct(vars.transpose(), Maths.conjugate(vars)), norm_fact).squeeze();
		}
		return cov;
	}
}