/**
 * Shuffles the given array using a random number generator built from the supplied seed.
 * Delegates to the {@code shuffleArray} overload that accepts a {@link Random}.
 *
 * @param array   the array to shuffle
 * @param rngSeed the seed used to construct the {@link Random} passed to the overload
 */
public static void shuffleArray(int[] array, long rngSeed) {
    Random seededRng = new Random(rngSeed);
    shuffleArray(array, seededRng);
}
/**
 * Maps a continuous value onto a bin index.
 * <p>
 * The value is passed through {@code normalize(value, min, max)}, multiplied by
 * {@code binCount}, truncated to an {@code int}, and finally handed to
 * {@code clamp} with the bounds {@code 0} and {@code binCount - 1}.
 *
 * @param value    the value to discretize
 * @param min      the min of the distribution
 * @param max      the max of the distribution
 * @param binCount the number of bins
 * @return the discretized value
 */
public static int discretize(double value, double min, double max, int binCount) {
    final int lastBin = binCount - 1;
    final int rawBin = (int) (binCount * normalize(value, min, max));
    return clamp(rawBin, 0, lastBin);
}
/**
 * Returns the sum of products for the given rows of numbers.
 * <p>
 * For every column index, the entries of all rows at that index are collected
 * with {@code column(i, nums)}, multiplied together with {@code times(...)},
 * and the resulting products are added up. For two rows {@code x} and {@code y}
 * this is {@code x[0]*y[0] + x[1]*y[1] + ...}.
 * <p>
 * The number of columns is taken from the first row; all rows are assumed to
 * have at least that many entries.
 *
 * @param nums the rows of numbers to combine
 * @return the sum of the per-column products, or 0 if {@code nums} is null or empty
 */
public static double sumOfProducts(double[]... nums) {
    if (nums == null || nums.length < 1)
        return 0;

    // Iterate over column indices (entries per row), not over the number of rows:
    // bounding the loop by nums.length only visited as many columns as there were rows.
    final int columnCount = nums[0].length;

    double sum = 0;
    for (int col = 0; col < columnCount; col++) {
        /* The col-th entry of every row */
        double[] column = column(col, nums);
        sum += times(column);
    }
    return sum;
}//end sumOfProducts
/**
 * Computes the two line weights {@code w_0} and {@code w_1} for a vector of
 * interleaved x, y values.
 * <p>
 * The vector is separated into x and y arrays by {@code coordSplit}. Then
 * {@code w_1 = sumOfMeanDifferences(x, y) / sumOfMeanDifferencesOnePoint(x)} and
 * {@code w_0 = mean(y) - w_1 * mean(x)}.
 *
 * @param vector the vector of numbers to compute the weights for; x, y pairs are
 *               expected at vector[i], vector[i+1]
 * @return an array of length {@code vector.length} holding {@code w_0} at index 0
 *         and {@code w_1} at index 1; any remaining entries are 0
 */
public static double[] weightsFor(double[] vector) {
    List<double[]> xy = coordSplit(vector);
    double[] xs = xy.get(0);
    double[] ys = xy.get(1);

    double xMean = sum(xs) / xs.length;
    double yMean = sum(ys) / ys.length;

    double numerator = sumOfMeanDifferences(xs, ys);
    double denominator = sumOfMeanDifferencesOnePoint(xs);

    double slope = numerator / denominator;
    double intercept = yMean - (slope) * xMean;

    double[] weights = new double[vector.length];
    weights[0] = intercept;
    weights[1] = slope;
    return weights;
}//end weightsFor
@Override public void reset() { cursor = 0; curr = null; if (shuffle) { if((train && numExamples < NUM_EXAMPLES) || (!train && numExamples < NUM_EXAMPLES_TEST)){ //Shuffle only first N elements if(firstShuffle){ MathUtils.shuffleArray(order, rng); firstShuffle = false; } else { MathUtils.shuffleArraySubset(order, numExamples, rng); } } else { MathUtils.shuffleArray(order, rng); } } }
/**
 * Returns the number of permutations of r elements taken from n,
 * computed as {@code n! / (n - r)!} using {@code MathUtils.factorial}.
 *
 * @param n the number of elements overall
 * @param r the number of elements to arrange
 * @return the permutation count for these numbers
 */
public static double permutation(double n, double r) {
    double numerator = MathUtils.factorial(n);
    double denominator = MathUtils.factorial(n - r);
    return numerator / denominator;
}//end permutation
/**
 * Returns the determination coefficient (r^2) of two vectors, computed as the
 * square of {@code correlation(y1, y2)}.
 *
 * @param y1 the first vector
 * @param y2 the second vector
 * @param n  the length of both vectors; not used by this implementation
 * @return the determination coefficient or r^2
 */
public static double determinationCoefficient(double[] y1, double[] y2, int n) {
    final double r = correlation(y1, y2);
    return Math.pow(r, 2);
}
/**
 * Returns the probability of an event occurring exactly k times in n independent
 * trials, computed as {@code combination(n, k) * p^k * (1 - p)^(n - k)}.
 *
 * @param n           the number of trials
 * @param k           the number of times the target event occurs
 * @param successProb the probability of the event happening in a single trial
 * @return the probability of the given event occurring k times
 */
public static double bernoullis(double n, double k, double successProb) {
    double failureProb = 1 - successProb;
    double ways = MathUtils.combination(n, k);
    double successTerm = Math.pow(successProb, k);
    double failureTerm = Math.pow(failureProb, n - k);
    return ways * successTerm * failureTerm;
}//end bernoullis
/** * This returns the minimized loss values for a given vector. * It is assumed that the x, y pairs are at * vector[i], vector[i+1] * * @param vector the vector of numbers to getFromOrigin the weights for * @return a double array with w_0 and w_1 are the associated indices. */ public static double[] weightsFor(List<Double> vector) { /* split coordinate system */ List<double[]> coords = coordSplit(vector); /* x vals */ double[] x = coords.get(0); /* y vals */ double[] y = coords.get(1); double meanX = sum(x) / x.length; double meanY = sum(y) / y.length; double sumOfMeanDifferences = sumOfMeanDifferences(x, y); double xDifferenceOfMean = sumOfMeanDifferencesOnePoint(x); double w_1 = sumOfMeanDifferences / xDifferenceOfMean; double w_0 = meanY - (w_1) * meanX; //double w_1=(n*sumOfProducts(x,y) - sum(x) * sum(y))/(n*sumOfSquares(x) - Math.pow(sum(x),2)); // double w_0=(sum(y) - (w_1 * sum(x)))/n; double[] ret = new double[vector.size()]; ret[0] = w_0; ret[1] = w_1; return ret; }//end weightsFor
/**
 * Returns the number of combinations of r elements chosen from n,
 * computed as {@code n! / (r! * (n - r)!)} using {@code MathUtils.factorial}.
 *
 * @param n the number of elements overall
 * @param r the number of elements to choose
 * @return the amount of possible combinations for this set of elements
 */
public static double combination(double n, double r) {
    double nFactorial = MathUtils.factorial(n);
    double rFactorial = MathUtils.factorial(r);
    double restFactorial = MathUtils.factorial(n - r);
    double denominator = rFactorial * restFactorial;
    return nFactorial / denominator;
}//end combination
/**
 * Returns the determination coefficient (r^2) of two vectors, computed as the
 * square of {@code correlation(y1, y2)}.
 *
 * @param y1 the first vector
 * @param y2 the second vector
 * @param n  the length of both vectors; not used by this implementation
 * @return the determination coefficient or r^2
 */
public static double determinationCoefficient(double[] y1, double[] y2, int n) {
    final double r = correlation(y1, y2);
    return Math.pow(r, 2);
}
/**
 * Returns the probability of an event occurring exactly k times in n independent
 * trials, computed as {@code combination(n, k) * p^k * (1 - p)^(n - k)}.
 *
 * @param n           the number of trials
 * @param k           the number of times the target event occurs
 * @param successProb the probability of the event happening in a single trial
 * @return the probability of the given event occurring k times
 */
public static double bernoullis(double n, double k, double successProb) {
    double failureProb = 1 - successProb;
    double ways = MathUtils.combination(n, k);
    double successTerm = Math.pow(successProb, k);
    double failureTerm = Math.pow(failureProb, n - k);
    return ways * successTerm * failureTerm;
}//end bernoullis
private JavaRDD<String> listPathsSubset(JavaSparkContext sc, String path, int max, int rngSeed) throws IOException { Configuration config = new Configuration(); FileSystem hdfs = FileSystem.get(URI.create(path), config); RemoteIterator<LocatedFileStatus> fileIter = hdfs.listFiles(new org.apache.hadoop.fs.Path(path), true); List<String> paths = new ArrayList<>(); while (fileIter.hasNext()) { String filePath = fileIter.next().getPath().toString(); paths.add(filePath); } //Now, get a consistent random subset - assuming here that file listing isn't consistent Collections.sort(paths); int[] arr = new int[paths.size()]; for( int i=0; i<arr.length ; i++){ arr[i] = i; } MathUtils.shuffleArray(arr, rngSeed); List<String> out = new ArrayList<>(); for( int i=0; i<arr.length && i < max; i++ ){ out.add(paths.get(arr[i])); } return sc.parallelize(out); } }
/**
 * Computes the two line weights {@code w_0} and {@code w_1} for a vector of
 * interleaved x, y values.
 * <p>
 * The vector is separated into x and y arrays by {@code coordSplit}. Then
 * {@code w_1 = sumOfMeanDifferences(x, y) / sumOfMeanDifferencesOnePoint(x)} and
 * {@code w_0 = mean(y) - w_1 * mean(x)}.
 *
 * @param vector the vector of numbers to compute the weights for; x, y pairs are
 *               expected at vector[i], vector[i+1]
 * @return an array of length {@code vector.length} holding {@code w_0} at index 0
 *         and {@code w_1} at index 1; any remaining entries are 0
 */
public static double[] weightsFor(double[] vector) {
    List<double[]> xy = coordSplit(vector);
    double[] xs = xy.get(0);
    double[] ys = xy.get(1);

    double xMean = sum(xs) / xs.length;
    double yMean = sum(ys) / ys.length;

    double numerator = sumOfMeanDifferences(xs, ys);
    double denominator = sumOfMeanDifferencesOnePoint(xs);

    double slope = numerator / denominator;
    double intercept = yMean - (slope) * xMean;

    double[] weights = new double[vector.length];
    weights[0] = intercept;
    weights[1] = slope;
    return weights;
}//end weightsFor
/**
 * Maps a continuous value onto a bin index.
 * <p>
 * The value is passed through {@code normalize(value, min, max)}, multiplied by
 * {@code binCount}, truncated to an {@code int}, and finally handed to
 * {@code clamp} with the bounds {@code 0} and {@code binCount - 1}.
 *
 * @param value    the value to discretize
 * @param min      the min of the distribution
 * @param max      the max of the distribution
 * @param binCount the number of bins
 * @return the discretized value
 */
public static int discretize(double value, double min, double max, int binCount) {
    final int lastBin = binCount - 1;
    final int rawBin = (int) (binCount * normalize(value, min, max));
    return clamp(rawBin, 0, lastBin);
}
/**
 * Returns the sum of products for the given rows of numbers.
 * <p>
 * For every column index, the entries of all rows at that index are collected
 * with {@code column(i, nums)}, multiplied together with {@code times(...)},
 * and the resulting products are added up. For two rows {@code x} and {@code y}
 * this is {@code x[0]*y[0] + x[1]*y[1] + ...}.
 * <p>
 * The number of columns is taken from the first row; all rows are assumed to
 * have at least that many entries.
 *
 * @param nums the rows of numbers to combine
 * @return the sum of the per-column products, or 0 if {@code nums} is null or empty
 */
public static double sumOfProducts(double[]... nums) {
    if (nums == null || nums.length < 1)
        return 0;

    // Iterate over column indices (entries per row), not over the number of rows:
    // bounding the loop by nums.length only visited as many columns as there were rows.
    final int columnCount = nums[0].length;

    double sum = 0;
    for (int col = 0; col < columnCount; col++) {
        /* The col-th entry of every row */
        double[] column = column(col, nums);
        sum += times(column);
    }
    return sum;
}//end sumOfProducts
/**
 * Returns the number of permutations of r elements taken from n,
 * computed as {@code n! / (n - r)!} using {@code MathUtils.factorial}.
 *
 * @param n the number of elements overall
 * @param r the number of elements to arrange
 * @return the permutation count for these numbers
 */
public static double permutation(double n, double r) {
    double numerator = MathUtils.factorial(n);
    double denominator = MathUtils.factorial(n - r);
    return numerator / denominator;
}//end permutation
/**
 * Builds a {@code DataSetIterator} over the files in the {@code train} or
 * {@code test} subdirectory of the given root directory.
 * <p>
 * File paths are sorted first. If {@code totalExamples} is non-negative, the file
 * count is limited to {@code totalExamples / batchSize}: when that limit is positive
 * and smaller than the number of files found, a seeded shuffle selects which files
 * are kept. Otherwise all files are used.
 *
 * @param dataRootDir   the directory containing the {@code train} and {@code test} subdirectories
 * @param train         true to read from {@code train}, false to read from {@code test}
 * @param totalExamples the total number of examples wanted, or a negative value for no limit
 * @param batchSize     the divisor applied to {@code totalExamples} to get the file limit
 * @param seed          the seed for the shuffle used when only a subset of files is kept
 * @return an iterator over the selected files
 * @throws IllegalStateException if the subdirectory cannot be listed or contains no files
 */
public DataSetIterator getDataIterator(String dataRootDir, boolean train, int totalExamples, int batchSize, int seed) {
    File splitDir = new File(dataRootDir, train ? "train" : "test");
    File[] found = splitDir.listFiles();
    if (found == null || found.length == 0) {
        throw new IllegalStateException("Did not find files in directory " + splitDir.getAbsolutePath());
    }

    List<String> sortedPaths = new ArrayList<>();
    for (File entry : found) {
        sortedPaths.add(entry.getAbsolutePath());
    }
    Collections.sort(sortedPaths);

    int totalBatches;
    if (totalExamples < 0) {
        totalBatches = -1;
    } else {
        totalBatches = totalExamples / batchSize;
    }

    List<String> selected = sortedPaths;
    if (totalBatches > 0 && totalBatches < sortedPaths.size()) {
        Random rng = new Random(seed);
        int[] order = new int[sortedPaths.size()];
        for (int idx = 0; idx < order.length; idx++) {
            order[idx] = idx;
        }
        MathUtils.shuffleArray(order, rng);

        // Keep the files at the first totalBatches shuffled positions
        selected = new ArrayList<>();
        for (int pick = 0; pick < totalBatches; pick++) {
            selected.add(sortedPaths.get(order[pick]));
        }
    }

    // NOTE(review): 300 is the third constructor argument - presumably the word vector size; confirm
    Loader<DataSet> loader = new LoadDataSetsFunction(w2vPath, PatentLabelGenerator.classLabelToIndex().size(), 300);
    return new DataSetLoaderIterator(selected, loader, new LocalFileSourceFactory());
}
}
/** * This returns the minimized loss values for a given vector. * It is assumed that the x, y pairs are at * vector[i], vector[i+1] * * @param vector the vector of numbers to getFromOrigin the weights for * @return a double array with w_0 and w_1 are the associated indices. */ public static double[] weightsFor(List<Double> vector) { /* split coordinate system */ List<double[]> coords = coordSplit(vector); /* x vals */ double[] x = coords.get(0); /* y vals */ double[] y = coords.get(1); double meanX = sum(x) / x.length; double meanY = sum(y) / y.length; double sumOfMeanDifferences = sumOfMeanDifferences(x, y); double xDifferenceOfMean = sumOfMeanDifferencesOnePoint(x); double w_1 = sumOfMeanDifferences / xDifferenceOfMean; double w_0 = meanY - (w_1) * meanX; //double w_1=(n*sumOfProducts(x,y) - sum(x) * sum(y))/(n*sumOfSquares(x) - Math.pow(sum(x),2)); // double w_0=(sum(y) - (w_1 * sum(x)))/n; double[] ret = new double[vector.size()]; ret[0] = w_0; ret[1] = w_1; return ret; }//end weightsFor
/**
 * Returns the number of combinations of r elements chosen from n,
 * computed as {@code n! / (r! * (n - r)!)} using {@code MathUtils.factorial}.
 *
 * @param n the number of elements overall
 * @param r the number of elements to choose
 * @return the amount of possible combinations for this set of elements
 */
public static double combination(double n, double r) {
    double nFactorial = MathUtils.factorial(n);
    double rFactorial = MathUtils.factorial(r);
    double restFactorial = MathUtils.factorial(n - r);
    double denominator = rFactorial * restFactorial;
    return nFactorial / denominator;
}//end combination