private void outputDimRangeInfo(List<Tuple2<String, Tuple3<Writable, Writable, String>>> result) {
    if (col != null && minValue != null) {
        // output written to baseDir/colName/colName.dci-r-00000 (etc)
        String dimRangeFileName = col.getIdentity() + "/" + col.getName() + DIMENSION_COL_INFO_FILE_POSTFIX;
        result.add(new Tuple2<String, Tuple3<Writable, Writable, String>>(BatchConstants.CFG_OUTPUT_PARTITION,
                new Tuple3<Writable, Writable, String>(NullWritable.get(),
                        new Text(minValue.getBytes(StandardCharsets.UTF_8)), dimRangeFileName)));
        result.add(new Tuple2<String, Tuple3<Writable, Writable, String>>(BatchConstants.CFG_OUTPUT_PARTITION,
                new Tuple3<Writable, Writable, String>(NullWritable.get(),
                        new Text(maxValue.getBytes(StandardCharsets.UTF_8)), dimRangeFileName)));
        logger.info("write dimension range info for col: {} minValue: {} maxValue: {}",
                col.getName(), minValue, maxValue);
    }
}
@SuppressWarnings("unchecked")
@Test
public void cogroup3() {
    JavaPairRDD<String, String> categories = sc.parallelizePairs(Arrays.asList(
            new Tuple2<>("Apples", "Fruit"),
            new Tuple2<>("Oranges", "Fruit"),
            new Tuple2<>("Oranges", "Citrus")));
    JavaPairRDD<String, Integer> prices = sc.parallelizePairs(Arrays.asList(
            new Tuple2<>("Oranges", 2),
            new Tuple2<>("Apples", 3)));
    JavaPairRDD<String, Integer> quantities = sc.parallelizePairs(Arrays.asList(
            new Tuple2<>("Oranges", 21),
            new Tuple2<>("Apples", 42)));
    JavaPairRDD<String, Tuple3<Iterable<String>, Iterable<Integer>, Iterable<Integer>>> cogrouped =
            categories.cogroup(prices, quantities);
    assertEquals("[Fruit, Citrus]", Iterables.toString(cogrouped.lookup("Oranges").get(0)._1()));
    assertEquals("[2]", Iterables.toString(cogrouped.lookup("Oranges").get(0)._2()));
    assertEquals("[42]", Iterables.toString(cogrouped.lookup("Apples").get(0)._3()));
    cogrouped.collect();
}
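For reference, a minimal sketch of consuming the cogrouped RDD directly rather than through lookup(); the loop and its printed format are illustrative, not part of the test above.

    // Each record pairs a key with a Tuple3 of Iterables, one per input RDD;
    // a key absent from one of the inputs shows up as an empty Iterable.
    for (Tuple2<String, Tuple3<Iterable<String>, Iterable<Integer>, Iterable<Integer>>> entry
            : cogrouped.collect()) {
        System.out.println(entry._1()
                + " -> " + Iterables.toString(entry._2()._1())
                + " / " + Iterables.toString(entry._2()._2())
                + " / " + Iterables.toString(entry._2()._3()));
    }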
@Test
public void runImplicitALSUsingStaticMethods() {
    int features = 1;
    int iterations = 15;
    int users = 80;
    int products = 160;
    Tuple3<List<Rating>, double[], double[]> testData =
            ALSSuite.generateRatingsAsJava(users, products, features, 0.7, true, false);
    JavaRDD<Rating> data = jsc.parallelize(testData._1());
    MatrixFactorizationModel model = ALS.trainImplicit(data.rdd(), features, iterations);
    validatePrediction(model, users, products, testData._2(), 0.4, true, testData._3());
}
JavaSparkContext sc = new JavaSparkContext(sparkConf);
logs = sc.textFile(args[0]);
// ...
logs = sc.parallelize(EXAMPLE_LOGS);
// ...
logs.mapToPair((String logRecord) -> {
    String[] tokens = logRecord.split(",");
    Tuple3<String, String, String> key = Util.createKey(tokens);
    // ...
});
// ...
extracted.filter((Tuple2<Tuple3<String, String, String>, LogStatistics> s) -> {
    Tuple3<String, String, String> t3 = s._1;
    return (t3._1() != null); // exclude Tuple3(null,null,null)
});
JavaSparkContext ctx = new JavaSparkContext();
JavaRDD<String> transactions = ctx.textFile(transactionsFileName, 1);
transactions.saveAsTextFile("/rules/output/1");
transactions.flatMapToPair((String transaction) -> {
    List<String> list = Util.toList(transaction);
    List<List<String>> combinations = Combination.findSortedCombinations(list);
    // ...
});
// ...
List<String> t2List = new ArrayList<String>(t2._1);
t2List.removeAll(fromList);
result.add(new Tuple3<>(fromList, t2List, confidence));
// ...
assocRules.saveAsTextFile("/rules/output/6");
ctx.close();
JavaRDD<String> lines = ctx.textFile(inputPath);
JavaPairRDD<Long, Long> edges = lines.flatMapToPair((String s) -> {
    String[] nodes = s.split(" ");
    long start = Long.parseLong(nodes[0]);
    // ...
});
// ...
long[] aTriangle = {key._1, key._2, node};
Arrays.sort(aTriangle);
Tuple3<Long, Long, Long> t3 = new Tuple3<Long, Long, Long>(aTriangle[0], aTriangle[1], aTriangle[2]);
// ...
List<Tuple3<Long, Long, Long>> debug4 = trianglesWithDuplicates.collect();
for (Tuple3<Long, Long, Long> t : debug4) {
    // ...
}
JavaRDD<Tuple3<Long, Long, Long>> uniqueTriangles = trianglesWithDuplicates.distinct();
ctx.close();
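The Arrays.sort call is what makes the later distinct() effective: sorting the three node IDs gives every report of the same triangle an identical canonical Tuple3 key. A minimal sketch of that property, with made-up IDs:

    // the same triangle reported from two different starting nodes
    long[] first = {3L, 1L, 2L};
    long[] second = {2L, 3L, 1L};
    Arrays.sort(first);   // {1, 2, 3}
    Arrays.sort(second);  // {1, 2, 3}
    Tuple3<Long, Long, Long> a = new Tuple3<>(first[0], first[1], first[2]);
    Tuple3<Long, Long, Long> b = new Tuple3<>(second[0], second[1], second[2]);
    // Scala tuples compare structurally, so a.equals(b) is true and
    // distinct() keeps exactly one copy of each triangle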
LinearIndexer constraintOffset = new LinearIndexer(0);
JavaPairRDD<Long, FieldT> aRDD = JavaPairRDD.fromJavaRDD(config.sparkContext().parallelize(A, numPartitions));
JavaPairRDD<Long, FieldT> bRDD = JavaPairRDD.fromJavaRDD(config.sparkContext().parallelize(B, numPartitions));
// ... (start of the call elided in this snippet)
        constraintOffset);
final R1CSConstraintsRDD<FieldT> constraints = ret._1();
JavaPairRDD<Long, FieldT> oneFullAssignment = ret._2();
oneFullAssignment = oneFullAssignment.union(config.sparkContext().parallelizePairs(
        Collections.singletonList(new Tuple2<>((long) 0, one))));
// ... (start of the call elided in this snippet)
        numAuxiliary);
// ...
return new Tuple3<>(r1cs, primary, oneFullAssignment);
@Test
public void runRecommend() {
    int features = 5;
    int iterations = 10;
    int users = 200;
    int products = 50;
    List<Rating> testData = ALSSuite.generateRatingsAsJava(
            users, products, features, 0.7, true, false)._1();
    JavaRDD<Rating> data = jsc.parallelize(testData);
    MatrixFactorizationModel model = new ALS().setRank(features)
            .setIterations(iterations)
            .setImplicitPrefs(true)
            .setSeed(8675309L)
            .run(data.rdd());
    validateRecommendations(model.recommendProducts(1, 10), 10);
    validateRecommendations(model.recommendUsers(1, 20), 20);
}
JavaPairRDD<Long, LinearTerm<FieldT>> ALCXm = config.sparkContext()
        .parallelize(intList, numPartitions).flatMapToPair(blockNumber -> {
    return xMinusMeanConstraintsHelper(fieldFactory, ConstraintType.constraintA,
            outputAssignmentIndexer, xOffset, constraintOffset,
            // ... (remaining arguments elided in this snippet)
});
JavaPairRDD<Long, LinearTerm<FieldT>> ALCMean = config.sparkContext()
        .parallelize(intList, numPartitions).flatMapToPair(blockNumber -> {
    return xMinusMeanConstraintsHelper(fieldFactory, ConstraintType.constraintA,
            newMeanOffset, xOffset, constraintOffset,
            // ...
});
JavaPairRDD<Long, LinearTerm<FieldT>> BLC = config.sparkContext()
        .parallelize(intList, numPartitions).flatMapToPair(blockNumber -> {
    return xMinusMeanConstraintsHelper(fieldFactory, ConstraintType.constraintB,
            newMeanOffset, xOffset, constraintOffset,
            // ...
});
// ... (start of the call elided in this snippet)
        xm2OutputAssignmentIndexer, xm2ConstraintOffset);
constraints.union(xm2._1());
long xm2NumAssignments = xm2._3();
long finalConstraintOffsetNumber = xm2ConstraintOffset.getIndex(0) + xm2._1().size();
LinearIndexer finalConstraintOffset = new LinearIndexer(finalConstraintOffsetNumber);
R1CSConstraintsRDD<FieldT> finalConstraints = equalityConstraintGen(fieldFactory, config,
        // ... (remaining arguments elided)
JavaPairRDD<Long, FieldT> oneFullAssignment = xm.union(xm2._2()).union(covN);
return new Tuple3<>(constraints, oneFullAssignment, numAssignments);
Tuple3<Long, int[], double[]> topTopics = model.javaTopTopicsPerDocument(3).first();
Long docId = topTopics._1(); // confirm doc ID type
int[] topicIndices = topTopics._2();
double[] topicWeights = topTopics._3();
assertEquals(3, topicIndices.length);
assertEquals(3, topicWeights.length);

Tuple3<Long, int[], int[]> topicAssignment = model.javaTopicAssignments().first();
Long docId2 = topicAssignment._1();
int[] termIndices2 = topicAssignment._2();
int[] topicIndices2 = topicAssignment._3();
assertEquals(termIndices2.length, topicIndices2.length);
.parallelize(partitions, numPartitions).flatMapToPair(part -> {
    final long partSize = (part == numPartitions && totalSize % 2 != 0)
            ? totalSize % (totalSize / numPartitions)
            : totalSize / numPartitions;
    // ...
});
// ...
.parallelize(partitions, numPartitions).flatMapToPair(part -> {
    final long partSize = (part == numPartitions && totalSize % 2 != 0)
            ? totalSize % (totalSize / numPartitions)
            : totalSize / numPartitions;
    // ...
});
// ...
.parallelize(partitions, numPartitions).flatMapToPair(part -> {
    final long partSize = (part == numPartitions && totalSize % 2 != 0)
            ? totalSize % (totalSize / numPartitions)
            : totalSize / numPartitions;
    // ...
});
// ...
assert (r1cs.isSatisfied(primary, oneFullAssignment));
return new Tuple3<>(r1cs, primary, oneFullAssignment);
private void clear() {
    /* Format of element: (gate, leftTraversed, rightTraversed). */
    Stack<Tuple3<Gate<FieldT>, Boolean, Boolean>> traversalStack = new Stack<>();
    traversalStack.push(new Tuple3<>(this.resultGate, false, false));
    HashSet<Gate<FieldT>> visited = new HashSet<>();
    visited.add(this.resultGate);
    while (traversalStack.size() > 0) {
        Tuple3<Gate<FieldT>, Boolean, Boolean> top = traversalStack.pop();
        Gate<FieldT> topGate = top._1();
        boolean leftTraversed = top._2();
        boolean rightTraversed = top._3();
        if (topGate.left != null && !visited.contains(topGate.left) && !leftTraversed) {
            traversalStack.push(new Tuple3<>(topGate, true, rightTraversed));
            traversalStack.push(new Tuple3<>(topGate.left, false, false));
        } else if (topGate.right != null && !visited.contains(topGate.right) && !rightTraversed) {
            traversalStack.push(new Tuple3<>(topGate, leftTraversed, true));
            traversalStack.push(new Tuple3<>(topGate.right, false, false));
        } else {
            topGate.clear();
        }
    }
}
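Here Tuple3 serves as an explicit stack frame (gate, leftTraversed, rightTraversed), turning a recursive post-order clear into a loop that cannot overflow the call stack on deep circuits. A stripped-down sketch of the same pattern on a hypothetical two-child Node type (a plain tree, so the visited set above is omitted); it needs java.util.ArrayDeque, java.util.Deque, and scala.Tuple3:

    // hypothetical node type for the sketch
    static final class Node {
        Node left, right;
        void clear() { left = null; right = null; }
    }

    // post-order: clear a node only after both of its subtrees are handled
    static void clearPostOrder(Node root) {
        Deque<Tuple3<Node, Boolean, Boolean>> stack = new ArrayDeque<>();
        stack.push(new Tuple3<>(root, false, false));
        while (!stack.isEmpty()) {
            Tuple3<Node, Boolean, Boolean> top = stack.pop();
            Node node = top._1();
            if (node.left != null && !top._2()) {
                stack.push(new Tuple3<>(node, true, top._3())); // mark left done
                stack.push(new Tuple3<>(node.left, false, false));
            } else if (node.right != null && !top._3()) {
                stack.push(new Tuple3<>(node, top._2(), true)); // mark right done
                stack.push(new Tuple3<>(node.right, false, false));
            } else {
                node.clear(); // both subtrees processed
            }
        }
    }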
.parallelize(partitions, numPartitions).flatMapToPair(part -> {
    final long partSize = part == numPartitions
            ? totalSize % (totalSize / numPartitions)
            : totalSize / numPartitions;
    // ...
});
JavaPairRDD<Long, LinearTerm<FieldT>> linearCombinationB = config.sparkContext()
        .parallelize(partitions, numPartitions).flatMapToPair(part -> {
    final long partSize = part == numPartitions
            ? totalSize % (totalSize / numPartitions)
            : totalSize / numPartitions;
    // ...
});
JavaPairRDD<Long, LinearTerm<FieldT>> linearCombinationC = config.sparkContext()
        .parallelize(partitions, numPartitions).flatMapToPair(part -> {
    final long partSize = part == numPartitions
            ? totalSize % (totalSize / numPartitions)
            : totalSize / numPartitions;
    // ...
});
// ...
assert (r1cs.isSatisfied(primary, oneFullAssignment));
return new Tuple3<>(r1cs, primary, oneFullAssignment);
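The remainder trick in partSize may be easier to see with concrete numbers; this worked example assumes, as the part == numPartitions test suggests, that partitions 0..numPartitions-1 each take an equal share and one extra slot numbered numPartitions absorbs the leftover when totalSize does not divide evenly:

    long totalSize = 10;
    long numPartitions = 3;
    long regular = totalSize / numPartitions; // 3 elements for each of partitions 0..2
    long overflow = totalSize % regular;      // 10 % 3 == 1 element for partition 3
    // 3 partitions * 3 elements + 1 overflow element == 10 == totalSize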
JavaPairRDD<Long, FieldT> X = JavaPairRDD.fromJavaRDD(config.sparkContext().parallelize(XList, numPartitions));
JavaPairRDD<Long, FieldT> mean = JavaPairRDD.fromJavaRDD(config.sparkContext().parallelize(meanList, numPartitions));
JavaPairRDD<Long, FieldT> cov = JavaPairRDD.fromJavaRDD(config.sparkContext().parallelize(covList, numPartitions));
// ...
return new Tuple3<R1CSRelationRDD<FieldT>, Assignment<FieldT>, JavaPairRDD<Long, FieldT>>(r1cs, primary, oneFullAssignment);
// anonymous-class equivalent of the lambda filter shown earlier
public Boolean call(Tuple2<Tuple3<String, String, String>, LogStatistics> s) {
    Tuple3<String, String, String> t3 = s._1;
    return (t3._1() != null); // exclude Tuple3(null,null,null)
}
});
/**
 * Fetch the last available offset.
 *
 * @return the last available offset, if any.
 */
public Optional<Offset> lastOffset() {
    Lock lock = rwLock.readLock();
    lock.lock();
    try {
        return Optional.ofNullable(buffer.isEmpty() ? null : buffer.getLast()._2());
    } finally {
        lock.unlock();
    }
}
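A usage sketch, assuming a source object that exposes lastOffset() as above; processFrom and initialOffset are hypothetical helpers, named here only for illustration:

    Optional<Offset> last = source.lastOffset();
    if (last.isPresent()) {
        processFrom(last.get());      // resume after the newest buffered offset
    } else {
        processFrom(initialOffset()); // buffer empty: fall back to the start
    }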
JavaPairRDD<Long, FieldT> X = JavaPairRDD.fromJavaRDD(config.sparkContext().parallelize(XList, numPartitions));
JavaPairRDD<Long, FieldT> w = JavaPairRDD.fromJavaRDD(config.sparkContext().parallelize(wList, numPartitions));
JavaPairRDD<Long, FieldT> y = JavaPairRDD.fromJavaRDD(config.sparkContext().parallelize(yList, numPartitions));
JavaPairRDD<Long, FieldT> oneFullAssignment = config.sparkContext()
        .parallelizePairs(Collections.singletonList(new Tuple2<>((long) 0, one)));
// ... (start of the call elided in this snippet)
        xtIndexer, xIndexer, outputOffsetX2, constraintOffsetX2);
R1CSConstraintsRDD<FieldT> X2Constraints = retX2._1();
JavaPairRDD<Long, FieldT> X2Assignments = retX2._2();
long rhsAssignmentsOffset = assignmentOffset + retX2._3();
// ...
        bd, bd, 1, d, d, 1, RHSBlockIndexer, wIndexer, outputOffsetRHS, constraintOffsetRHS);
R1CSConstraintsRDD<FieldT> RHSConstraints = retRHS._1();
JavaPairRDD<Long, FieldT> RHSAssignments = retRHS._2();
long lhsAssignmentsOffset = rhsAssignmentsOffset + retRHS._3();
// ...
        bd, bn, 1, d, n, 1, xtIndexer, yIndexer, outputOffsetLHS, constraintOffsetLHS);
R1CSConstraintsRDD<FieldT> LHSConstraints = retLHS._1();
JavaPairRDD<Long, FieldT> LHSAssignments = retLHS._2();
long numLHSAssignments = retLHS._3();
long numLHSConstraints = LHSConstraints.size();
// ...
return new Tuple3<R1CSRelationRDD<FieldT>, Assignment<FieldT>, JavaPairRDD<Long, FieldT>>(r1cs, primary, oneFullAssignment);