private void outputDimRangeInfo(List<Tuple2<String, Tuple3<Writable, Writable, String>>> result) {
    if (col != null && minValue != null) {
        // output written to baseDir/colName/colName.dci-r-00000 (etc)
        String dimRangeFileName = col.getIdentity() + "/" + col.getName() + DIMENSION_COL_INFO_FILE_POSTFIX;

        result.add(new Tuple2<String, Tuple3<Writable, Writable, String>>(BatchConstants.CFG_OUTPUT_PARTITION,
                new Tuple3<Writable, Writable, String>(NullWritable.get(),
                        new Text(minValue.getBytes(StandardCharsets.UTF_8)), dimRangeFileName)));
        result.add(new Tuple2<String, Tuple3<Writable, Writable, String>>(BatchConstants.CFG_OUTPUT_PARTITION,
                new Tuple3<Writable, Writable, String>(NullWritable.get(),
                        new Text(maxValue.getBytes(StandardCharsets.UTF_8)), dimRangeFileName)));
        logger.info("write dimension range info for col : {} minValue:{} maxValue:{}",
                col.getName(), minValue, maxValue);
    }
}
private void outputDict(TblColRef col, Dictionary<String> dict,
        List<Tuple2<String, Tuple3<Writable, Writable, String>>> result) throws IOException {
    // output written to baseDir/colName/colName.rldict-r-00000 (etc)
    String dictFileName = col.getIdentity() + "/" + col.getName() + DICT_FILE_POSTFIX;

    try (ByteArrayOutputStream baos = new ByteArrayOutputStream();
            DataOutputStream outputStream = new DataOutputStream(baos)) {
        outputStream.writeUTF(dict.getClass().getName());
        dict.write(outputStream);

        result.add(new Tuple2<String, Tuple3<Writable, Writable, String>>(BatchConstants.CFG_OUTPUT_DICT,
                new Tuple3<Writable, Writable, String>(NullWritable.get(),
                        new ArrayPrimitiveWritable(baos.toByteArray()), dictFileName)));
    }
}
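// Hypothetical read side of the blob written above (a sketch, not the project's actual
// deserializer): it assumes the concrete Dictionary class has a no-arg constructor and a
// readFields(DataInput) counterpart to write(DataOutput). The class name written with
// writeUTF tells us which concrete dictionary to instantiate.
@SuppressWarnings("unchecked")
private static Dictionary<String> readDict(byte[] bytes) throws Exception {
    try (DataInputStream in = new DataInputStream(new ByteArrayInputStream(bytes))) {
        String dictClassName = in.readUTF();
        Dictionary<String> dict = (Dictionary<String>) Class.forName(dictClassName)
                .getDeclaredConstructor().newInstance();
        dict.readFields(in);
        return dict;
    }
}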
// Excerpted arguments (the enclosing result.add(...) calls are elided in the source).
// Statistics records use marker keys: -1 = mapper overlap ratio, -2 = number of mappers,
// 0 = sampling percent, i = the i-th value's bytes; all target statisticsFileName.
new Tuple3<Writable, Writable, String>(new LongWritable(-1),
        new BytesWritable(Bytes.toBytes(mapperOverlapRatio)), statisticsFileName)));
new Tuple3<Writable, Writable, String>(new LongWritable(-2),
        new BytesWritable(Bytes.toBytes(baseCuboidRowCountInMappers.size())), statisticsFileName)));
new Tuple3<Writable, Writable, String>(new LongWritable(0L),
        new BytesWritable(Bytes.toBytes(samplingPercent)), statisticsFileName)));
new Tuple3<Writable, Writable, String>(new LongWritable(i),
        new BytesWritable(valueCopy, valueCopy.length), statisticsFileName)));
// Column records pair a NullWritable key with the raw key bytes:
BatchConstants.CFG_OUTPUT_COLUMN,
        new Tuple3<Writable, Writable, String>(NullWritable.get(), new Text(keyBytes), fileName)));
public static Tuple3<String, String, String> createKey(String[] tokens) {
    String userID = tokens[1];
    if (userID.equals("-")) {
        // undefined user
        return new Tuple3<String, String, String>(null, null, null);
    } else {
        String ipAddress = tokens[0];
        String query = tokens[3];
        return new Tuple3<String, String, String>(ipAddress, userID, query);
    }
}
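// Usage sketch with hypothetical input: the token layout assumed here is a tab-separated
// log line of the form ip<TAB>userID<TAB>timestamp<TAB>query, matching the indices above.
String[] tokens = "10.0.0.1\tu42\t2024-01-01\tscala tuple".split("\t");
Tuple3<String, String, String> key = createKey(tokens);
// key._1() == "10.0.0.1", key._2() == "u42", key._3() == "scala tuple"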
// Build the rule consequent by removing the antecedent items from the full itemset,
// then record (antecedent, consequent, confidence).
List<String> t2List = new ArrayList<String>(t2._1);
t2List.removeAll(fromList);
result.add(new Tuple3(fromList, t2List, confidence));
// Sort the three vertex IDs so every triangle has a single canonical key.
long[] aTriangle = {key._1, key._2, node};
Arrays.sort(aTriangle);
Tuple3<Long, Long, Long> t3 = new Tuple3<Long, Long, Long>(aTriangle[0], aTriangle[1], aTriangle[2]);
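// Why sort: any permutation of the same three vertex IDs collapses to the same tuple,
// so each triangle is emitted under exactly one key. A minimal standalone check:
long[] a = {9L, 2L, 5L};
Arrays.sort(a);
Tuple3<Long, Long, Long> canonical = new Tuple3<>(a[0], a[1], a[2]); // (2, 5, 9)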
private T createTuple(Object[] data) {
    switch (subreaders.length) {
    case 2:
        return (T) new Tuple2(data[0], data[1]);
    case 3:
        return (T) new Tuple3(data[0], data[1], data[2]);
    case 4:
        return (T) new Tuple4(data[0], data[1], data[2], data[3]);
    case 5:
        return (T) new Tuple5(data[0], data[1], data[2], data[3], data[4]);
    case 8:
        return (T) new Tuple8(data[0], data[1], data[2], data[3], data[4], data[5], data[6], data[7]);
    default:
        throw new IllegalArgumentException(
                "Creating a tuple with a SQLReader with unknown size " + subreaders.length);
    }
}
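// A hypothetical standalone version of the same pattern for arity 3, a sketch showing
// the generic construction made explicit instead of the raw, unchecked casts above.
static Tuple3<Object, Object, Object> tuple3Of(Object[] data) {
    if (data.length != 3) {
        throw new IllegalArgumentException("expected 3 values, got " + data.length);
    }
    return new Tuple3<>(data[0], data[1], data[2]);
}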
@Override
@SuppressWarnings("unchecked")
public Tuple3<A, B, C> read(final Kryo kryo, final Input input, final Class<Tuple3<A, B, C>> clazz) {
    return new Tuple3<>((A) kryo.readClassAndObject(input),
            (B) kryo.readClassAndObject(input),
            (C) kryo.readClassAndObject(input));
}
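// The matching write side, a minimal sketch against the standard Kryo Serializer API;
// the field order must mirror the three readClassAndObject calls above.
@Override
public void write(final Kryo kryo, final Output output, final Tuple3<A, B, C> tuple) {
    kryo.writeClassAndObject(output, tuple._1());
    kryo.writeClassAndObject(output, tuple._2());
    kryo.writeClassAndObject(output, tuple._3());
}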
private void checkAndSet() {
    int newRemaining = remaining.decrementAndGet();
    checkState(newRemaining >= 0, "Less than 0 remaining futures");
    if (newRemaining == 0) {
        if (!shortCircuit.get() && !isDone()) {
            checkNotNull(firstValue, "First value is null on completion");
            checkNotNull(secondValue, "Second value is null on completion");
            checkNotNull(thirdValue, "Third value is null on completion");
            set(new Right<GeneralError, Tuple3<T, U, V>>(new Tuple3<>(firstValue, secondValue, thirdValue)));
        }
    }
}
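// For comparison, a sketch of the same three-way join done with
// java.util.concurrent.CompletableFuture instead of the manual countdown above
// (note: no short-circuit or GeneralError channel; this is a different technique).
static <T, U, V> CompletableFuture<Tuple3<T, U, V>> combine(
        CompletableFuture<T> fa, CompletableFuture<U> fb, CompletableFuture<V> fc) {
    return fa.thenCombine(fb, Tuple2::new)
             .thenCombine(fc, (ab, c) -> new Tuple3<>(ab._1(), ab._2(), c));
}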
private static List<Tuple3<Double, Double, Double>> generateIsotonicInput(double[] labels) {
    List<Tuple3<Double, Double, Double>> input = new ArrayList<>(labels.length);
    for (int i = 1; i <= labels.length; i++) {
        // (label, feature, weight): feature is the 1-based position, weight is constant 1.0
        input.add(new Tuple3<>(labels[i - 1], (double) i, 1.0));
    }
    return input;
}
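// Usage sketch with Spark MLlib's isotonic regression (RDD API), assuming an active
// JavaSparkContext jsc; each Tuple3 is (label, feature, weight) as built above.
JavaRDD<Tuple3<Double, Double, Double>> trainingRdd =
        jsc.parallelize(generateIsotonicInput(new double[] {1, 2, 3, 3, 5}));
IsotonicRegressionModel model = new IsotonicRegression().setIsotonic(true).run(trainingRdd);
double prediction = model.predict(2.0);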
@Test
public void testTupleEncoder() {
    Encoder<Tuple2<Integer, String>> encoder2 = Encoders.tuple(Encoders.INT(), Encoders.STRING());
    List<Tuple2<Integer, String>> data2 = Arrays.asList(tuple2(1, "a"), tuple2(2, "b"));
    Dataset<Tuple2<Integer, String>> ds2 = spark.createDataset(data2, encoder2);
    Assert.assertEquals(data2, ds2.collectAsList());

    Encoder<Tuple3<Integer, Long, String>> encoder3 =
            Encoders.tuple(Encoders.INT(), Encoders.LONG(), Encoders.STRING());
    List<Tuple3<Integer, Long, String>> data3 = Arrays.asList(new Tuple3<>(1, 2L, "a"));
    Dataset<Tuple3<Integer, Long, String>> ds3 = spark.createDataset(data3, encoder3);
    Assert.assertEquals(data3, ds3.collectAsList());

    Encoder<Tuple4<Integer, String, Long, String>> encoder4 =
            Encoders.tuple(Encoders.INT(), Encoders.STRING(), Encoders.LONG(), Encoders.STRING());
    List<Tuple4<Integer, String, Long, String>> data4 = Arrays.asList(new Tuple4<>(1, "b", 2L, "a"));
    Dataset<Tuple4<Integer, String, Long, String>> ds4 = spark.createDataset(data4, encoder4);
    Assert.assertEquals(data4, ds4.collectAsList());

    Encoder<Tuple5<Integer, String, Long, String, Boolean>> encoder5 = Encoders.tuple(
            Encoders.INT(), Encoders.STRING(), Encoders.LONG(), Encoders.STRING(), Encoders.BOOLEAN());
    List<Tuple5<Integer, String, Long, String, Boolean>> data5 =
            Arrays.asList(new Tuple5<>(1, "b", 2L, "a", true));
    Dataset<Tuple5<Integer, String, Long, String, Boolean>> ds5 = spark.createDataset(data5, encoder5);
    Assert.assertEquals(data5, ds5.collectAsList());
}
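// Note: Encoders.tuple is overloaded for two to five components only; larger shapes need
// a bean class or nested tuples. A tupled Dataset also supports typed operations directly,
// e.g. (a sketch, assuming ds3 from the test above is in scope):
Dataset<String> thirds = ds3.map(
        (MapFunction<Tuple3<Integer, Long, String>, String>) t -> t._3(), Encoders.STRING());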
@Test
public void testNestedTupleEncoder() {
    // test ((int, string), string)
    Encoder<Tuple2<Tuple2<Integer, String>, String>> encoder =
            Encoders.tuple(Encoders.tuple(Encoders.INT(), Encoders.STRING()), Encoders.STRING());
    List<Tuple2<Tuple2<Integer, String>, String>> data =
            Arrays.asList(tuple2(tuple2(1, "a"), "a"), tuple2(tuple2(2, "b"), "b"));
    Dataset<Tuple2<Tuple2<Integer, String>, String>> ds = spark.createDataset(data, encoder);
    Assert.assertEquals(data, ds.collectAsList());

    // test (int, (string, string, long))
    Encoder<Tuple2<Integer, Tuple3<String, String, Long>>> encoder2 = Encoders.tuple(
            Encoders.INT(), Encoders.tuple(Encoders.STRING(), Encoders.STRING(), Encoders.LONG()));
    List<Tuple2<Integer, Tuple3<String, String, Long>>> data2 =
            Arrays.asList(tuple2(1, new Tuple3<>("a", "b", 3L)));
    Dataset<Tuple2<Integer, Tuple3<String, String, Long>>> ds2 = spark.createDataset(data2, encoder2);
    Assert.assertEquals(data2, ds2.collectAsList());

    // test (int, ((string, long), string))
    Encoder<Tuple2<Integer, Tuple2<Tuple2<String, Long>, String>>> encoder3 = Encoders.tuple(
            Encoders.INT(),
            Encoders.tuple(Encoders.tuple(Encoders.STRING(), Encoders.LONG()), Encoders.STRING()));
    List<Tuple2<Integer, Tuple2<Tuple2<String, Long>, String>>> data3 =
            Arrays.asList(tuple2(1, tuple2(tuple2("a", 2L), "b")));
    Dataset<Tuple2<Integer, Tuple2<Tuple2<String, Long>, String>>> ds3 =
            spark.createDataset(data3, encoder3);
    Assert.assertEquals(data3, ds3.collectAsList());
}
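// Since a tuple encoder is itself an Encoder, nesting also works around the five-component
// ceiling of Encoders.tuple, e.g. six fields grouped as (3, 3) in this sketch:
Encoder<Tuple2<Tuple3<Integer, Long, String>, Tuple3<String, Boolean, Integer>>> encoder6 =
        Encoders.tuple(
                Encoders.tuple(Encoders.INT(), Encoders.LONG(), Encoders.STRING()),
                Encoders.tuple(Encoders.STRING(), Encoders.BOOLEAN(), Encoders.INT()));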