.projectFirst(0).projectSecond(0);
ordersFilteredByYear.joinWithHuge(lineitemsFilteredByFlag) .where(0).equalTo(0) .projectFirst(1).projectSecond(1); .joinWithTiny(nations) .where(3).equalTo(0) .projectFirst(0, 1, 2).projectSecond(1).projectFirst(4); customerWithNation.join(revenueByCustomer) .where(0).equalTo(0) .projectFirst(0, 1, 2, 3, 4).projectSecond(1);
ordersFilteredByYear.joinWithHuge(lineitemsFilteredByFlag) .where(0).equalTo(0) .projectFirst(1).projectSecond(1); .joinWithTiny(nations) .where(3).equalTo(0) .projectFirst(0,1,2).projectSecond(1).projectFirst(4); customerWithNation.join(revenueByCustomer) .where(0).equalTo(0) .projectFirst(0,1,2,3,4).projectSecond(1);
ordersFilteredByYear.joinWithHuge(lineitemsFilteredByFlag) .where(0).equalTo(0) .projectFirst(1).projectSecond(1); .joinWithTiny(nations) .where(3).equalTo(0) .projectFirst(0, 1, 2).projectSecond(1).projectFirst(4); customerWithNation.join(revenueByCustomer) .where(0).equalTo(0) .projectFirst(0, 1, 2, 3, 4).projectSecond(1);
.equalTo(0) .projectFirst(0, 1) .<Tuple3<K, K, FloatValue>>projectSecond(2) .setParallelism(parallelism) .name("Edge score");
ordersFilteredByYear.joinWithHuge(lineitemsFilteredByFlag) .where(0).equalTo(0) .projectFirst(1).projectSecond(1); .joinWithTiny(nations) .where(3).equalTo(0) .projectFirst(0,1,2).projectSecond(1).projectFirst(4); customerWithNation.join(revenueByCustomer) .where(0).equalTo(0) .projectFirst(0,1,2,3,4).projectSecond(1);
.equalTo(0) .projectFirst(0, 1) .<Tuple3<K, K, FloatValue>>projectSecond(2) .setParallelism(parallelism) .name("Edge score");
ordersFilteredByYear.joinWithHuge(lineitemsFilteredByFlag) .where(0).equalTo(0) .projectFirst(1).projectSecond(1); .joinWithTiny(nations) .where(3).equalTo(0) .projectFirst(0, 1, 2).projectSecond(1).projectFirst(4); customerWithNation.join(revenueByCustomer) .where(0).equalTo(0) .projectFirst(0, 1, 2, 3, 4).projectSecond(1);
.equalTo(0) .projectFirst(0, 1) .<Tuple3<K, K, FloatValue>>projectSecond(2) .setParallelism(littleParallelism) .name("Edge score");
public static void main(String[] args) throws Exception { // parse parameters ParameterTool params = ParameterTool.fromArgs(args); String input = params.getRequired("input"); // obtain an execution environment ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); // read messageId, sender, and reply-to fields from the input data set DataSet<Tuple3<String, String, String>> mails = env.readCsvFile(input) .lineDelimiter(MBoxParser.MAIL_RECORD_DELIM) .fieldDelimiter(MBoxParser.MAIL_FIELD_DELIM) // messageId at position 0, sender at 2, reply-to at 5 .includeFields("101001") .types(String.class, String.class, String.class); // extract email addresses and filter out mails from bots DataSet<Tuple3<String, String, String>> addressMails = mails .map(new EmailExtractor()) .filter(new ExcludeEmailFilter("git@git.apache.org")) .filter(new ExcludeEmailFilter("jira@apache.org")); // construct reply connections by joining on messageId and reply-To DataSet<Tuple2<String, String>> replyConnections = addressMails .join(addressMails).where(2).equalTo(0).projectFirst(1).projectSecond(1); // count reply connections for each pair of email addresses replyConnections .groupBy(0, 1).reduceGroup(new ConnectionCounter()) .print(); }
/**
 * Builds the join described by {@code info} over the two data sets registered under
 * {@code info.parentID} and {@code info.otherID}, and stores the result in {@code sets}
 * under {@code info.setID}.
 *
 * <p>Three outcomes are possible:
 * <ul>
 *   <li>{@code info.types} is set and there are no projections: the join is delegated to
 *       {@code applyJoinOperation}.</li>
 *   <li>There are no projections (and {@code info.types} is {@code null}): a default join
 *       named "DefaultJoin" is stored.</li>
 *   <li>Otherwise: the projections are applied in order to the default join and the
 *       result is stored under the name "ProjectJoin".</li>
 * </ul>
 *
 * <p>A {@code null} projection array is treated exactly like an empty one in every branch.
 * Previously only the first check tolerated {@code null}; with {@code info.types == null}
 * and {@code info.projections == null} the method failed with a NullPointerException.
 *
 * @param mode size hint forwarded to the join construction
 * @param info description of the join: input/output set ids, keys, types and projections
 */
private void createJoinOperation(DatasizeHint mode, INFO info) {
    DataSet op1 = (DataSet) sets.get(info.parentID);
    DataSet op2 = (DataSet) sets.get(info.otherID);

    // Evaluate once so that both decisions below agree on what "no projections" means.
    boolean hasProjections = info.projections != null && info.projections.length > 0;

    if (info.types != null && !hasProjections) {
        sets.put(info.setID, applyJoinOperation(op1, op2, info.keys1, info.keys2, mode, info));
        return;
    }

    DefaultJoin defaultResult = createDefaultJoin(op1, op2, info.keys1, info.keys2, mode);
    if (!hasProjections) {
        sets.put(info.setID, defaultResult.name("DefaultJoin"));
        return;
    }

    // The first projection starts from the default join; every later one extends
    // the projection built so far.
    ProjectJoin project = null;
    for (ProjectionEntry pe : info.projections) {
        switch (pe.side) {
            case FIRST:
                project = project == null
                    ? defaultResult.projectFirst(pe.keys)
                    : project.projectFirst(pe.keys);
                break;
            case SECOND:
                project = project == null
                    ? defaultResult.projectSecond(pe.keys)
                    : project.projectSecond(pe.keys);
                break;
        }
    }
    sets.put(info.setID, project.name("ProjectJoin"));
}
/**
 * Joins every edge with its matching entry in {@code topVertices} and then in
 * {@code bottomVertices}, appending field 1 of each matched vertex to the edge fields.
 *
 * @return a data set of 5-tuples: edge fields 0-2, then field 1 of the matched top
 *         vertex, then field 1 of the matched bottom vertex
 */
private DataSet<Tuple5<KT, KB, EV, VVT, VVB>> joinEdgeWithVertices() {
    // Stage 1: edge field 0 is matched against field 0 of topVertices.
    DataSet<Tuple4<KT, KB, EV, VVT>> edgeWithTopVertex = edges
        .join(topVertices, JoinHint.REPARTITION_HASH_SECOND)
        .where(0)
        .equalTo(0)
        .projectFirst(0, 1, 2)
        .<Tuple4<KT, KB, EV, VVT>>projectSecond(1)
        .name("Edge with vertex");

    // Stage 2: field 1 of the intermediate tuple is matched against field 0 of bottomVertices.
    return edgeWithTopVertex
        .join(bottomVertices, JoinHint.REPARTITION_HASH_SECOND)
        .where(1)
        .equalTo(0)
        .projectFirst(0, 1, 2, 3)
        .<Tuple5<KT, KB, EV, VVT, VVB>>projectSecond(1)
        .name("Edge with vertices");
}
/**
 * Enriches each edge with field 1 of its matching top vertex and field 1 of its
 * matching bottom vertex, using two consecutive joins.
 *
 * @return a data set of 5-tuples: edge fields 0-2, followed by field 1 of the top-vertex
 *         match and field 1 of the bottom-vertex match
 */
private DataSet<Tuple5<KT, KB, EV, VVT, VVB>> joinEdgeWithVertices() {
    // Join on edge field 0 == top vertex field 0; keep the three edge fields plus vertex field 1.
    DataSet<Tuple4<KT, KB, EV, VVT>> withTop = edges
        .join(topVertices, JoinHint.REPARTITION_HASH_SECOND)
        .where(0)
        .equalTo(0)
        .projectFirst(0, 1, 2)
        .<Tuple4<KT, KB, EV, VVT>>projectSecond(1)
        .name("Edge with vertex");

    // Join on intermediate field 1 == bottom vertex field 0; append vertex field 1.
    return withTop
        .join(bottomVertices, JoinHint.REPARTITION_HASH_SECOND)
        .where(1)
        .equalTo(0)
        .projectFirst(0, 1, 2, 3)
        .<Tuple5<KT, KB, EV, VVT, VVB>>projectSecond(1)
        .name("Edge with vertices");
}
/**
 * Produces one 5-tuple per edge by joining {@code edges} first with {@code topVertices}
 * and then with {@code bottomVertices}.
 *
 * @return a data set whose tuples hold edge fields 0-2, field 1 of the joined top vertex,
 *         and field 1 of the joined bottom vertex, in that order
 */
private DataSet<Tuple5<KT, KB, EV, VVT, VVB>> joinEdgeWithVertices() {
    // Top side: key is field 0 on both inputs.
    DataSet<Tuple4<KT, KB, EV, VVT>> topJoined = edges
        .join(topVertices, JoinHint.REPARTITION_HASH_SECOND)
        .where(0)
        .equalTo(0)
        .projectFirst(0, 1, 2)
        .<Tuple4<KT, KB, EV, VVT>>projectSecond(1)
        .name("Edge with vertex");

    // Bottom side: key is field 1 of the intermediate tuple and field 0 of the vertex.
    return topJoined
        .join(bottomVertices, JoinHint.REPARTITION_HASH_SECOND)
        .where(1)
        .equalTo(0)
        .projectFirst(0, 1, 2, 3)
        .<Tuple5<KT, KB, EV, VVT, VVB>>projectSecond(1)
        .name("Edge with vertices");
}
/**
 * Creates the join projection that emits a {@link Tuple8} built from the previously
 * selected fields. The field types are derived from {@code fieldIndexes} via
 * {@code extractFieldTypes}.
 *
 * @return The projected data set.
 *
 * @see Tuple
 * @see DataSet
 */
public <T0, T1, T2, T3, T4, T5, T6, T7> ProjectJoin<I1, I2, Tuple8<T0, T1, T2, T3, T4, T5, T6, T7>> projectTuple8() {
    TypeInformation<?>[] fieldTypes = extractFieldTypes(fieldIndexes);
    TupleTypeInfo<Tuple8<T0, T1, T2, T3, T4, T5, T6, T7>> tupleType =
        new TupleTypeInfo<Tuple8<T0, T1, T2, T3, T4, T5, T6, T7>>(fieldTypes);

    ProjectJoin<I1, I2, Tuple8<T0, T1, T2, T3, T4, T5, T6, T7>> projection =
        new ProjectJoin<I1, I2, Tuple8<T0, T1, T2, T3, T4, T5, T6, T7>>(
            this.ds1, this.ds2, this.keys1, this.keys2, this.hint,
            this.fieldIndexes, this.isFieldInFirst, tupleType, this);
    return projection;
}
/**
 * Creates the join projection that emits a {@link Tuple2} built from the previously
 * selected fields. The field types are derived from {@code fieldIndexes} via
 * {@code extractFieldTypes}.
 *
 * @return The projected data set.
 *
 * @see Tuple
 * @see DataSet
 */
public <T0, T1> ProjectJoin<I1, I2, Tuple2<T0, T1>> projectTuple2() {
    TypeInformation<?>[] fieldTypes = extractFieldTypes(fieldIndexes);
    TupleTypeInfo<Tuple2<T0, T1>> tupleType =
        new TupleTypeInfo<Tuple2<T0, T1>>(fieldTypes);

    ProjectJoin<I1, I2, Tuple2<T0, T1>> projection =
        new ProjectJoin<I1, I2, Tuple2<T0, T1>>(
            this.ds1, this.ds2, this.keys1, this.keys2, this.hint,
            this.fieldIndexes, this.isFieldInFirst, tupleType, this);
    return projection;
}
/**
 * Creates the join projection that emits a {@link Tuple25} built from the previously
 * selected fields. The field types are derived from {@code fieldIndexes} via
 * {@code extractFieldTypes}.
 *
 * @return The projected data set.
 *
 * @see Tuple
 * @see DataSet
 */
public <T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24> ProjectJoin<I1, I2, Tuple25<T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24>> projectTuple25() {
    TypeInformation<?>[] fieldTypes = extractFieldTypes(fieldIndexes);
    TupleTypeInfo<Tuple25<T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24>> tupleType =
        new TupleTypeInfo<Tuple25<T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24>>(fieldTypes);

    ProjectJoin<I1, I2, Tuple25<T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24>> projection =
        new ProjectJoin<I1, I2, Tuple25<T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24>>(
            this.ds1, this.ds2, this.keys1, this.keys2, this.hint,
            this.fieldIndexes, this.isFieldInFirst, tupleType, this);
    return projection;
}
/**
 * Builds a {@link ProjectJoin} whose output is a {@link Tuple8} made up of the fields
 * selected so far. Field type information comes from {@code extractFieldTypes(fieldIndexes)}.
 *
 * @return The projected data set.
 *
 * @see Tuple
 * @see DataSet
 */
public <T0, T1, T2, T3, T4, T5, T6, T7> ProjectJoin<I1, I2, Tuple8<T0, T1, T2, T3, T4, T5, T6, T7>> projectTuple8() {
    TypeInformation<?>[] selectedTypes = extractFieldTypes(fieldIndexes);
    TupleTypeInfo<Tuple8<T0, T1, T2, T3, T4, T5, T6, T7>> resultType =
        new TupleTypeInfo<Tuple8<T0, T1, T2, T3, T4, T5, T6, T7>>(selectedTypes);

    ProjectJoin<I1, I2, Tuple8<T0, T1, T2, T3, T4, T5, T6, T7>> result =
        new ProjectJoin<I1, I2, Tuple8<T0, T1, T2, T3, T4, T5, T6, T7>>(
            this.ds1, this.ds2, this.keys1, this.keys2, this.hint,
            this.fieldIndexes, this.isFieldInFirst, resultType, this);
    return result;
}
/**
 * Creates the join projection that emits a {@link Tuple7} built from the previously
 * selected fields. The field types are derived from {@code fieldIndexes} via
 * {@code extractFieldTypes}.
 *
 * @return The projected data set.
 *
 * @see Tuple
 * @see DataSet
 */
public <T0, T1, T2, T3, T4, T5, T6> ProjectJoin<I1, I2, Tuple7<T0, T1, T2, T3, T4, T5, T6>> projectTuple7() {
    TypeInformation<?>[] fieldTypes = extractFieldTypes(fieldIndexes);
    TupleTypeInfo<Tuple7<T0, T1, T2, T3, T4, T5, T6>> tupleType =
        new TupleTypeInfo<Tuple7<T0, T1, T2, T3, T4, T5, T6>>(fieldTypes);

    ProjectJoin<I1, I2, Tuple7<T0, T1, T2, T3, T4, T5, T6>> projection =
        new ProjectJoin<I1, I2, Tuple7<T0, T1, T2, T3, T4, T5, T6>>(
            this.ds1, this.ds2, this.keys1, this.keys2, this.hint,
            this.fieldIndexes, this.isFieldInFirst, tupleType, this);
    return projection;
}
/**
 * Builds a {@link ProjectJoin} whose output is a {@link Tuple2} made up of the fields
 * selected so far. Field type information comes from {@code extractFieldTypes(fieldIndexes)}.
 *
 * @return The projected data set.
 *
 * @see Tuple
 * @see DataSet
 */
public <T0, T1> ProjectJoin<I1, I2, Tuple2<T0, T1>> projectTuple2() {
    TypeInformation<?>[] selectedTypes = extractFieldTypes(fieldIndexes);
    TupleTypeInfo<Tuple2<T0, T1>> resultType =
        new TupleTypeInfo<Tuple2<T0, T1>>(selectedTypes);

    ProjectJoin<I1, I2, Tuple2<T0, T1>> result =
        new ProjectJoin<I1, I2, Tuple2<T0, T1>>(
            this.ds1, this.ds2, this.keys1, this.keys2, this.hint,
            this.fieldIndexes, this.isFieldInFirst, resultType, this);
    return result;
}