/**
 * Releases the per-sink context after all tuples have been written.
 * <p>
 * Clearing the context lets the alias mapping installed during sink
 * preparation be garbage collected once the sink is finished.
 *
 * @param flowProcess of type FlowProcess
 * @param sinkCall    of type SinkCall
 * @throws IOException if the superclass cleanup fails
 */
@Override // added: sibling lifecycle overrides (sinkPrepare/sourceCleanup) carry @Override; this one was missing it
public void sinkCleanup(FlowProcess<JobConf> flowProcess, SinkCall<Object[], OutputCollector> sinkCall) throws IOException {
    super.sinkCleanup(flowProcess, sinkCall);
    sinkCall.setContext(null);
}
@Override public void sinkPrepare(FlowProcess<JobConf> flowProcess, SinkCall<Object[], OutputCollector> sinkCall) throws IOException { super.sinkPrepare(flowProcess, sinkCall); Object[] context = new Object[SINK_CTX_SIZE]; // the tuple is fixed, so we can just use a collection/index Settings settings = loadSettings(flowProcess.getConfigCopy(), false); context[SINK_CTX_ALIASES] = CascadingUtils.fieldToAlias(settings, getSinkFields()); sinkCall.setContext(context); }
/**
 * Tears down the per-source context after reading completes.
 *
 * @param flowProcess of type FlowProcess
 * @param sourceCall  of type SourceCall
 * @throws IOException if the superclass cleanup fails
 */
@Override
public void sourceCleanup(FlowProcess<JobConf> flowProcess, SourceCall<Object[], RecordReader> sourceCall) throws IOException {
    super.sourceCleanup(flowProcess, sourceCall);

    // drop the context reference so it can be garbage collected
    sourceCall.setContext(null);
}
/**
 * Returns the number of sink parts declared by the wrapped scheme.
 *
 * @return the numSinkParts (type int)
 */
public int getNumSinkParts() {
    return scheme.getNumSinkParts();
}
/**
 * Compares this scheme against another for equality over the key field,
 * family names, and value fields, consistent with {@code hashCode}.
 *
 * @param object the object to compare with
 * @return true when both schemes are equivalent
 */
@Override
public boolean equals(Object object) {
    if (this == object) {
        return true;
    }

    // reject non-matching types up front, then defer to the superclass
    if (object == null || getClass() != object.getClass() || !super.equals(object)) {
        return false;
    }

    HBaseScheme that = (HBaseScheme) object;

    boolean sameKeyField = keyField == null ? that.keyField == null : keyField.equals(that.keyField);

    return sameKeyField
        && Arrays.equals(familyNames, that.familyNames)
        && Arrays.equals(valueFields, that.valueFields);
}
/**
 * Adopts the normalized source and/or sink fields of the given scheme,
 * verifying they agree with the fields already recorded for this stereotype.
 * <p>
 * The source and sink branches previously duplicated the same
 * record-or-verify logic; it is now shared via a private helper.
 *
 * @param scheme the scheme whose fields to record or verify
 * @throws IllegalArgumentException when the fields disagree with those already recorded
 */
private void setFields( Scheme scheme )
  {
  if( scheme.isSource() )
    applySchemeFields( normalize( scheme.getSourceFields() ), "source" );

  if( scheme.isSink() )
    applySchemeFields( normalize( scheme.getSinkFields() ), "sink" );
  }

/** Records the given fields, or verifies they match the fields seen so far for this stereotype. */
private void applySchemeFields( Fields received, String role )
  {
  if( fields == null )
    fields = received;
  else if( !fields.equals( received ) )
    throw new IllegalArgumentException( "all schemes added to stereotype must have the same " + role + " fields, expected: " + fields + ", received: " + received + " in stereotype: " + getName() );
  }
/**
 * Constructs a LocalScheme that wraps the given Hadoop scheme, adopting its
 * declared source and sink fields.
 *
 * @param scheme the Hadoop scheme to delegate to
 */
public LocalScheme(Scheme<JobConf, RecordReader, OutputCollector, SourceContext, SinkContext> scheme) {
    super(scheme.getSourceFields(), scheme.getSinkFields());
    this.scheme = scheme;
}
/**
 * Verifies the checkpoint taps declared on the given FlowDef.
 * <p>
 * Checkpoint taps must not double as sources or sinks, must use an undeclared
 * scheme (UNKNOWN source fields, ALL sink fields), and every named checkpoint
 * must correspond to exactly one {@link Checkpoint} pipe on a branch of the
 * given assembly.
 *
 * @param flowDef   the flow definition holding the checkpoints
 * @param flowTails the tail pipes of the assembly
 * @throws PlannerException when any checkpoint constraint is violated
 */
protected void verifyCheckpoints( FlowDef flowDef, Pipe[] flowTails )
  {
  verifyNotSourcesSinks( flowDef.getCheckpoints(), flowDef.getSources(), flowDef.getSinks(), "checkpoint" );

  for( Tap checkpointTap : flowDef.getCheckpoints().values() )
    {
    Scheme scheme = checkpointTap.getScheme();

    boolean isUndeclared = scheme.getSourceFields().equals( Fields.UNKNOWN ) && scheme.getSinkFields().equals( Fields.ALL );

    if( !isUndeclared )
      throw new PlannerException( "checkpoint tap scheme must be undeclared, source fields must be UNKNOWN, and sink fields ALL, got: " + scheme.toString() );
    }

  Set<String> branchNames = new HashSet<String>( asList( Pipe.names( flowTails ) ) );

  for( String checkpointName : flowDef.getCheckpoints().keySet() )
    {
    if( !branchNames.contains( checkpointName ) )
      throw new PlannerException( "named checkpoint declared in FlowDef, but no named branch found in pipe assembly: '" + checkpointName + "'" );

    // the named branch must carry exactly one Checkpoint pipe
    Set<Pipe> branchPipes = new HashSet<Pipe>( asList( Pipe.named( checkpointName, flowTails ) ) );

    int checkpointCount = 0;

    for( Pipe pipe : branchPipes )
      {
      if( pipe instanceof Checkpoint )
        checkpointCount++;
      }

    if( checkpointCount == 0 )
      throw new PlannerException( "no checkpoint pipe with branch name found in pipe assembly: '" + checkpointName + "'" );

    if( checkpointCount > 1 )
      throw new PlannerException( "more than one checkpoint pipe with branch name found in pipe assembly: '" + checkpointName + "'" );
    }
  }
// NOTE(review): this constructor appears truncated/mangled in this view — the
// opening brace is missing before the super(...) call, and the initialization
// of 'sourceCall' (used below), 'tap', 'inputIterator', and
// 'loggableIdentifier' is not visible. Confirm against the full source file
// before relying on this snippet; it does not compile as shown.
public TupleEntrySchemeIterator( FlowProcess<? extends Config> flowProcess, Tap tap, Scheme scheme, CloseableIterator<Input> inputIterator, Supplier<String> loggableIdentifier )
  // declared tuple fields come from the scheme's source fields
  super( scheme.getSourceFields() );
  this.flowProcess = flowProcess;
  this.scheme = scheme;
  // presumably sourceCall is created earlier in the full constructor body — TODO confirm
  this.scheme.sourcePrepare( flowProcess, sourceCall );
/**
 * Computes the hash code from the key field, family names, and value fields,
 * consistent with {@code equals}.
 *
 * @return the hash code (type int)
 */
@Override
public int hashCode() {
    int result = super.hashCode();

    // Arrays.hashCode(null) is specified to return 0, matching the previous
    // explicit null checks on the array-typed fields.
    result = 31 * result + (keyField == null ? 0 : keyField.hashCode());
    result = 31 * result + Arrays.hashCode(familyNames);
    result = 31 * result + Arrays.hashCode(valueFields);

    return result;
}
}
/**
 * Method retrieveSinkFields notifies a Scheme when it is appropriate to dynamically
 * update the fields it sinks. By default the current declared fields are returned.
 * <p>
 * The {@code FlowProcess} presents all known properties resolved by the current planner.
 * <p>
 * The {@code tap} instance is the parent {@link Tap} for this Scheme instance.
 *
 * @param flowProcess of type FlowProcess
 * @param tap         of type Tap
 * @return Fields
 */
public Fields retrieveSinkFields( FlowProcess<? extends Config> flowProcess, Tap tap )
  {
  return getSinkFields();
  }
/**
 * Adopts the given fields as the source fields, but only while the declared
 * source fields are still {@code Fields.UNKNOWN}.
 *
 * @param fields the resolved fields to adopt
 */
protected void presentSourceFieldsInternal( Fields fields )
  {
  boolean isUndeclared = getSourceFields().equals( Fields.UNKNOWN );

  if( isUndeclared )
    setSourceFields( fields );
  }
/**
 * Adopts the given fields as the sink fields, but only while the declared
 * sink fields are still {@code Fields.ALL}.
 *
 * @param fields the resolved fields to adopt
 */
protected void presentSinkFieldsInternal( Fields fields )
  {
  boolean isUndeclared = getSinkFields().equals( Fields.ALL );

  if( isUndeclared )
    setSinkFields( fields );
  }
/**
 * Hook allowing a Scheme to refresh the fields it sources once the planner
 * has resolved all known properties. The default implementation simply
 * returns the currently declared source fields.
 * <p>
 * The {@code tap} instance is the parent {@link Tap} for this Scheme instance.
 *
 * @param flowProcess of type FlowProcess
 * @param tap         of type Tap
 * @return Fields
 */
public Fields retrieveSourceFields( FlowProcess<? extends Config> flowProcess, Tap tap )
  {
  return getSourceFields();
  }
/**
 * Forwards sink field presentation to the wrapped Hadoop scheme, substituting
 * a Hadoop flow process (built from the default configuration) and the
 * backing file tap for the local ones.
 */
@Override
public void presentSinkFields(FlowProcess<Properties> flowProcess, Tap tap, Fields fields) {
    HadoopFlowProcess hadoopFlowProcess = new HadoopFlowProcess(defaults);

    scheme.presentSinkFields(hadoopFlowProcess, lfs, fields);
}
/**
 * Delegates the requested number of sink parts to the wrapped scheme.
 *
 * @param numSinkParts of type int
 */
public void setNumSinkParts( int numSinkParts )
  {
  scheme.setNumSinkParts( numSinkParts );
  }
protected int getParallelism( FlowNode flowNode, JobConf conf ) { // only count streamed taps, accumulated taps are always annotated HashSet<Tap> sourceStreamedTaps = new HashSet<>( flowNode.getSourceTaps() ); sourceStreamedTaps.removeAll( flowNode.getSourceElements( StreamMode.Accumulated ) ); if( sourceStreamedTaps.size() != 0 ) return -1; int parallelism = Integer.MAX_VALUE; for( Tap tap : flowNode.getSinkTaps() ) { int numSinkParts = tap.getScheme().getNumSinkParts(); if( numSinkParts == 0 ) continue; if( parallelism != Integer.MAX_VALUE ) LOG.info( "multiple sink taps in flow node declaring numSinkParts, choosing lowest value. see cascading.flow.FlowRuntimeProps for broader control." ); parallelism = Math.min( parallelism, numSinkParts ); } if( parallelism != Integer.MAX_VALUE ) return parallelism; return conf.getInt( FlowRuntimeProps.GATHER_PARTITIONS, 0 ); }
/**
 * Verifies every tap in this composite is of the same concrete type and uses
 * a scheme equivalent to the first tap's.
 *
 * @throws TapException when a tap of a different type or scheme is found
 */
private void verifyTaps()
  {
  Tap first = taps[ 0 ];

  for( int i = 1; i < taps.length; i++ )
    {
    Tap other = taps[ i ];

    if( first.getClass() != other.getClass() )
      throw new TapException( "all taps must be of the same type" );

    if( !first.getScheme().equals( other.getScheme() ) )
      throw new TapException( "all tap schemes must be equivalent" );
    }
  }
/**
 * Method isSymmetrical returns {@code true} if the sink fields equal the source fields. That is, this
 * scheme sources the same fields as it sinks.
 *
 * @return the symmetrical (type boolean) of this Scheme object.
 */
public boolean isSymmetrical()
  {
  Fields sourceFields = getSourceFields();
  Fields sinkFields = getSinkFields();

  // a fully undeclared scheme (UNKNOWN source, ALL sink) counts as symmetrical
  boolean isUndeclared = sourceFields.equals( Fields.UNKNOWN ) && sinkFields.equals( Fields.ALL );

  return isUndeclared || sinkFields.equals( sourceFields );
  }
/**
 * Computes the hash code over all configuration fields of this scheme,
 * consistent with {@code equals}.
 *
 * @return the hash code (type int)
 */
@Override
public int hashCode()
  {
  int result = super.hashCode();

  // Arrays.hashCode(null) is specified to return 0, matching the previous
  // explicit null checks on the array-typed fields.
  result = 31 * result + ( inputFormatClass == null ? 0 : inputFormatClass.hashCode() );
  result = 31 * result + ( outputFormatClass == null ? 0 : outputFormatClass.hashCode() );
  result = 31 * result + Arrays.hashCode( columns );
  result = 31 * result + Arrays.hashCode( orderBy );
  result = 31 * result + ( conditions == null ? 0 : conditions.hashCode() );
  result = 31 * result + Arrays.hashCode( updateBy );
  result = 31 * result + ( updateValueFields == null ? 0 : updateValueFields.hashCode() );
  result = 31 * result + ( updateByFields == null ? 0 : updateByFields.hashCode() );
  result = 31 * result + ( columnFields == null ? 0 : columnFields.hashCode() );
  result = 31 * result + ( updateIfTuple == null ? 0 : updateIfTuple.hashCode() );
  result = 31 * result + ( selectQuery == null ? 0 : selectQuery.hashCode() );
  result = 31 * result + ( countQuery == null ? 0 : countQuery.hashCode() );
  // fold both halves of the long into the int result
  result = 31 * result + (int) ( limit ^ ( limit >>> 32 ) );

  return result;
  }
}