public static String writeStateToDistCache( JobConf conf, String id, String kind, String stepState )
  {
  if( Util.isEmpty( stepState ) )
    return null;

  LOG.info( "writing step state to dist cache, too large for job conf, size: {}", stepState.length() );

  String statePath = Hfs.getTempPath( conf ) + "/" + kind + "-state-" + id;

  // write the state as a single line to a temp location on the Hadoop FS
  Hfs temp = new Hfs( new TextLine(), statePath, SinkMode.REPLACE );

  try
    {
    TupleEntryCollector writer = temp.openForWrite( new HadoopFlowProcess( conf ) );

    writer.add( new Tuple( stepState ) );

    writer.close();
    }
  catch( IOException exception )
    {
    // chain the cause so the underlying FS error is not lost
    throw new FlowException( "unable to write step state to Hadoop FS: " + temp.getIdentifier(), exception );
    }

  // register the file with the distributed cache so tasks can read it locally
  URI uri = new Path( statePath ).toUri();

  DistributedCache.addCacheFile( uri, conf );

  LOG.info( "using step state path: {}", uri );

  return statePath;
  }
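// A minimal sketch of the symmetric read path, for context. The helper name
// readStateFromHfs is illustrative (not part of the excerpt above), and it
// assumes the writer emitted the state as a single line via TextLine.
public static String readStateFromHfs( JobConf conf, String statePath ) throws IOException
  {
  Hfs source = new Hfs( new TextLine(), statePath );

  TupleEntryIterator iterator = source.openForRead( new HadoopFlowProcess( conf ) );

  try
    {
    // TextLine sources fields "offset" and "line"; the state is the first line
    return iterator.hasNext() ? iterator.next().getString( "line" ) : null;
    }
  finally
    {
    iterator.close();
    }
  }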
@Override
public void cleanup(FlowProcess flowProcess, OperationCall operationCall) {
  JobConf conf = (JobConf) flowProcess.getConfigCopy();
  try {
    LOG.info("HLL counter found " + approxCounter.cardinality() + " distinct keys");

    // serialize the HLL counter into a side bucket so a later step can
    // size the bloom filter from the approximate distinct-key count
    Hfs tap = new Hfs(new SequenceFile(new Fields("bytes")), BloomProps.getApproxCountsDir(conf));
    TupleEntryCollector out = tap.openForWrite(new HadoopFlowProcess(conf));
    out.add(new Tuple(new BytesWritable(approxCounter.getBytes())));
    out.close();
  } catch (IOException e) {
    throw new RuntimeException("couldn't write approximate counts to side bucket", e);
  }
}
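// A hedged sketch of the read side: how a later step might pull the
// serialized counter bytes back out of the side bucket. The helper name is
// illustrative; deserializing into a concrete HLL type depends on the
// counter implementation and is left out here.
private static byte[] readApproxCountBytes(JobConf conf) throws IOException {
  Hfs tap = new Hfs(new SequenceFile(new Fields("bytes")), BloomProps.getApproxCountsDir(conf));
  TupleEntryIterator it = tap.openForRead(new HadoopFlowProcess(conf));
  try {
    if (!it.hasNext())
      return null;
    BytesWritable bytes = (BytesWritable) it.next().getObject("bytes");
    // trim to getLength(); the writable's backing array may be padded
    return Arrays.copyOf(bytes.getBytes(), bytes.getLength());
  } finally {
    it.close();
  }
}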
@Override
public void prepare(FlowProcess flowProcess, OperationCall<CreateBloomFilterFromIndices.Context> operationCall) {
  try {
    JobConf conf = (JobConf) flowProcess.getConfigCopy();
    String partsRoot = BloomProps.getBloomFilterPartsDir(conf);
    maxHashes = BloomProps.getMaxBloomHashes(conf);
    minHashes = BloomProps.getMinBloomHashes(conf);

    // open one collector per candidate hash count, each writing filter
    // parts under its own subdirectory of the parts root
    for (int i = minHashes; i <= maxHashes; i++) {
      Hfs tap = new Hfs(new SequenceFile(new Fields("split", "filter")), partsRoot + "/" + i + "/");
      numHashesToCollector.put(i, tap.openForWrite(new HadoopFlowProcess(conf)));
    }
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
}
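// A sketch of the companion cleanup one would expect (not shown in the
// excerpt): close every per-hash-count collector opened in prepare() so the
// filter parts are flushed to the Hadoop FS.
@Override
public void cleanup(FlowProcess flowProcess, OperationCall<CreateBloomFilterFromIndices.Context> operationCall) {
  for (TupleEntryCollector collector : numHashesToCollector.values()) {
    collector.close();
  }
}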
private void writeFileTo( String path ) throws IOException
  {
  Hfs tap = new Hfs( new TextLine( new Fields( "offset", "line" ) ), getOutputPath( path ) );

  TupleEntryCollector collector = tap.openForWrite( getPlatform().getFlowProcess() );

  collector.add( new Tuple( 1, "1" ) );

  collector.close();
  }
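// A hypothetical test companion (the helper name is illustrative): read the
// file back with the same platform flow process and count the lines written.
private int countLinesIn( String path ) throws IOException
  {
  Hfs tap = new Hfs( new TextLine( new Fields( "offset", "line" ) ), getOutputPath( path ) );

  TupleEntryIterator iterator = tap.openForRead( getPlatform().getFlowProcess() );

  int count = 0;

  while( iterator.hasNext() )
    {
    iterator.next();
    count++;
    }

  iterator.close();

  return count;
  }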