@Override public void sinkConfInit(FlowProcess<JobConf> flowProcess, Tap<JobConf, RecordReader<Tuple, Tuple>, OutputCollector<Tuple, Tuple>> tap, JobConf conf) { // Pick temp location in HDFS for conf files. // TODO KKr - should I get rid of this temp directory when we're done? String coreDirname = _solrCoreDir.getName(); Path hdfsSolrCoreDir = new Path(Hfs.getTempPath(conf), "solr-core-" + Util.createUniqueID() + "/" + coreDirname); // Copy Solr core directory into HDFS. try { FileSystem fs = hdfsSolrCoreDir.getFileSystem(conf); fs.copyFromLocalFile(new Path(_solrCoreDir.getAbsolutePath()), hdfsSolrCoreDir); } catch (IOException e) { throw new TapException("Can't copy Solr core directory into HDFS", e); } conf.setOutputKeyClass(Tuple.class); conf.setOutputValueClass(Tuple.class); conf.setOutputFormat(SolrOutputFormat.class); try { conf.set(SolrOutputFormat.SINK_FIELDS_KEY, HadoopUtil.serializeBase64(getSinkFields(), conf)); } catch (IOException e) { throw new TapException("Can't serialize sink fields", e); } conf.set(SolrOutputFormat.SOLR_CORE_PATH_KEY, hdfsSolrCoreDir.toString()); conf.setInt(SolrOutputFormat.MAX_SEGMENTS_KEY, _maxSegments); conf.set(SolrOutputFormat.DATA_DIR_PROPERTY_NAME_KEY, _dataDirPropertyName); }
/**
 * Persists a step's serialized state to a temp file on the Hadoop FS and registers
 * it with the distributed cache, for states too large to fit in the job conf.
 *
 * @param conf      job configuration the temp path and dist cache entry are bound to
 * @param id        unique step id, used to name the state file
 * @param kind      state kind, used as the file-name prefix
 * @param stepState serialized state; ignored when null/empty
 * @return the HDFS path the state was written to, or null if stepState was empty
 * @throws FlowException if the state cannot be written to the Hadoop FS
 */
public static String writeStateToDistCache( JobConf conf, String id, String kind, String stepState )
  {
  if( Util.isEmpty( stepState ) )
    return null;

  LOG.info( "writing step state to dist cache, too large for job conf, size: {}", stepState.length() );

  // unique per-step path so concurrent steps don't collide
  String statePath = Hfs.getTempPath( conf ) + "/" + kind + "-state-" + id;

  Hfs temp = new Hfs( new TextLine(), statePath, SinkMode.REPLACE );

  TupleEntryCollector writer = null;

  try
    {
    writer = temp.openForWrite( new HadoopFlowProcess( conf ) );
    writer.add( new Tuple( stepState ) );
    }
  catch( IOException exception )
    {
    // chain the cause so the underlying FS failure isn't lost
    throw new FlowException( "unable to write step state to Hadoop FS: " + temp.getIdentifier(), exception );
    }
  finally
    {
    // release the collector even if add() fails, so we don't leak the open sink
    if( writer != null )
      writer.close();
    }

  URI uri = new Path( statePath ).toUri();
  DistributedCache.addCacheFile( uri, conf );

  LOG.info( "using step state path: {}", uri );

  return statePath;
  }
/**
 * Persists a step's serialized state to a temp file on the Hadoop FS and registers
 * it with the distributed cache, for states too large to fit in the job conf.
 *
 * @param conf      job configuration the temp path and dist cache entry are bound to
 * @param id        unique step id, used to name the state file
 * @param kind      state kind, used as the file-name prefix
 * @param stepState serialized state; ignored when null/empty
 * @return the HDFS path the state was written to, or null if stepState was empty
 * @throws FlowException if the state cannot be written to the Hadoop FS
 */
public static String writeStateToDistCache( JobConf conf, String id, String kind, String stepState )
  {
  if( Util.isEmpty( stepState ) )
    return null;

  LOG.info( "writing step state to dist cache, too large for job conf, size: {}", stepState.length() );

  // unique per-step path so concurrent steps don't collide
  String statePath = Hfs.getTempPath( conf ) + "/" + kind + "-state-" + id;

  Hfs temp = new Hfs( new TextLine(), statePath, SinkMode.REPLACE );

  TupleEntryCollector writer = null;

  try
    {
    writer = temp.openForWrite( new HadoopFlowProcess( conf ) );
    writer.add( new Tuple( stepState ) );
    }
  catch( IOException exception )
    {
    // chain the cause so the underlying FS failure isn't lost
    throw new FlowException( "unable to write step state to Hadoop FS: " + temp.getIdentifier(), exception );
    }
  finally
    {
    // release the collector even if add() fails, so we don't leak the open sink
    if( writer != null )
      writer.close();
    }

  URI uri = new Path( statePath ).toUri();
  DistributedCache.addCacheFile( uri, conf );

  LOG.info( "using step state path: {}", uri );

  return statePath;
  }
outputPath = Hfs.getTempPath( sinkConf ).toString(); // unused — NOTE(review): assigned but apparently never read here; the text after the original "// unused" was dead, commented-out duplicate code. Confirm against callers before removing the assignment.
outputPath = Hfs.getTempPath( sinkConf ).toString(); // unused — NOTE(review): assigned but apparently never read here; the text after the original "// unused" was dead, commented-out duplicate code. Confirm against callers before removing the assignment.