private void setup(String path, Scheme<JobConf, RecordReader, OutputCollector, SourceCtx, SinkCtx> scheme) {
  this.path = path;

  /*
   * LocalTap requires your system Hadoop configuration for defaults to
   * supply the wrapped Lfs. Make sure you have your serializations and
   * serialization tokens defined there.
   */
  defaults = new JobConf();

  // HACK: c.t.h.TextLine checks this property for .zip files; the check
  // assumes the list is non-empty, which we mock up here
  defaults.set("mapred.input.dir", path);

  // HACK: Parquet uses this property to generate unique file names
  defaults.set("mapred.task.partition", "0");

  // HACK: disable Parquet counters
  defaults.set("parquet.benchmark.bytes.read", "false");
  defaults.set("parquet.benchmark.bytes.total", "false");
  defaults.set("parquet.benchmark.time.read", "false");

  ((LocalScheme<SourceCtx, SinkCtx>) this.getScheme()).setDefaults(defaults);

  lfs = new Lfs(scheme, path);
  ((LocalScheme<SourceCtx, SinkCtx>) this.getScheme()).setLfs(lfs);
}
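For orientation, a hypothetical caller might construct the tap as below. The (path, scheme) constructor shown is inferred from setup's arguments and is an assumption; only setup() itself appears in the excerpt.

// Hypothetical usage sketch (assumption): a LocalTap constructor that
// delegates to setup(path, scheme). The exact signature may differ in the
// real class.
Tap tap = new LocalTap("data/input.txt", new TextLine(new Fields("line")));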
@Override
public boolean createResource(Properties conf) throws IOException {
  return lfs.createResource(mergeDefaults("LocalTap#createResource", conf, defaults));
}

@Override
public boolean deleteResource(Properties conf) throws IOException {
  return lfs.deleteResource(mergeDefaults("LocalTap#deleteResource", conf, defaults));
}

@Override
public long getModifiedTime(Properties conf) throws IOException {
  return lfs.getModifiedTime(mergeDefaults("LocalTap#getModifiedTime", conf, defaults));
}

@Override
public TupleEntryIterator openForRead(FlowProcess<Properties> flowProcess, RecordReader input) throws IOException {
  JobConf jobConf = mergeDefaults("LocalTap#openForRead", flowProcess.getConfigCopy(), defaults);
  return lfs.openForRead(new HadoopFlowProcess(jobConf));
}
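The mergeDefaults helper these methods call is not shown in the excerpt. A plausible sketch, assuming it does nothing more than layer the caller's Properties over the default JobConf built in setup():

// Sketch only (assumption): the real body of mergeDefaults is not in the
// excerpt. This version copies the hacked-up defaults, then overlays the
// caller's Properties so explicit settings win. The caller-name argument is
// presumably used for logging and is ignored here.
private static JobConf mergeDefaults(String caller, Properties properties, JobConf defaults) {
  JobConf merged = new JobConf(defaults);
  for (String key : properties.stringPropertyNames())
    merged.set(key, properties.getProperty(key));
  return merged;
}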
taps.add( new Lfs( getScheme(), path.toString() ) );
Hfs temp = new Lfs( new TextLine( new Fields( "line" ) ), stepStatePath.toString() );
@Test
public void testLfs() throws URISyntaxException, IOException {
  Tap tap = new Lfs( new SequenceFile( new Fields( "foo" ) ), "some/path" );

  String path = tap.getFullIdentifier( getPlatform().getFlowProcess() );

  assertTrue( "wrong scheme", new Path( path ).toUri().getScheme().equalsIgnoreCase( "file" ) );

  new Lfs( new SequenceFile( new Fields( "foo" ) ), "file:///some/path" );

  try {
    new Lfs( new SequenceFile( new Fields( "foo" ) ), "s3://localhost:5001/some/path" );
    fail( "not valid url" );
  } catch( Exception exception ) {
    // expected: Lfs only accepts identifiers on the local file system
  }
}
@Test
public void testLocalModeSource() throws Exception {
  Tap source = new Lfs( new TextLine(), "input/path" );
  Tap sink = new Hfs( new TextLine(), "output/path", SinkMode.REPLACE );
  Pipe pipe = new Pipe( "test" );

  Flow flow = getPlatform().getFlowConnector().connect( source, sink, pipe );

  List<FlowStep> steps = flow.getFlowSteps();

  assertEquals( "wrong size", 1, steps.size() );

  FlowStep step = steps.get( 0 );

  boolean isLocal = HadoopUtil.isLocal( (Configuration) step.getConfig() );

  assertTrue( "is not local", isLocal );
}
@Test
public void testLocalModeSink() throws Exception {
  Tap source = new Hfs( new TextLine(), "input/path" );
  Tap sink = new Lfs( new TextLine(), "output/path", SinkMode.REPLACE );
  Pipe pipe = new Pipe( "test" );

  Flow flow = getPlatform().getFlowConnector().connect( source, sink, pipe );

  List<FlowStep> steps = flow.getFlowSteps();

  assertEquals( "wrong size", 1, steps.size() );

  FlowStep step = steps.get( 0 );

  boolean isLocal = HadoopUtil.isLocal( (Configuration) step.getConfig() );

  assertTrue( "is not local", isLocal );
}
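Both local-mode tests hinge on HadoopUtil.isLocal reading the step configuration. A quick standalone probe of the conventional signal (the property names below are standard Hadoop conventions, not taken from the excerpt):

// Standalone probe (assumption about the convention, not about HadoopUtil's
// exact implementation): Hadoop conventionally signals local execution via
// mapred.job.tracker=local (Hadoop 1) or mapreduce.framework.name=local (Hadoop 2).
JobConf conf = new JobConf();
conf.set( "mapred.job.tracker", "local" );
System.out.println( HadoopUtil.isLocal( conf ) ); // expected: true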
@Test
public void testCopyConfig() throws Exception {
  Tap source = new Lfs( new TextLine(), "input/path" );
  Tap sink = new Hfs( new TextLine(), "output/path", SinkMode.REPLACE );
  Pipe pipe = new Pipe( "test" );

  Configuration conf = ( (BaseHadoopPlatform) getPlatform() ).getConfiguration();

  conf.set( AppProps.APP_NAME, "testname" );

  AppProps props = AppProps.appProps().setVersion( "1.2.3" );

  // convert job conf to properties instance
  Properties properties = props.buildProperties( conf );

  Flow flow = getPlatform().getFlowConnector( properties ).connect( source, sink, pipe );

  assertEquals( "testname", flow.getProperty( AppProps.APP_NAME ) );
  assertEquals( "1.2.3", flow.getProperty( AppProps.APP_VERSION ) );
}
@Test
public void testFlowID() throws Exception {
  Tap source = new Lfs( new TextLine(), "input/path" );
  Tap sink = new Hfs( new TextLine(), "output/path", SinkMode.REPLACE );
  Pipe pipe = new Pipe( "test" );

  Map<Object, Object> props = getProperties();

  Flow flow1 = getPlatform().getFlowConnector( props ).connect( source, sink, pipe );

  // System.out.println( "flow.getID() = " + flow1.getID() );

  assertNotNull( "missing id", flow1.getID() );
  assertNotNull( "missing id in conf", flow1.getProperty( "cascading.flow.id" ) );

  Flow flow2 = getPlatform().getFlowConnector( props ).connect( source, sink, pipe );

  assertTrue( "same id", !flow1.getID().equalsIgnoreCase( flow2.getID() ) );
}