private void verifyWorkUnits(List<WorkUnit> workunits) throws IOException, DataRecordException { verifyWorkUnits(workunits, RECORD_SIZE); }
@Test public void testWorksNoPrefix() throws IOException, DataRecordException { DatePartitionedAvroFileSource source = new DatePartitionedAvroFileSource(); SourceState state = new SourceState(); state.setProp(ConfigurationKeys.SOURCE_FILEBASED_FS_URI, ConfigurationKeys.LOCAL_FS_URI); state.setProp(ConfigurationKeys.EXTRACT_NAMESPACE_NAME_KEY, SOURCE_ENTITY); state.setProp(ConfigurationKeys.SOURCE_FILEBASED_DATA_DIRECTORY, OUTPUT_DIR + Path.SEPARATOR + SOURCE_ENTITY + Path.SEPARATOR + PREFIX); state.setProp(ConfigurationKeys.SOURCE_ENTITY, SOURCE_ENTITY); state.setProp(ConfigurationKeys.SOURCE_MAX_NUMBER_OF_PARTITIONS, 2); state.setProp("date.partitioned.source.partition.pattern", DATE_PATTERN); state.setProp("date.partitioned.source.min.watermark.value", DateTimeFormat.forPattern(DATE_PATTERN).print( this.startDateTime.minusMinutes(1))); state.setProp(ConfigurationKeys.EXTRACT_TABLE_TYPE_KEY, TableType.SNAPSHOT_ONLY); state.setProp("date.partitioned.source.partition.suffix", SUFFIX); //Read data partitioned by minutes, i.e each workunit is assigned records under the same YYYY/MM/dd/HH_mm directory List<WorkUnit> workunits = source.getWorkunits(state); Assert.assertEquals(workunits.size(), 4); verifyWorkUnits(workunits); }
@Test public void testReadPartitionsByMinute() throws IOException, DataRecordException { DatePartitionedAvroFileSource source = new DatePartitionedAvroFileSource(); SourceState state = new SourceState(); state.setProp(ConfigurationKeys.SOURCE_FILEBASED_FS_URI, ConfigurationKeys.LOCAL_FS_URI); state.setProp(ConfigurationKeys.EXTRACT_NAMESPACE_NAME_KEY, SOURCE_ENTITY); state.setProp(ConfigurationKeys.SOURCE_FILEBASED_DATA_DIRECTORY, OUTPUT_DIR + Path.SEPARATOR + SOURCE_ENTITY); state.setProp(ConfigurationKeys.SOURCE_ENTITY, SOURCE_ENTITY); state.setProp(ConfigurationKeys.SOURCE_MAX_NUMBER_OF_PARTITIONS, 2); state.setProp("date.partitioned.source.partition.pattern", DATE_PATTERN); state.setProp("date.partitioned.source.min.watermark.value", DateTimeFormat.forPattern(DATE_PATTERN).print( this.startDateTime.minusMinutes(1))); state.setProp(ConfigurationKeys.EXTRACT_TABLE_TYPE_KEY, TableType.SNAPSHOT_ONLY); state.setProp("date.partitioned.source.partition.prefix", PREFIX); state.setProp("date.partitioned.source.partition.suffix", SUFFIX); //Read data partitioned by minutes, i.e each workunit is assigned records under the same YYYY/MM/dd/HH_mm directory List<WorkUnit> workunits = source.getWorkunits(state); Assert.assertEquals(workunits.size(), 4); verifyWorkUnits(workunits); }
@Test public void testReadPartitionsByMinuteWithLeadtime() throws IOException, DataRecordException { DatePartitionedAvroFileSource source = new DatePartitionedAvroFileSource(); SourceState state = new SourceState(); state.setProp(ConfigurationKeys.SOURCE_FILEBASED_FS_URI, ConfigurationKeys.LOCAL_FS_URI); state.setProp(ConfigurationKeys.EXTRACT_NAMESPACE_NAME_KEY, SOURCE_ENTITY); state.setProp(ConfigurationKeys.SOURCE_FILEBASED_DATA_DIRECTORY, OUTPUT_DIR + Path.SEPARATOR + SOURCE_ENTITY); state.setProp(ConfigurationKeys.SOURCE_ENTITY, SOURCE_ENTITY); state.setProp(ConfigurationKeys.SOURCE_MAX_NUMBER_OF_PARTITIONS, 2); state.setProp("date.partitioned.source.partition.pattern", DATE_PATTERN); state.setProp("date.partitioned.source.min.watermark.value", DateTimeFormat.forPattern(DATE_PATTERN).print( this.startDateTime.minusMinutes(1))); state.setProp(ConfigurationKeys.EXTRACT_TABLE_TYPE_KEY, TableType.SNAPSHOT_ONLY); state.setProp("date.partitioned.source.partition.prefix", PREFIX); state.setProp("date.partitioned.source.partition.suffix", SUFFIX); state.setProp("date.partitioned.source.partition.lead_time.size", "3"); state.setProp("date.partitioned.source.partition.lead_time.granularity", "HOUR"); /* * Since lead time is 3 hours, only the first WorkUnit (which is 6 hours old, rest are 2hrs) should get * picked up */ List<WorkUnit> workunits = source.getWorkunits(state); Assert.assertEquals(workunits.size(), 1); verifyWorkUnits(workunits, workunits.size()); }