/**
 * A location that carries a date but no granularity segment (such as <code>hourly</code> or
 * <code>daily</code>) is expected to make the provider throw {@link UpdateNotFoundException}.
 */
@Test(expectedExceptions = UpdateNotFoundException.class)
public void testNoMatchingPattern() throws Exception {
  HiveUnitUpdateProvider provider = new DatePatternUpdateProvider();
  Partition partitionWithoutGranularity = createMockPartitionWithLocation("/data/TestEvent/2016/02/02/10");

  // The call itself is the assertion: TestNG fails the test unless the expected exception is raised.
  provider.getUpdateTime(partitionWithoutGranularity);
}
/**
 * Extracts the date encoded in a data location and converts it to milliseconds.
 *
 * <p>Every {@link Patterns} value is tried in the order returned by <code>Patterns.values()</code>: the
 * text following the last occurrence of the pattern's prefix is taken as the date string, and the first
 * non-blank date string found is parsed with that pattern's date format. A date string that fails to
 * parse is not retried against the remaining patterns.
 *
 * @param location the path of the table or partition data
 * @return the parsed date, in milliseconds
 * @throws UpdateNotFoundException if the extracted date string cannot be parsed (the parser's exception
 *         is attached as the cause), or if no pattern prefix yields a non-blank date string
 */
private long parseDateForLocation(String location) throws UpdateNotFoundException {
  for (Patterns pattern : Patterns.values()) {
    String dateString = StringUtils.substringAfterLast(location, pattern.prefix);
    if (StringUtils.isNotBlank(dateString)) {
      try {
        return pattern.dateFormat.parseMillis(dateString);
      } catch (IllegalArgumentException | UnsupportedOperationException e) {
        // Chain the parser's exception so the underlying reason and stack trace are not lost.
        throw new UpdateNotFoundException(String.format("Failed parsing date string %s", dateString), e);
      }
    }
  }
  throw new UpdateNotFoundException(String.format("Path %s does not match any date pattern %s", location,
      Arrays.toString(Patterns.values())));
}
/**
 * Looks up the update time of a {@link Table} from the table's data location.
 *
 * @param table the table whose data location is queried
 * @return the update time produced by the location-based lookup
 * @throws UpdateNotFoundException if the location-based lookup raises an {@link IOException}
 * @see HiveUnitUpdateProvider#getUpdateTime(org.apache.hadoop.hive.ql.metadata.Table)
 */
@Override
public long getUpdateTime(Table table) throws UpdateNotFoundException {
  try {
    return getUpdateTime(table.getDataLocation());
  } catch (IOException ioe) {
    String failureMessage = String.format("Failed to get update time for %s.", table.getCompleteName());
    throw new UpdateNotFoundException(failureMessage, ioe);
  }
}
try { final long updateTime = this.updateProvider.getUpdateTime(hiveDataset.getTable()); log.warn(String .format("Not validating table: %s as update time was not found. %s", hiveDataset.getTable().getCompleteName(), e.getMessage()));
/**
 * {@inheritDoc}
 *
 * <p>The expected high watermark is the <code>table</code>'s update time as reported by
 * {@link HiveUnitUpdateProvider#getUpdateTime(Table)}; <code>tableProcessTime</code> is not consulted.
 *
 * @throws UpdateNotFoundException if {@link HiveUnitUpdateProvider#getUpdateTime(Table)} could not
 *         provide an update time
 * @see org.apache.gobblin.data.management.conversion.hive.watermarker.HiveSourceWatermarker#getExpectedHighWatermark(org.apache.hadoop.hive.ql.metadata.Table, long)
 */
@Override
public LongWatermark getExpectedHighWatermark(Table table, long tableProcessTime) {
  final long tableUpdateTime = this.updateProvider.getUpdateTime(table);
  return new LongWatermark(tableUpdateTime);
}
/**
 * Derives the update time of a {@link Partition} from the date encoded in the location of its
 * storage descriptor.
 *
 * @param partition the partition whose location is parsed
 * @return the time parsed from the partition location
 * @throws UpdateNotFoundException if no date could be parsed from the location
 */
@Override
public long getUpdateTime(Partition partition) throws UpdateNotFoundException {
  final String partitionLocation = partition.getTPartition().getSd().getLocation();
  return parseDateForLocation(partitionLocation);
}
/**
 * Instantiates the {@link HiveUnitUpdateProvider} configured in <code>properties</code>.
 *
 * <p>The provider class name is read from <code>OPTIONAL_HIVE_UNIT_UPDATE_PROVIDER_CLASS_KEY</code>,
 * falling back to <code>DEFAULT_HIVE_UNIT_UPDATE_PROVIDER_CLASS</code>. The class is constructed via
 * <code>GobblinConstructorUtils.invokeFirstConstructor</code> with two candidate argument lists: one
 * holding the file system obtained from <code>getFileSystem(properties)</code>, and an empty one.
 *
 * @param properties configuration naming the provider class and used to obtain the file system
 * @return the newly created provider
 * @throws RuntimeException wrapping any reflection, class-loading or I/O failure
 */
public static HiveUnitUpdateProvider create(Properties properties) {
  try {
    String providerClassName = properties.getProperty(OPTIONAL_HIVE_UNIT_UPDATE_PROVIDER_CLASS_KEY,
        DEFAULT_HIVE_UNIT_UPDATE_PROVIDER_CLASS);
    Class<?> providerClass = Class.forName(providerClassName);
    return (HiveUnitUpdateProvider) GobblinConstructorUtils.invokeFirstConstructor(providerClass,
        ImmutableList.<Object>of(getFileSystem(properties)), ImmutableList.of());
  } catch (NoSuchMethodException | IllegalAccessException | InvocationTargetException | InstantiationException
      | ClassNotFoundException | IOException creationFailure) {
    throw new RuntimeException(creationFailure);
  }
}
/**
 * Returns the modification time of <code>path</code> as reported by the enclosing provider's file system.
 *
 * @throws UpdateNotFoundException if the file system reports that nothing exists at <code>path</code>
 */
@Override
public Long call() throws Exception {
  // Guard first: a missing path is reported as "update not found".
  if (!HdfsBasedUpdateProvider.this.fs.exists(path)) {
    throw new UpdateNotFoundException(String.format("Data file does not exist at path %s", path));
  }
  return HdfsBasedUpdateProvider.this.fs.getFileStatus(path).getModificationTime();
}
});
/**
 * An <code>hourly</code> location whose date portion contains a non-numeric segment (<code>abc</code>)
 * is expected to make the provider throw {@link UpdateNotFoundException}.
 */
@Test(expectedExceptions = UpdateNotFoundException.class)
public void testHourlyInvalid() throws Exception {
  HiveUnitUpdateProvider provider = new DatePatternUpdateProvider();
  Partition malformedHourlyPartition = createMockPartitionWithLocation("/data/TestEvent/hourly/2016/02/abc/10");

  // The call itself is the assertion: TestNG fails the test unless the expected exception is raised.
  provider.getUpdateTime(malformedHourlyPartition);
}
long updateTime = this.updateProvider.getUpdateTime(hiveDataset.getTable()); .getCompleteName(), e.getMessage()), e); } catch (SchemaNotFoundException e) { log.error(String.format("Not Creating workunit for %s as schema was not found. %s", hiveDataset.getTable()
/**
 * Decides whether a workunit should be created for <code>table</code>.
 *
 * <p>Fetches the table's update time from the update provider and its create time, then delegates the
 * decision to the time-based <code>shouldCreateWorkunit</code> overload.
 *
 * @param table the table under consideration
 * @param lowWatermark the low watermark passed through to the time-based overload
 * @return the result of the time-based overload
 * @throws UpdateNotFoundException if the update provider cannot supply an update time
 */
protected boolean shouldCreateWorkunit(Table table, LongWatermark lowWatermark) throws UpdateNotFoundException {
  // The update time is resolved first so that a missing update surfaces before anything else is read.
  final long tableUpdateTime = this.updateProvider.getUpdateTime(table);
  final long tableCreateTime = getCreateTime(table);

  return shouldCreateWorkunit(tableCreateTime, tableUpdateTime, lowWatermark);
}
/**
 * Looks up the update time of a {@link Partition} from the partition's data location.
 *
 * @param partition the partition whose data location is queried
 * @return the update time produced by the location-based lookup
 * @throws UpdateNotFoundException if the location-based lookup raises an {@link IOException}
 * @see HiveUnitUpdateProvider#getUpdateTime(org.apache.hadoop.hive.ql.metadata.Partition)
 */
@Override
public long getUpdateTime(Partition partition) throws UpdateNotFoundException {
  try {
    return getUpdateTime(partition.getDataLocation());
  } catch (IOException ioe) {
    String failureMessage = String.format("Failed to get update time for %s", partition.getCompleteName());
    throw new UpdateNotFoundException(failureMessage, ioe);
  }
}
/**
 * Derives the update time of a {@link Table} from the date encoded in the location of its
 * storage descriptor.
 *
 * @param table the table whose location is parsed
 * @return the time parsed from the table location
 * @throws UpdateNotFoundException if no date could be parsed from the location
 */
@Override
public long getUpdateTime(Table table) throws UpdateNotFoundException {
  final String tableLocation = table.getTTable().getSd().getLocation();
  return parseDateForLocation(tableLocation);
}
/**
 * Instantiates the {@link HiveUnitUpdateProvider} configured in <code>state</code>.
 *
 * <p>The provider class name is read from <code>OPTIONAL_HIVE_UNIT_UPDATE_PROVIDER_CLASS_KEY</code>,
 * falling back to <code>DEFAULT_HIVE_UNIT_UPDATE_PROVIDER_CLASS</code>. The class is constructed via
 * <code>GobblinConstructorUtils.invokeFirstConstructor</code> with two candidate argument lists: one
 * holding the file system obtained from <code>getFileSystem(state.getProperties())</code>, and an
 * empty one.
 *
 * @param state job state naming the provider class and supplying the file system properties
 * @return the newly created provider
 * @throws RuntimeException wrapping any reflection, class-loading or I/O failure
 */
public static HiveUnitUpdateProvider create(State state) {
  try {
    String providerClassName =
        state.getProp(OPTIONAL_HIVE_UNIT_UPDATE_PROVIDER_CLASS_KEY, DEFAULT_HIVE_UNIT_UPDATE_PROVIDER_CLASS);
    Class<?> providerClass = Class.forName(providerClassName);
    return (HiveUnitUpdateProvider) GobblinConstructorUtils.invokeFirstConstructor(providerClass,
        ImmutableList.<Object>of(getFileSystem(state.getProperties())), ImmutableList.of());
  } catch (NoSuchMethodException | IllegalAccessException | InvocationTargetException | InstantiationException
      | ClassNotFoundException | IOException creationFailure) {
    throw new RuntimeException(creationFailure);
  }
}
/**
 * An <code>hourly_late</code> location ending in <code>2016/02/02/10</code> must yield
 * <code>EPOCH_2016_02_02_10</code> as the update time.
 */
@Test
public void testHourlyLate() throws Exception {
  HiveUnitUpdateProvider provider = new DatePatternUpdateProvider();
  Partition hourlyLatePartition = createMockPartitionWithLocation("/data/TestEvent/hourly_late/2016/02/02/10");

  long actualUpdateTime = provider.getUpdateTime(hourlyLatePartition);

  Assert.assertEquals(actualUpdateTime, EPOCH_2016_02_02_10);
}
/**
 * Decides whether a workunit should be created for <code>sourcePartition</code>.
 *
 * <p>Fetches the partition's update time from the update provider and its create time, then delegates
 * the decision to the time-based <code>shouldCreateWorkunit</code> overload.
 *
 * @param sourcePartition the partition under consideration
 * @param lowWatermark the low watermark passed through to the time-based overload
 * @return the result of the time-based overload
 * @throws UpdateNotFoundException if the update provider cannot supply an update time
 */
protected boolean shouldCreateWorkunit(Partition sourcePartition, LongWatermark lowWatermark)
    throws UpdateNotFoundException {
  // The update time is resolved first so that a missing update surfaces before anything else is read.
  final long partitionUpdateTime = this.updateProvider.getUpdateTime(sourcePartition);
  final long partitionCreateTime = getCreateTime(sourcePartition);

  return shouldCreateWorkunit(partitionCreateTime, partitionUpdateTime, lowWatermark);
}
/**
 * A <code>daily</code> location ending in <code>2016/02/02</code> must yield
 * <code>EPOCH_2016_02_02</code> as the update time.
 */
@Test
public void testDaily() throws Exception {
  HiveUnitUpdateProvider provider = new DatePatternUpdateProvider();
  Partition dailyPartition = createMockPartitionWithLocation("/data/TestEvent/daily/2016/02/02");

  long actualUpdateTime = provider.getUpdateTime(dailyPartition);

  Assert.assertEquals(actualUpdateTime, EPOCH_2016_02_02);
}
/**
 * A <code>daily_late</code> location ending in <code>2016/02/02</code> must yield
 * <code>EPOCH_2016_02_02</code> as the update time.
 */
@Test
public void testDailyLate() throws Exception {
  HiveUnitUpdateProvider provider = new DatePatternUpdateProvider();
  Partition dailyLatePartition = createMockPartitionWithLocation("/data/TestEvent/daily_late/2016/02/02");

  long actualUpdateTime = provider.getUpdateTime(dailyLatePartition);

  Assert.assertEquals(actualUpdateTime, EPOCH_2016_02_02);
}
/**
 * An <code>hourly</code> location ending in <code>2016/02/02/10</code> must yield
 * <code>EPOCH_2016_02_02_10</code> as the update time.
 */
@Test
public void testHourly() throws Exception {
  HiveUnitUpdateProvider provider = new DatePatternUpdateProvider();
  Partition hourlyPartition = createMockPartitionWithLocation("/data/TestEvent/hourly/2016/02/02/10");

  long actualUpdateTime = provider.getUpdateTime(hourlyPartition);

  Assert.assertEquals(actualUpdateTime, EPOCH_2016_02_02_10);
}
/**
 * An <code>hourly_deduped</code> location ending in <code>2016/02/02/10</code> must yield
 * <code>EPOCH_2016_02_02_10</code> as the update time.
 */
@Test
public void testHourlyDeduped() throws Exception {
  HiveUnitUpdateProvider provider = new DatePatternUpdateProvider();
  Partition hourlyDedupedPartition =
      createMockPartitionWithLocation("/data/TestEvent/hourly_deduped/2016/02/02/10");

  long actualUpdateTime = provider.getUpdateTime(hourlyDedupedPartition);

  Assert.assertEquals(actualUpdateTime, EPOCH_2016_02_02_10);
}