/**
 * Commits every partition queued for batch addition to this timeline.
 *
 * //TODO: improve batch addition implementation.
 *
 * @return true if every queued partition was added successfully, or if nothing was queued
 * @throws LensException
 */
public boolean commitBatchAdditions() throws LensException {
  if (getAll() == null) {
    // nothing queued for batch addition
    return true;
  }
  log.info("initializing timeline from batch addition: {},{},{}", getStorageTableName(), getUpdatePeriod(),
    getPartCol());
  boolean success = add(getAll());
  // the queue is cleared regardless of whether the add succeeded
  all = null;
  log.info("initialized to: {}", this);
  return success;
}
/**
 * Initializes this timeline from its serialized state stored in the table parameters.
 * Only parameters carrying this timeline's key prefix are considered; the prefix is
 * stripped before handing the properties to {@link #initFromProperties}.
 *
 * @param table
 * @throws LensException
 * @see #updateTableParams(org.apache.hadoop.hive.ql.metadata.Table)
 */
public void init(Table table) throws LensException {
  String prefix = MetastoreUtil.getPartitionInfoKeyPrefix(getUpdatePeriod(), getPartCol());
  HashMap<String, String> props = new HashMap<String, String>();
  for (Map.Entry<String, String> param : table.getParameters().entrySet()) {
    String key = param.getKey();
    if (key.startsWith(prefix)) {
      // strip the prefix so the timeline sees only its own property names
      props.put(key.substring(prefix.length()), param.getValue());
    }
  }
  log.info("initializing timeline from table properties: {},{},{}", getStorageTableName(), getUpdatePeriod(),
    getPartCol());
  initFromProperties(props);
  log.info("initialized to: {}", this);
}
/**
 * Persists this timeline into the table parameters: records the implementation class name
 * under the storage-class key, then writes every serialized property under this timeline's
 * key prefix.
 *
 * @param table
 * @see #init(org.apache.hadoop.hive.ql.metadata.Table)
 */
public void updateTableParams(Table table) {
  Map<String, String> params = table.getParameters();
  String prefix = MetastoreUtil.getPartitionInfoKeyPrefix(getUpdatePeriod(), getPartCol());
  String storageClassKey = MetastoreUtil.getPartitionTimelineStorageClassKey(getUpdatePeriod(), getPartCol());
  // record which PartitionTimeline implementation wrote these properties, for init() to pick up
  params.put(storageClassKey, this.getClass().getCanonicalName());
  for (Map.Entry<String, String> prop : toProperties().entrySet()) {
    params.put(prefix + prop.getKey(), prop.getValue());
  }
}
// NOTE(review): fragment of a test method whose signature lies outside this chunk
// (`inst1`/`inst2`, `period` and loop index `i` are declared elsewhere) — presumably it
// verifies PartitionTimeline round-tripping through toProperties()/initFromProperties().
// Snapshot the empty timeline's serialized form.
Map<String, String> props = inst1.toProperties();
assertTrue(inst2.initFromProperties(props));
// both instances start out empty
assertTrue(inst1.isEmpty());
assertTrue(inst2.isEmpty());
// adding to only one instance makes them unequal
assertTrue(inst1.add(TimePartition.of(period, TestTimePartition.NOW)));
assertFalse(inst1.equals(inst2));
// adding the same partition to the other restores equality and consistency
assertTrue(inst2.add(TimePartition.of(period, TestTimePartition.NOW)));
assertTrue(inst1.isConsistent());
assertTrue(inst2.isConsistent());
assertEquals(inst1, inst2);
// re-initializing from the *empty* snapshot resets inst2, breaking equality again
assertTrue(inst2.initFromProperties(props));
assertFalse(inst1.equals(inst2));
// round-tripping inst1's current state into inst2 makes them equal once more
assertTrue(inst2.initFromProperties(inst1.toProperties()));
assertEquals(inst1, inst2);
// reset both to the empty snapshot
inst1.initFromProperties(props);
inst2.initFromProperties(props);
// add a partition offset by i*2 periods from NOW and check consistency survives a round trip
assertTrue(inst1.add(TimePartition.of(period, TestTimePartition.timeAtDiff(TestTimePartition.NOW, period, i * 2))));
assertTrue(inst1.isConsistent());
inst2.initFromProperties(inst1.toProperties());
assertTrue(inst2.isConsistent());
// NOTE(review): incomplete fragment — braces are unbalanced and the same loop variable
// (`timeline`) is declared twice, so these are most likely two separate snippets from a
// larger test method (an add phase and a drop/cleanup phase) mashed together by extraction.
// Code is kept byte-identical; confirm against the original test source.
addedPartitions.add(part);
for (PartitionTimeline timeline : timelines) {
  timeline.add(part);
  // presumably from the drop phase: drop partitions never recorded as added — TODO confirm
  if (!addedPartitions.contains(part)) {
    for (PartitionTimeline timeline : timelines) {
      timeline.drop(part);
      assertTrue(timeline.isConsistent());
      assertTrue(timeline.isEmpty());
/**
 * Adds a whole partition range to the timeline. The default strategy simply walks the
 * range and adds each contained time partition; implementations may override with
 * something smarter.
 *
 * Note: every partition in the range is attempted even after a failure — the result is
 * the conjunction of all individual adds, not a short-circuit.
 *
 * @param partitionRange
 * @return whether add was successful
 * @throws LensException
 */
boolean add(TimePartitionRange partitionRange) throws LensException {
  boolean allAdded = true;
  for (TimePartition partition : partitionRange) {
    boolean added = add(partition);
    allAdded = allAdded && added;
  }
  return allAdded;
}
/**
 * Loads the timelines of every time-partition column of the given storage table from the
 * table's properties, creating cache entries as needed.
 *
 * @param updatePeriod     update period whose timelines are being loaded
 * @param storageTableName hive storage table to read properties from
 * @param timeLineKey      key for the timeline map
 * @throws HiveException
 * @throws LensException
 */
private void loadTimelinesFromTableProperties(UpdatePeriod updatePeriod, String storageTableName,
  String timeLineKey) throws HiveException, LensException {
  log.info("loading from table properties: {}", storageTableName);
  // Fetch the table once instead of once per partition column: the original called
  // getTable(storageTableName) inside the loop, and each call may hit the metastore.
  Table table = getTable(storageTableName);
  for (String partCol : getTimePartColNamesOfTable(storageTableName)) {
    ensureEntry(timeLineKey, storageTableName, updatePeriod, partCol).init(table);
  }
}
/**
 * Tells whether no partition exists for the given part column across all update periods
 * of the fact-storage table.
 *
 * @param fact    fact
 * @param storage storage
 * @param partCol part column
 * @return true if all the timelines for fact-storage table are empty for all valid update periods.
 * @throws HiveException
 * @throws LensException
 */
public boolean noPartitionsExist(String fact, String storage, String partCol)
  throws HiveException, LensException {
  // Hoisted: the original performed the get(fact, storage) lookup twice.
  Map<UpdatePeriod, ?> timelinesByPeriod = get(fact, storage);
  if (timelinesByPeriod == null) {
    // no cache entry at all for this fact-storage pair
    return true;
  }
  for (UpdatePeriod updatePeriod : timelinesByPeriod.keySet()) {
    PartitionTimeline timeline = get(fact, storage, updatePeriod, partCol);
    if (timeline != null && !timeline.isEmpty()) {
      return false;
    }
  }
  return true;
}
/** update partition timeline cache for deletion of time partition */
public boolean updateForDeletion(String cubeTableName, String storageName, UpdatePeriod updatePeriod,
  Map<String, Date> timePartSpec) throws HiveException, LensException {
  // fail fast. All part cols mentioned in all partitions should exist.
  for (String partCol : timePartSpec.keySet()) {
    getAndFailFast(cubeTableName, storageName, updatePeriod, partCol);
  }
  boolean updated = false;
  for (Map.Entry<String, Date> entry : timePartSpec.entrySet()) {
    TimePartition part = TimePartition.of(updatePeriod, entry.getValue());
    // Only drop from the timeline when no physical partition matching this part-col value
    // remains in the metastore — other partitions may still share the same time value.
    if (!partitionExistsByFilter(cubeTableName, storageName, updatePeriod,
      StorageConstants.getPartFilter(entry.getKey(), part.getDateString()))) {
      get(cubeTableName, storageName, updatePeriod, entry.getKey()).drop(part);
      updated = true;
    }
  }
  // true iff at least one timeline was actually modified
  return updated;
}
// NOTE(review): this brace closes the enclosing class, whose declaration is outside this chunk
}
/** check partition existence in the appropriate timeline if it exists */
public boolean partitionTimeExists(String name, String storage, UpdatePeriod period, String partCol, Date partSpec)
  throws HiveException, LensException {
  // Hoisted: the original performed the identical 4-key get(...) lookup twice.
  PartitionTimeline timeline = get(name, storage, period, partCol);
  // no timeline for this combination means the partition can't exist in the cache
  return timeline != null && timeline.exists(TimePartition.of(period, partSpec));
}
/** * Adds given partition(for storageTable, updatePeriod, partitionColum=partition) for batch addition in an * appropriate timeline object. Ignore if partition is not valid. * * @param timeLineKey key for the timeLine map * @param storageTableName hive table name * @param updatePeriod update period * @param partitionColumn partition column * @param partition partition */ public void addForBatchAddition(String timeLineKey, String storageTableName, UpdatePeriod updatePeriod, String partitionColumn, String partition) { try { ensureEntry(timeLineKey, storageTableName, updatePeriod, partitionColumn) .addForBatchAddition(TimePartition.of(updatePeriod, partition)); } catch (LensException e) { // to take care of the case where partition name is something like `latest` log.error("Couldn't parse partition: {} with update period: {}, skipping.", partition, updatePeriod, e); } }
/**
 * Adds a whole partition range to the timeline. The default strategy simply walks the
 * range and adds each contained time partition; implementations may override with
 * something smarter.
 *
 * Note: every partition in the range is attempted even after a failure — the result is
 * the conjunction of all individual adds, not a short-circuit.
 *
 * @param partitionRange
 * @return whether add was successful
 * @throws LensException
 */
boolean add(TimePartitionRange partitionRange) throws LensException {
  boolean allAdded = true;
  for (TimePartition partition : partitionRange) {
    boolean added = add(partition);
    allAdded = allAdded && added;
  }
  return allAdded;
}
/**
 * Loads the timelines of every time-partition column of the given storage table from the
 * table's properties, creating cache entries as needed.
 *
 * @param updatePeriod     update period whose timelines are being loaded
 * @param storageTableName hive storage table to read properties from
 * @param timeLineKey      key for the timeline map
 * @throws HiveException
 * @throws LensException
 */
private void loadTimelinesFromTableProperties(UpdatePeriod updatePeriod, String storageTableName,
  String timeLineKey) throws HiveException, LensException {
  log.info("loading from table properties: {}", storageTableName);
  // Fetch the table once instead of once per partition column: the original called
  // getTable(storageTableName) inside the loop, and each call may hit the metastore.
  Table table = getTable(storageTableName);
  for (String partCol : getTimePartColNamesOfTable(storageTableName)) {
    ensureEntry(timeLineKey, storageTableName, updatePeriod, partCol).init(table);
  }
}
/**
 * Tells whether no partition exists for the given part column across all update periods
 * of the fact-storage table.
 *
 * @param fact    fact
 * @param storage storage
 * @param partCol part column
 * @return true if all the timelines for fact-storage table are empty for all valid update periods.
 * @throws HiveException
 * @throws LensException
 */
public boolean noPartitionsExist(String fact, String storage, String partCol)
  throws HiveException, LensException {
  // Hoisted: the original performed the get(fact, storage) lookup twice.
  Map<UpdatePeriod, ?> timelinesByPeriod = get(fact, storage);
  if (timelinesByPeriod == null) {
    // no cache entry at all for this fact-storage pair
    return true;
  }
  for (UpdatePeriod updatePeriod : timelinesByPeriod.keySet()) {
    PartitionTimeline timeline = get(fact, storage, updatePeriod, partCol);
    if (timeline != null && !timeline.isEmpty()) {
      return false;
    }
  }
  return true;
}
/** update partition timeline cache for deletion of time partition */
public boolean updateForDeletion(String cubeTableName, String storageName, UpdatePeriod updatePeriod,
  Map<String, Date> timePartSpec) throws HiveException, LensException {
  // fail fast. All part cols mentioned in all partitions should exist.
  for (String partCol : timePartSpec.keySet()) {
    getAndFailFast(cubeTableName, storageName, updatePeriod, partCol);
  }
  boolean updated = false;
  for (Map.Entry<String, Date> entry : timePartSpec.entrySet()) {
    TimePartition part = TimePartition.of(updatePeriod, entry.getValue());
    // Only drop from the timeline when no physical partition matching this part-col value
    // remains in the metastore — other partitions may still share the same time value.
    if (!partitionExistsByFilter(cubeTableName, storageName, updatePeriod,
      StorageConstants.getPartFilter(entry.getKey(), part.getDateString()))) {
      get(cubeTableName, storageName, updatePeriod, entry.getKey()).drop(part);
      updated = true;
    }
  }
  // true iff at least one timeline was actually modified
  return updated;
}
// NOTE(review): this brace closes the enclosing class, whose declaration is outside this chunk
}
/** check partition existence in the appropriate timeline if it exists */
public boolean partitionTimeExists(String name, String storage, UpdatePeriod period, String partCol, Date partSpec)
  throws HiveException, LensException {
  // Hoisted: the original performed the identical 4-key get(...) lookup twice.
  PartitionTimeline timeline = get(name, storage, period, partCol);
  // no timeline for this combination means the partition can't exist in the cache
  return timeline != null && timeline.exists(TimePartition.of(period, partSpec));
}
/** * Adds given partition(for storageTable, updatePeriod, partitionColum=partition) for batch addition in an * appropriate timeline object. Ignore if partition is not valid. * * @param timeLineKey key for the timeLine map * @param storageTableName hive table name * @param updatePeriod update period * @param partitionColumn partition column * @param partition partition */ public void addForBatchAddition(String timeLineKey, String storageTableName, UpdatePeriod updatePeriod, String partitionColumn, String partition) { try { ensureEntry(timeLineKey, storageTableName, updatePeriod, partitionColumn) .addForBatchAddition(TimePartition.of(updatePeriod, partition)); } catch (LensException e) { // to take care of the case where partition name is something like `latest` log.error("Couldn't parse partition: {} with update period: {}, skipping.", partition, updatePeriod, e); } }
/**
 * Commits every partition queued for batch addition to this timeline.
 *
 * //TODO: improve batch addition implementation.
 *
 * @return true if every queued partition was added successfully, or if nothing was queued
 * @throws LensException
 */
public boolean commitBatchAdditions() throws LensException {
  if (getAll() == null) {
    // nothing queued for batch addition
    return true;
  }
  log.info("initializing timeline from batch addition: {},{},{}", getStorageTableName(), getUpdatePeriod(),
    getPartCol());
  boolean success = add(getAll());
  // the queue is cleared regardless of whether the add succeeded
  all = null;
  log.info("initialized to: {}", this);
  return success;
}
/**
 * Initializes this timeline from its serialized state stored in the table parameters.
 * Only parameters carrying this timeline's key prefix are considered; the prefix is
 * stripped before handing the properties to {@link #initFromProperties}.
 *
 * @param table
 * @throws LensException
 * @see #updateTableParams(org.apache.hadoop.hive.ql.metadata.Table)
 */
public void init(Table table) throws LensException {
  String prefix = MetastoreUtil.getPartitionInfoKeyPrefix(getUpdatePeriod(), getPartCol());
  HashMap<String, String> props = new HashMap<String, String>();
  for (Map.Entry<String, String> param : table.getParameters().entrySet()) {
    String key = param.getKey();
    if (key.startsWith(prefix)) {
      // strip the prefix so the timeline sees only its own property names
      props.put(key.substring(prefix.length()), param.getValue());
    }
  }
  log.info("initializing timeline from table properties: {},{},{}", getStorageTableName(), getUpdatePeriod(),
    getPartCol());
  initFromProperties(props);
  log.info("initialized to: {}", this);
}
/**
 * Persists this timeline into the table parameters: records the implementation class name
 * under the storage-class key, then writes every serialized property under this timeline's
 * key prefix.
 *
 * @param table
 * @see #init(org.apache.hadoop.hive.ql.metadata.Table)
 */
public void updateTableParams(Table table) {
  Map<String, String> params = table.getParameters();
  String prefix = MetastoreUtil.getPartitionInfoKeyPrefix(getUpdatePeriod(), getPartCol());
  String storageClassKey = MetastoreUtil.getPartitionTimelineStorageClassKey(getUpdatePeriod(), getPartCol());
  // record which PartitionTimeline implementation wrote these properties, for init() to pick up
  params.put(storageClassKey, this.getClass().getCanonicalName());
  for (Map.Entry<String, String> prop : toProperties().entrySet()) {
    params.put(prefix + prop.getKey(), prop.getValue());
  }
}