/**
 * Checks whether the stored checkpoint sorts lexicographically after the next partition.
 *
 * @param checkPoint the persisted checkpoint value, if any
 * @return true only when a checkpoint is present and its value compares greater than the
 *         value of {@code this.nextPartition}
 */
private boolean checkpointGreaterThanNextPartition(@NonNull final Optional<StringValue> checkPoint) {
    // Collapse the if-return-true/return-false pattern into a single boolean expression.
    // NOTE(review): this.nextPartition.get() is invoked unchecked — assumes nextPartition is
    // always present whenever this method is called; confirm with callers.
    return checkPoint.isPresent()
        && checkPoint.get().getValue().compareTo(this.nextPartition.get()) > 0;
}
}
/**
 * Writes this manager's metadata to the stream: the serialization version, the entry
 * count, then each key/value pair as modified-UTF strings.
 *
 * @param out destination stream; caller owns its lifecycle
 * @throws IOException if any write fails
 */
private void serialize(final ObjectOutputStream out) throws IOException {
    out.writeInt(SERIALIZATION_VERSION);
    out.writeInt(this.metadataMap.size());
    for (final Map.Entry<String, StringValue> metadataEntry : this.metadataMap.entrySet()) {
        final String metadataKey = metadataEntry.getKey();
        final String metadataValue = metadataEntry.getValue().getValue();
        log.info("Serializing key: {} and value: {}", metadataKey, metadataValue);
        out.writeUTF(metadataKey);
        out.writeUTF(metadataValue);
    }
}
/**
 * Updates in-memory metadata map with given key-value pair.
 *
 * @param key   metadata key to set; must be non-empty
 * @param value new value for the key
 * @throws JobRuntimeException if changes have already been persisted (the manager
 *         becomes read-only after saving)
 */
public void set(@NotEmpty final String key, @NonNull final StringValue value) {
    // Mutations are rejected once saveChanges has flipped; the map is effectively frozen.
    if (!this.saveChanges.get()) {
        throw new JobRuntimeException(
            String.format("Metadata manager changes are already saved.key:%s:value%s", key, value));
    }
    // NOTE(review): the raw String (value.getValue()) is stored here, while serialize/deserialize
    // elsewhere in this codebase operate on Map<String, StringValue> — confirm this field's
    // declared value type matches.
    this.metadataMap.put(key, value.getValue());
}
/**
 * Lists existing partitions that sort strictly after the given checkpoint.
 *
 * @param checkpoint optional checkpoint; when absent, every existing partition is returned
 * @return partitions whose names compare lexicographically greater than the checkpoint value
 * @throws IOException if the partition listing fails
 */
private List<String> listPartitionsAfterCheckpoint(final Optional<StringValue> checkpoint) throws IOException {
    final List<String> allPartitions = getExistingPartitions();
    // No checkpoint recorded yet: everything is new.
    if (!checkpoint.isPresent()) {
        return allPartitions;
    }
    final String checkpointValue = checkpoint.get().getValue();
    return allPartitions.stream()
        .filter(partition -> partition.compareTo(checkpointValue) > 0)
        .collect(Collectors.toList());
}
/**
 * Returns the metadata for the given DAG, deserialized from its stored JSON form.
 *
 * @param key metadata key to look up; must be non-empty
 * @return the deserialized map when the key exists, otherwise {@code Optional.absent()}
 * @throws IOException if the stored JSON cannot be parsed
 */
public Optional<Map<String, String>> get(@NotEmpty final String key) throws IOException {
    final Optional<StringValue> storedValue = this.metadataManager.get(key);
    if (!storedValue.isPresent()) {
        return Optional.absent();
    }
    return Optional.of(mapper.readValue(storedValue.get().getValue(), typeRef));
}
/**
 * Loads previously persisted sink statistics, if any, into the in-memory stat queue.
 * Stats are stored as a map keyed by their insertion index ("0", "1", ...), so entries
 * are replayed in that order.
 */
public void init() {
    final Optional<StringValue> storedStats = this.metadataManager.get(getMetakey());
    if (!storedStats.isPresent()) {
        return;
    }
    final Map<String, String> statHistory = MapUtil.deserializeMap(storedStats.get().getValue());
    final int statCount = statHistory.size();
    // Keys are stringified sequential indices; replay in original order.
    for (int idx = 0; idx < statCount; idx++) {
        this.sinkStatQ.add(SinkStat.deserialize(statHistory.get(String.valueOf(idx))));
    }
}
/**
 * Rebuilds the previous run's Kafka partition→offset state from the metadata manager.
 *
 * Keys are matched against three prefixes, in an order that matters:
 * 1. this topic's own key prefix — parse partition/offset into the state;
 * 2. some other topic's prefixed key — ignore;
 * 3. legacy un-prefixed Kafka keys — migrate into the state and delete the old key.
 */
@Override
public void initPreviousRunState(@NonNull final IMetadataManager<StringValue> metadataManager) {
    final String topicName = this.conf.getTopicName();
    final Map<Integer, Long> metadata = new HashMap<>();
    final String topicSpecificName = getTopicSpecificMetadataKey(topicName);
    // Legacy keys are collected first and removed after iteration to avoid mutating
    // the key set while walking it.
    final List<String> toDelete = new LinkedList<>();
    metadataManager.getAllKeys().forEach(key -> {
        if (key.startsWith(topicSpecificName)) {
            // this is my specific topic
            // Suffix after the prefix is the partition number; the stored value is the offset.
            metadata.put(Integer.parseInt(key.substring(topicSpecificName.length())),
                Long.parseLong(metadataManager.get(key).get().getValue()));
        } else if (key.startsWith(KAFKA_METADATA_WITH_SEPARATOR)) {
            // this is a specific topic, but not mine. ignore.
            // Intentional no-op branch; kept so this case is not swallowed by the next check.
            assert true;
        } else if (key.startsWith(KAFKA_METADATA_PREFIX)) {
            // this is unspecified topic
            metadata.put(Integer.parseInt(key.substring(KAFKA_METADATA_PREFIX.length())),
                Long.parseLong(metadataManager.get(key).get().getValue()));
            // delete the old, unspecified metadata
            toDelete.add(key);
        }
    });
    toDelete.forEach(metadataManager::remove);
    this.previousRunState = Optional.of(new KafkaRunState(metadata));
}
/**
 * Reads a metadata map previously written by {@code serialize}: a version int, an entry
 * count, then that many UTF key/value pairs.
 *
 * @param ois source stream positioned at the version header
 * @return the reconstructed key → value map
 * @throws IOException       on stream read failure
 * @throws MetadataException if the version is unsupported or trailing bytes remain
 */
public static Map<String, StringValue> deserialize(final ObjectInputStream ois) throws IOException {
    final int version = ois.readInt();
    // Fail fast on any version we do not understand.
    if (version != SERIALIZATION_VERSION) {
        throw new MetadataException("Version: " + version + " is not supported");
    }
    final int entryCount = ois.readInt();
    final Map<String, StringValue> metadata = new HashMap<>();
    for (int i = 0; i < entryCount; i++) {
        final String key = ois.readUTF();
        final StringValue value = new StringValue(ois.readUTF());
        log.info("Deserializing key: {} and value: {}", key, value.getValue());
        metadata.put(key, value);
    }
    // Sanity check: the declared entry count should have consumed the whole payload.
    if (ois.available() > 0) {
        throw new MetadataException("Deserialization error, not all bytes were read off the stream");
    }
    return metadata;
}
/**
 * Entry point: parses the -m option for an HDFS metadata file path and prints every
 * key/value pair stored in that file.
 *
 * @param args command-line arguments; must include the metadata file option
 * @throws ParseException if the command line cannot be parsed (usage is printed first)
 * @throws IOException    if the metadata file cannot be read
 */
public static void main(final String[] args) throws ParseException, IOException {
    final Options options = getCLIOptions();
    final CommandLine cmd;
    try {
        cmd = new GnuParser().parse(options, args);
    } catch (final ParseException e) {
        // Print usage help before propagating the parse failure.
        final String cmdLineSyntax = "java -cp [jar_name] com.uber.marmaray.tools.HDFSMetadataPrinter "
            + "-m [METADATA_FILE]";
        final String header = "This tools prints out all the metadata contents of a HDFS metadata file.";
        final String footer = "For help, please contact the Hadoop Data Platform team";
        CommandLineUtil.printHelp(options, cmdLineSyntax, header, footer);
        throw e;
    }
    final String metadataFilePath = cmd.getOptionValue(METADATA_FILE_OPTION);
    Preconditions.checkState(!Strings.isNullOrEmpty(metadataFilePath));
    log.info("Printing contents of metadata file: " + metadataFilePath);

    final Configuration conf = new Configuration();
    final FileSystem fs = FSUtils.getFs(conf);
    // Single try-with-resources; resources are closed in reverse declaration order.
    try (final InputStream is = new BufferedInputStream(fs.open(new Path(metadataFilePath)));
         final ObjectInputStream input = new ObjectInputStream(is)) {
        final Map<String, StringValue> metadataMap = HDFSMetadataManager.deserialize(input);
        metadataMap.forEach((key, value) ->
            log.info("Key: " + key + " Value: " + value.getValue()));
    }
}
/**
 * Asserts that every entry in the deserialized map is present in the in-memory manager
 * with an equal value, and that the two key sets are identical.
 *
 * @param deserializedMap map loaded back from persisted metadata
 */
private void validateDeserializedMapEqualsInMemoryMap(final Map<String, StringValue> deserializedMap) {
    deserializedMap.forEach((key, expectedValue) -> {
        final Optional<StringValue> inMemoryValue = this.metadataManager.get(key);
        Assert.assertTrue(inMemoryValue.isPresent());
        Assert.assertEquals(inMemoryValue.get().getValue(), expectedValue.getValue());
    });
    // Key sets must match exactly — no extra or missing keys on either side.
    Assert.assertEquals(this.metadataManager.getAllKeys(), deserializedMap.keySet());
}
/**
 * Verifies that setting the checkpoint key twice overwrites the first value, and that
 * the overwritten value round-trips through save + reload.
 */
@Test
public void testHDFSOverwriteCheckpointValue() throws IOException, InterruptedException {
    final StringValue val1 = new StringValue("testVal");
    this.metadataManager.set(MetadataConstants.CHECKPOINT_KEY, val1);
    final StringValue val2 = new StringValue("testVal2");
    this.metadataManager.set(MetadataConstants.CHECKPOINT_KEY, val2);

    final Optional<StringValue> readValue = this.metadataManager.get(MetadataConstants.CHECKPOINT_KEY);
    Assert.assertTrue(readValue.isPresent());
    // assertEquals (not assertTrue(equals)) so a mismatch reports both values.
    Assert.assertEquals("testVal2", readValue.get().getValue());

    this.metadataManager.saveChanges();
    final Optional<FileStatus> fs = this.metadataManager.getLatestMetadataFile();
    Assert.assertTrue(fs.isPresent());

    // Reload from disk and confirm it matches the in-memory state.
    final Map<String, StringValue> loadedMap = this.metadataManager.loadMetadata(fs.get().getPath());
    validateDeserializedMapEqualsInMemoryMap(loadedMap);
}
@Test public void testHDFSReadWriteSingleMetadataFile() throws IOException { // Test in memory final StringValue val = new StringValue("testVal"); this.metadataManager.set(MetadataConstants.CHECKPOINT_KEY, val); final Optional<StringValue> readValue = this.metadataManager.get(MetadataConstants.CHECKPOINT_KEY); Assert.assertTrue(readValue.isPresent()); Assert.assertTrue(readValue.get().getValue().equals("testVal")); this.metadataManager.set("foo", new StringValue("bar")); // Serialize the metadata map to a file this.metadataManager.saveChanges(); final Optional<FileStatus> fs = this.metadataManager.getLatestMetadataFile(); Assert.assertTrue(fs.isPresent()); // Deserialize the metadata map and check contents are the same final Map<String, StringValue> loadedMap = this.metadataManager.loadMetadata(fs.get().getPath()); validateDeserializedMapEqualsInMemoryMap(loadedMap); }
// After the second run: checkpoint is PARTITION1, so PARTITION2 is the next partition to process.
Assert.assertEquals(PARTITION1, latestCheckpoint2.get().getValue());
Assert.assertTrue(pm2.getNextPartition(latestCheckpoint2).isPresent());
Assert.assertEquals(pm2.getNextPartition(latestCheckpoint2).get(), PARTITION2);
// After the third run: checkpoint has advanced to PARTITION2 and no partitions remain.
Assert.assertEquals(PARTITION2, latestCheckpoint3.get().getValue());
Assert.assertFalse(pm3.getNextPartition(latestCheckpoint3).isPresent());
/**
 * Simulates one full processing cycle of a partition: verifies the current checkpoint,
 * computes work units, checks exactly one unit matching the expected partition, saves
 * the next run state, and persists the updated checkpoint.
 *
 * NOTE(review): @NotEmpty on an Optional parameter looks wrong — @NotEmpty targets
 * strings/collections; @NonNull is likely intended here. Confirm with the validation
 * framework in use before changing.
 */
private void virtuallyProcessPartition(@NonNull final HDFSPartitionManager partitionManager,
    @NonNull final HDFSMetadataManager metadataManager,
    @NotEmpty final Optional<StringValue> expectedLatestCheckpoint,
    @NotEmpty final String expectedNextPartition) throws IOException {
    // Precondition: the persisted checkpoint matches what the caller expects.
    Assert.assertEquals(expectedLatestCheckpoint, getLatestCheckpoint(metadataManager));
    final ParquetWorkUnitCalculator calculator = new ParquetWorkUnitCalculator(this.hiveConfig, this.fileSystem);
    calculator.initPreviousRunState(metadataManager);
    final ParquetWorkUnitCalculatorResult result = calculator.computeWorkUnits();
    // Exactly one work unit is expected, and it must be the next partition.
    final List<String> workUnits = result.getWorkUnits();
    Assert.assertEquals(1, workUnits.size());
    Assert.assertEquals(expectedNextPartition, workUnits.get(0));
    Assert.assertTrue(result.getNextRunState().getPartition().isPresent());
    Assert.assertEquals(expectedNextPartition, result.getNextRunState().getPartition().get());
    // Saving next-run state should move the checkpoint forward to the processed partition.
    calculator.saveNextRunState(metadataManager, result.getNextRunState());
    Assert.assertEquals(expectedNextPartition,
        metadataManager.get(MetadataConstants.CHECKPOINT_KEY).get().getValue());
    metadataManager.saveChanges();
}
// The second checkpoint should already point at the data path; recomputing work units and
// saving the next run state must leave the persisted checkpoint at that same path.
Assert.assertEquals(this.dataPath, latestCheckpoint2.get().getValue());
final IWorkUnitCalculator.IWorkUnitCalculatorResult<String, HiveRunState> iresult = calc.computeWorkUnits();
calc.saveNextRunState(metadataManager2, iresult.getNextRunState());
Assert.assertEquals(this.dataPath, metadataManager2.get(MetadataConstants.CHECKPOINT_KEY).get().getValue());