public String getRecordKey() {
  assert key != null;
  return key.getRecordKey();
}
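For orientation, a minimal hedged sketch of the HoodieKey this accessor delegates to (the literal values are illustrative):

// A HoodieKey pairs a record key with a partition path; values are examples.
HoodieKey key = new HoodieKey("uuid-123", "2017/10/22");
assert key.getRecordKey().equals("uuid-123");
assert key.getPartitionPath().equals("2017/10/22");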
@Override
public JavaPairRDD<HoodieKey, Optional<String>> fetchRecordLocation(JavaRDD<HoodieKey> hoodieKeys,
    JavaSparkContext jsc, HoodieTable<T> hoodieTable) {
  return hoodieKeys.mapToPair(hk -> new Tuple2<>(hk, Optional.of(getBucket(hk.getRecordKey()))));
}
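The snippet above relies on a getBucket helper. A hedged sketch of what such a key-to-bucket mapping could look like; the hashing scheme and numBuckets field are illustrative assumptions, not the actual implementation:

// Illustrative assumption only: hash the record key into a fixed bucket id.
private String getBucket(String recordKey) {
  int bucket = (recordKey.hashCode() & Integer.MAX_VALUE) % numBuckets;
  return String.valueOf(bucket);
}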
@Override
public String getRowKey(@NonNull final RawData rawdata) {
  try {
    return ((HoodieRecord) rawdata.getData()).getKey().getRecordKey();
  } catch (Exception e) {
    log.debug("Not able to extract Hadoop_row_key from RawData");
    return DEFAULT_ROW_KEY;
  }
}
@Override
protected void processNextDeletedKey(HoodieKey hoodieKey) {
  records.put(hoodieKey.getRecordKey(), SpillableMapUtils.generateEmptyPayload(
      hoodieKey.getRecordKey(), hoodieKey.getPartitionPath(), getPayloadClassFQN()));
}
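A hedged sketch of what the empty payload means downstream; the lookup and the schema variable are assumptions for illustration, but yielding no insert value is how Hudi payloads signal a delete:

// Sketch (names assumed): a deleted key maps to a record whose payload
// produces no insert value; merge logic treats that as a delete.
HoodieRecord<? extends HoodieRecordPayload> record = records.get(recordKey);
if (!record.getData().getInsertValue(schema).isPresent()) {
  // the key was deleted by a later log block; drop it from the merged view
}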
public JavaPairRDD<HoodieKey, Optional<String>> fetchRecordLocation(JavaRDD<HoodieKey> hoodieKeys,
    JavaSparkContext jsc, HoodieTable<T> hoodieTable) {
  JavaPairRDD<String, String> partitionRecordKeyPairRDD = hoodieKeys
      .mapToPair(key -> new Tuple2<>(key.getPartitionPath(), key.getRecordKey()));

  // Look up the index for all the partition/recordKey pairs
  JavaPairRDD<String, String> rowKeyFilenamePairRDD =
      lookupIndex(partitionRecordKeyPairRDD, jsc, hoodieTable);

  JavaPairRDD<String, HoodieKey> rowKeyHoodieKeyPairRDD = hoodieKeys
      .mapToPair(key -> new Tuple2<>(key.getRecordKey(), key));

  return rowKeyHoodieKeyPairRDD.leftOuterJoin(rowKeyFilenamePairRDD).mapToPair(keyPathTuple -> {
    Optional<String> recordLocationPath;
    if (keyPathTuple._2._2.isPresent()) {
      String fileName = keyPathTuple._2._2.get();
      String partitionPath = keyPathTuple._2._1.getPartitionPath();
      recordLocationPath = Optional.of(
          new Path(new Path(hoodieTable.getMetaClient().getBasePath(), partitionPath), fileName)
              .toUri().getPath());
    } else {
      recordLocationPath = Optional.absent();
    }
    return new Tuple2<>(keyPathTuple._2._1, recordLocationPath);
  });
}
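A hedged usage sketch for the method above; index, jsc, and hoodieTable are assumed to be an initialized index, Spark context, and table already in scope, and the keys are illustrative:

// Illustrative only: resolve the physical file path (if any) for each key.
JavaRDD<HoodieKey> keys = jsc.parallelize(Arrays.asList(
    new HoodieKey("uuid-1", "2017/10/22"),
    new HoodieKey("uuid-2", "2017/10/23")));
index.fetchRecordLocation(keys, jsc, hoodieTable).collect().forEach(t ->
    System.out.println(t._1.getRecordKey() + " -> "
        + (t._2.isPresent() ? t._2.get() : "<not indexed>")));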
/**
 * Generates a new avro record of the above schema format, retaining the key if optionally provided.
 */
public static TestRawTripPayload generateRandomValue(HoodieKey key, String commitTime) throws IOException {
  GenericRecord rec = generateGenericRecord(key.getRecordKey(), "rider-" + commitTime,
      "driver-" + commitTime, 0.0);
  HoodieAvroUtils.addCommitMetadataToRecord(rec, commitTime, "-1");
  return new TestRawTripPayload(rec.toString(), key.getRecordKey(), key.getPartitionPath(),
      TRIP_EXAMPLE_SCHEMA);
}
public HoodieRecord generateDeleteRecord(HoodieKey key) throws IOException {
  TestRawTripPayload payload = new TestRawTripPayload(Optional.empty(), key.getRecordKey(),
      key.getPartitionPath(), null, true);
  return new HoodieRecord(key, payload);
}
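A hedged sketch of how such delete records might be issued; dataGen, writeClient, keysToDelete, and newCommitTime are assumed helpers and variables, not part of the snippet above:

// Illustrative only: deletes in Hudi are upserts of records with empty payloads.
List<HoodieRecord> deletes = new ArrayList<>();
for (HoodieKey key : keysToDelete) {
  deletes.add(dataGen.generateDeleteRecord(key));
}
writeClient.upsert(jsc.parallelize(deletes), newCommitTime);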
@Test
public void testRangePruning() {
  HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath).build();
  HoodieBloomIndex index = new HoodieBloomIndex(config);

  final Map<String, List<BloomIndexFileInfo>> partitionToFileIndexInfo = new HashMap<>();
  partitionToFileIndexInfo.put("2017/10/22",
      Arrays.asList(new BloomIndexFileInfo("f1"), new BloomIndexFileInfo("f2", "000", "000"),
          new BloomIndexFileInfo("f3", "001", "003"), new BloomIndexFileInfo("f4", "002", "007"),
          new BloomIndexFileInfo("f5", "009", "010")));

  JavaPairRDD<String, String> partitionRecordKeyPairRDD = jsc.parallelize(
      Arrays.asList(new Tuple2<>("2017/10/22", "003"), new Tuple2<>("2017/10/22", "002"),
          new Tuple2<>("2017/10/22", "005"), new Tuple2<>("2017/10/22", "004"))).mapToPair(t -> t);

  List<Tuple2<String, Tuple2<String, HoodieKey>>> comparisonKeyList = index
      .explodeRecordRDDWithFileComparisons(partitionToFileIndexInfo, partitionRecordKeyPairRDD)
      .collect();

  assertEquals(10, comparisonKeyList.size());
  Map<String, List<String>> recordKeyToFileComps = comparisonKeyList.stream()
      .collect(Collectors.groupingBy(t -> t._2()._2().getRecordKey(),
          Collectors.mapping(t -> t._2()._1().split("#")[0], Collectors.toList())));

  assertEquals(4, recordKeyToFileComps.size());
  assertEquals(Arrays.asList("f1", "f3", "f4"), recordKeyToFileComps.get("002"));
  assertEquals(Arrays.asList("f1", "f3", "f4"), recordKeyToFileComps.get("003"));
  assertEquals(Arrays.asList("f1", "f4"), recordKeyToFileComps.get("004"));
  assertEquals(Arrays.asList("f1", "f4"), recordKeyToFileComps.get("005"));
}
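The assertions above follow from a containment check like the hedged sketch below (method names are assumptions for illustration): a key must be compared against a file when the file recorded no key range (e.g. f1) or when the key falls inside [minKey, maxKey] (e.g. "002" within f4's [002, 007]); files whose ranges exclude the key (f2, f5) are pruned.

// Sketch (method names assumed): should `key` be checked against `file`?
boolean mustCompare = !file.hasKeyRanges()
    || (file.getMinRecordKey().compareTo(key) <= 0
        && file.getMaxRecordKey().compareTo(key) >= 0);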
/**
 * Deduplicate Hoodie records, using the given deduplication function.
 */
JavaRDD<HoodieRecord<T>> deduplicateRecords(JavaRDD<HoodieRecord<T>> records, int parallelism) {
  boolean isIndexingGlobal = index.isGlobal();
  return records.mapToPair(record -> {
    HoodieKey hoodieKey = record.getKey();
    // If the index is global, records with the same record key may differ only in
    // their partitionPath, so dedupe on the record key alone; otherwise use the full key.
    Object key = isIndexingGlobal ? hoodieKey.getRecordKey() : hoodieKey;
    return new Tuple2<>(key, record);
  }).reduceByKey((rec1, rec2) -> {
    @SuppressWarnings("unchecked")
    T reducedData = (T) rec1.getData().preCombine(rec2.getData());
    // We cannot allow the user to change the key or partitionPath, since that
    // would affect everything downstream; so pick them from one of the records.
    return new HoodieRecord<T>(rec1.getKey(), reducedData);
  }, parallelism).map(Tuple2::_2);
}
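For reference, a hedged sketch of a preCombine implementation that such deduplication would invoke; the class and its ts ordering field are illustrative assumptions, not Hudi code:

// Sketch: a payload whose preCombine keeps the row with the larger timestamp.
public class LatestWinsPayload implements HoodieRecordPayload<LatestWinsPayload> {
  private final GenericRecord record;
  private final long ts; // assumed ordering field

  public LatestWinsPayload(GenericRecord record, long ts) {
    this.record = record;
    this.ts = ts;
  }

  @Override
  public LatestWinsPayload preCombine(LatestWinsPayload another) {
    // Keep whichever copy carries the newer timestamp.
    return this.ts >= another.ts ? this : another;
  }

  @Override
  public Optional<IndexedRecord> combineAndGetUpdateValue(IndexedRecord currentValue, Schema schema) {
    return getInsertValue(schema); // in this sketch, the incoming value always wins
  }

  @Override
  public Optional<IndexedRecord> getInsertValue(Schema schema) {
    return Optional.of(record);
  }
}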
final List<String> readKeys = new ArrayList<>(200);
final List<Boolean> emptyPayloads = new ArrayList<>();
scanner.forEach(s -> readKeys.add(s.getKey().getRecordKey()));
scanner.forEach(s -> {
  try {
    if (!s.getData().getInsertValue(schema).isPresent()) {
      emptyPayloads.add(true);
    }
  } catch (IOException io) {
    throw new UncheckedIOException(io);
  }
});
// ... (rollback of the delete block elided) ...
readKeys.clear();
scanner = new HoodieMergedLogRecordScanner(fs, basePath, allLogFiles, schema, "101", 10240L,
    readBlocksLazily, false, bufferSize, BASE_OUTPUT_PATH);
scanner.forEach(s -> readKeys.add(s.getKey().getRecordKey()));
assertEquals("Stream collect should return all 200 records after rollback of delete", 200,
    readKeys.size());
assertEquals("", 200, scanner.getTotalLogRecords()); Set<String> readKeys = new HashSet<>(200); scanner.forEach(s -> readKeys.add(s.getKey().getRecordKey())); assertEquals("Stream collect should return all 200 records", 200, readKeys.size()); copyOfRecords1.addAll(copyOfRecords2);
scanner.forEach(s -> readKeys.add(s.getKey().getRecordKey()));
assertEquals("Stream collect should return 0 records", 0, readKeys.size());
assertEquals("We read 200 records from 2 write batches", 200, scanner.getTotalLogRecords()); Set<String> readKeys = new HashSet<>(200); scanner.forEach(s -> readKeys.add(s.getKey().getRecordKey())); assertEquals("Stream collect should return all 200 records", 200, readKeys.size()); copyOfRecords1.addAll(copyOfRecords3);
@Test
public void testAvroLogRecordReaderWithInvalidRollback()
    throws IOException, URISyntaxException, InterruptedException {
  Schema schema = HoodieAvroUtils.addMetadataFields(getSimpleSchema());
  // Set a small threshold so that every block is a new version
  Writer writer = HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath)
      .withFileExtension(HoodieLogFile.DELTA_EXTENSION).withFileId("test-fileid1")
      .overBaseCommit("100").withFs(fs).build();

  // Write 1
  List<IndexedRecord> records1 = SchemaTestUtil.generateHoodieTestRecords(0, 100);
  Map<HoodieLogBlock.HeaderMetadataType, String> header = Maps.newHashMap();
  header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "100");
  header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, schema.toString());
  HoodieAvroDataBlock dataBlock = new HoodieAvroDataBlock(records1, header);
  writer = writer.appendBlock(dataBlock);

  // Write an invalid rollback for a failed write (possible for in-flight commits)
  header.put(HoodieLogBlock.HeaderMetadataType.TARGET_INSTANT_TIME, "101");
  header.put(HoodieLogBlock.HeaderMetadataType.COMMAND_BLOCK_TYPE,
      String.valueOf(HoodieCommandBlock.HoodieCommandBlockTypeEnum.ROLLBACK_PREVIOUS_BLOCK.ordinal()));
  HoodieCommandBlock commandBlock = new HoodieCommandBlock(header);
  writer = writer.appendBlock(commandBlock);

  List<String> allLogFiles = FSUtils.getAllLogFiles(fs, partitionPath, "test-fileid1",
      HoodieLogFile.DELTA_EXTENSION, "100").map(s -> s.getPath().toString())
      .collect(Collectors.toList());

  HoodieMergedLogRecordScanner scanner = new HoodieMergedLogRecordScanner(fs, basePath, allLogFiles,
      schema, "100", 10240L, readBlocksLazily, false, bufferSize, BASE_OUTPUT_PATH);
  assertEquals("We still would read 100 records", 100, scanner.getTotalLogRecords());
  final List<String> readKeys = new ArrayList<>(100);
  scanner.forEach(s -> readKeys.add(s.getKey().getRecordKey()));
  assertEquals("Stream collect should return all 100 records", 100, readKeys.size());
}
if (record._1.getRecordKey().equals("1eb5b87a-1feh-4edd-87b4-6ec96dc405a0")) {
  assertTrue(record._2.isPresent());
  Path path1 = new Path(record._2.get());
  assertEquals(FSUtils.getFileId(filename1), FSUtils.getFileId(path1.getName()));
} else if (record._1.getRecordKey().equals("2eb5b87b-1feu-4edd-87b4-6ec96dc405a0")) {
  assertTrue(record._2.isPresent());
  Path path2 = new Path(record._2.get());
  assertEquals(FSUtils.getFileId(filename2), FSUtils.getFileId(path2.getName()));
} else if (record._1.getRecordKey().equals("3eb5b87c-1fej-4edd-87b4-6ec96dc405a0")) {
  assertTrue(!record._2.isPresent());
} else if (record._1.getRecordKey().equals("4eb5b87c-1fej-4edd-87b4-6ec96dc405a0")) {
  assertTrue(record._2.isPresent());
  Path path3 = new Path(record._2.get());
  assertEquals(FSUtils.getFileId(filename3), FSUtils.getFileId(path3.getName()));
}
javaRDD = index.tagLocation(writeRecords, jsc, hoodieTable);
assertTrue(javaRDD.filter(record -> record.isCurrentLocationKnown()).collect().size() == 200);
assertTrue(javaRDD.map(record -> record.getKey().getRecordKey()).distinct().count() == 200);
assertTrue(javaRDD.filter(record -> (record.getCurrentLocation() != null
    && record.getCurrentLocation().getCommitTime().equals(newCommitTime))).distinct().count() == 200);