private void writeToBuffer(HoodieRecord<T> record) { // update the new location of the record, so we know where to find it next record.setNewLocation(new HoodieRecordLocation(commitTime, fileId)); Optional<IndexedRecord> indexedRecord = getIndexedRecord(record); if (indexedRecord.isPresent()) { recordList.add(indexedRecord.get()); } else { keysToDelete.add(record.getKey()); } numberOfRecords++; }
/**
 * Records {@code numUpdates} updates destined for the given file location and
 * returns the running total of updates accumulated so far.
 *
 * @param location target file location (file id + commit time) of the updates
 * @param numUpdates number of updates going to that location
 * @return the updated running total of updates
 */
long addUpdates(HoodieRecordLocation location, long numUpdates) {
  updateLocationToCount.put(location.getFileId(), Pair.of(location.getCommitTime(), numUpdates));
  this.numUpdates += numUpdates;
  return this.numUpdates;
}
HoodieRecord updatedRecord1 = new HoodieRecord( new HoodieKey(updateRowChanges1.getRowKey(), updateRowChanges1.getPartitionPath()), updateRowChanges1); updatedRecord1.setCurrentLocation(new HoodieRecordLocation(null, FSUtils.getFileId(parquetFile.getName()))); table = new HoodieCopyOnWriteTable(config, jsc); Iterator<List<WriteStatus>> iter = table .handleUpdate(newCommitTime, updatedRecord1.getCurrentLocation().getFileId(), updatedRecords.iterator());
assertTrue(record.getCurrentLocation().getFileId().equals(FSUtils.getFileId(filename0))); } else if (record.getRecordKey().equals("001")) { assertTrue(record.getCurrentLocation().getFileId().equals(FSUtils.getFileId(filename2))); } else if (record.getRecordKey().equals("002")) { assertTrue(!record.isCurrentLocationKnown()); } else if (record.getRecordKey().equals("004")) { assertTrue(record.getCurrentLocation().getFileId().equals(FSUtils.getFileId(filename3)));
/**
 * Ensure records have location field set.
 *
 * @param taggedRecords Tagged Records
 * @param commitTime Commit Timestamp expected on every record's current location
 */
void checkTaggedRecords(List<HoodieRecord> taggedRecords, String commitTime) {
  for (HoodieRecord rec : taggedRecords) {
    assertTrue("Record " + rec + " found with no location.", rec.isCurrentLocationKnown());
    // JUnit's assertEquals(message, expected, actual): the expected commit time must come
    // first, otherwise failure output reports expected/actual reversed.
    assertEquals("All records should have commit time " + commitTime + ", since updates were made",
        commitTime, rec.getCurrentLocation().getCommitTime());
  }
}
assertTrue(javaRDD.map(record -> record.getKey().getRecordKey()).distinct().count() == 200); assertTrue(javaRDD.filter( record -> (record.getCurrentLocation() != null && record.getCurrentLocation().getCommitTime() .equals(newCommitTime))).distinct().count() == 200);
/**
 * Tallies updates against the file they target.
 * Stores (commit time, count) keyed by file id, then bumps and returns the
 * overall update counter.
 */
long addUpdates(HoodieRecordLocation location, long numUpdates) {
  String targetFileId = location.getFileId();
  updateLocationToCount.put(targetFileId, Pair.of(location.getCommitTime(), numUpdates));
  return this.numUpdates += numUpdates;
}
private void writeToBuffer(HoodieRecord<T> record) { // update the new location of the record, so we know where to find it next record.setNewLocation(new HoodieRecordLocation(commitTime, fileId)); Optional<IndexedRecord> indexedRecord = getIndexedRecord(record); if (indexedRecord.isPresent()) { recordList.add(indexedRecord.get()); } else { keysToDelete.add(record.getRecordKey()); } numberOfRecords++; }
assertTrue(record.getCurrentLocation().getFileId().equals(FSUtils.getFileId(filename1))); } else if (record.getRecordKey().equals("2eb5b87b-1feu-4edd-87b4-6ec96dc405a0")) { assertTrue(record.getCurrentLocation().getFileId().equals(FSUtils.getFileId(filename2))); } else if (record.getRecordKey().equals("3eb5b87c-1fej-4edd-87b4-6ec96dc405a0")) { assertTrue(!record.isCurrentLocationKnown()); } else if (record.getRecordKey().equals("4eb5b87c-1fej-4edd-87b4-6ec96dc405a0")) { assertTrue(record.getCurrentLocation().getFileId().equals(FSUtils.getFileId(filename3)));
Bytes.toBytes(loc.get().getCommitTime())); put.addColumn(SYSTEM_COLUMN_FAMILY, FILE_NAME_COLUMN, Bytes.toBytes(loc.get().getFileId())); put.addColumn(SYSTEM_COLUMN_FAMILY, PARTITION_PATH_COLUMN, Bytes.toBytes(rec.getPartitionPath()));
@Override public JavaRDD<HoodieRecord<T>> tagLocation(JavaRDD<HoodieRecord<T>> recordRDD, JavaSparkContext jsc, HoodieTable<T> hoodieTable) throws HoodieIndexException { return recordRDD.map(record -> { String bucket = getBucket(record.getRecordKey()); //HACK(vc) a non-existent commit is provided here. record.setCurrentLocation(new HoodieRecordLocation("000", bucket)); return record; }); }
assertTrue(record.getCurrentLocation().getFileId().equals(FSUtils.getFileId(filename))); } else if (record.getRecordKey().equals("2eb5b87b-1feu-4edd-87b4-6ec96dc405a0")) { assertFalse(record.isCurrentLocationKnown());
Bytes.toBytes(loc.get().getCommitTime())); put.addColumn(SYSTEM_COLUMN_FAMILY, FILE_NAME_COLUMN, Bytes.toBytes(loc.get().getFileId())); put.addColumn(SYSTEM_COLUMN_FAMILY, PARTITION_PATH_COLUMN, Bytes.toBytes(rec.getPartitionPath()));
@Override public JavaRDD<HoodieRecord<T>> tagLocation(JavaRDD<HoodieRecord<T>> recordRDD, JavaSparkContext jsc, HoodieTable<T> hoodieTable) throws HoodieIndexException { return recordRDD.map(record -> { String bucket = getBucket(record.getRecordKey()); //HACK(vc) a non-existent commit is provided here. record.setCurrentLocation(new HoodieRecordLocation("000", bucket)); return record; }); }
/**
 * Writes the given updated records into Hoodie log files, one avro data block per
 * distinct current record location.
 *
 * <p>Records are grouped by {@link HoodieRecord#getCurrentLocation()}; each group is
 * appended as a single {@code HoodieAvroDataBlock} to a log file under that group's
 * partition path. Any failure fails the calling test via {@code fail(...)}.
 *
 * @param fs file system to write through
 * @param basePath table base path
 * @param schema avro schema used to serialize each record's payload
 * @param updatedRecords records to append; all must have a current location set
 */
public static void writeRecordsToLogFiles(FileSystem fs, String basePath, Schema schema,
    List<HoodieRecord> updatedRecords) {
  Map<HoodieRecordLocation, List<HoodieRecord>> groupedUpdated =
      updatedRecords.stream().collect(Collectors.groupingBy(HoodieRecord::getCurrentLocation));
  groupedUpdated.entrySet().forEach(s -> {
    HoodieRecordLocation location = s.getKey();
    String partitionPath = s.getValue().get(0).getPartitionPath();
    Writer logWriter = null;
    try {
      logWriter = HoodieLogFormat.newWriterBuilder().onParentPath(new Path(basePath, partitionPath))
          .withFileExtension(HoodieLogFile.DELTA_EXTENSION).withFileId(location.getFileId())
          .overBaseCommit(location.getCommitTime()).withFs(fs).build();
      Map<HoodieLogBlock.HeaderMetadataType, String> header = Maps.newHashMap();
      header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, location.getCommitTime());
      header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, schema.toString());
      logWriter.appendBlock(new HoodieAvroDataBlock(s.getValue().stream().map(r -> {
        try {
          GenericRecord val = (GenericRecord) r.getData().getInsertValue(schema).get();
          HoodieAvroUtils.addHoodieKeyToRecord(val, r.getRecordKey(), r.getPartitionPath(), "");
          return (IndexedRecord) val;
        } catch (IOException e) {
          // NOTE(review): a failed payload becomes a null element in the block —
          // preserved as-is, but confirm downstream readers tolerate it.
          return null;
        }
      }).collect(Collectors.toList()), header));
    } catch (Exception e) {
      fail(e.toString());
    } finally {
      // Close in finally: previously the writer leaked whenever build/appendBlock threw.
      if (logWriter != null) {
        try {
          logWriter.close();
        } catch (Exception e) {
          fail(e.toString());
        }
      }
    }
  });
}
new HoodieKey(currentRecord.getRecordKey(), partitionPath), currentRecord.getData()); currentRecord.setCurrentLocation(new HoodieRecordLocation(commitTs, fileId)); taggedRecords.add(currentRecord);
assert onDiskHoodieRecord.getKey().equals(records.get(dkey).getKey()); assert records.get(ikey).getCurrentLocation().getFileId().equals(SpillableMapTestUtils.DUMMY_FILE_ID); assert records.get(ikey).getCurrentLocation().getCommitTime().equals(SpillableMapTestUtils.DUMMY_COMMIT_TIME);
new HoodieKey(currentRecord.getRecordKey(), partitionPath), currentRecord.getData()); currentRecord.setCurrentLocation(new HoodieRecordLocation(commitTs, fileId)); taggedRecords.add(currentRecord);
/**
 * Perform the actual writing of the given record into the backing file.
 *
 * <p>A present {@code avroRecord} is written with metadata and counted as an insert;
 * an absent one is counted as a delete. Success/failure is recorded on
 * {@code writeStatus}; exceptions are absorbed so one bad record cannot fail the job.
 */
public void write(HoodieRecord record, Optional<IndexedRecord> avroRecord) {
  // Capture payload metadata first, so both the success and failure paths can report it
  Optional recordMetadata = record.getData().getMetadata();
  try {
    if (avroRecord.isPresent()) {
      storageWriter.writeAvroWithMetadata(avroRecord.get(), record);
      // update the new location of record, so we know where to find it next
      record.setNewLocation(new HoodieRecordLocation(commitTime, writeStatus.getFileId()));
      recordsWritten++;
      insertRecordsWritten++;
    } else {
      // Absent payload: nothing is written; count it as a delete
      recordsDeleted++;
    }
    writeStatus.markSuccess(record, recordMetadata);
    // deflate record payload after recording success. This will help users access payload as a
    // part of marking
    // record successful.
    record.deflate();
  } catch (Throwable t) {
    // Not throwing exception from here, since we don't want to fail the entire job
    // for a single record
    writeStatus.markFailure(record, t, recordMetadata);
    logger.error("Error writing record " + record, t);
  }
}