/**
 * Obtain all the commits, compactions that have occurred on the timeline, whose instant times
 * could be fed into the datasource options.
 */
public static HoodieTimeline allCompletedCommitsCompactions(FileSystem fs, String basePath) {
  HoodieTableMetaClient metaClient = new HoodieTableMetaClient(fs.getConf(), basePath, true);
  if (metaClient.getTableType().equals(HoodieTableType.MERGE_ON_READ)) {
    return metaClient.getActiveTimeline().getTimelineOfActions(
        Sets.newHashSet(HoodieActiveTimeline.COMMIT_ACTION, HoodieActiveTimeline.DELTA_COMMIT_ACTION));
  } else {
    return metaClient.getCommitTimeline().filterCompletedInstants();
  }
}
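As a usage note, a caller would typically pull the completed instant times off this timeline and hand them to the datasource as read boundaries. A minimal sketch; the base path is illustrative:

// Hypothetical usage sketch: enumerate completed instants that could seed an incremental read.
FileSystem fs = FileSystem.get(new Configuration());
HoodieTimeline completed = allCompletedCommitsCompactions(fs, "/tmp/hoodie/sample-table");
completed.getInstants().forEach(instant ->
    System.out.println(instant.getAction() + " @ " + instant.getTimestamp()));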
// Excerpt from a CLI helper that builds a file system view over a path glob; the original
// excerpt was truncated, so the branch flags and stream assignment below are reconstructed.
HoodieTableMetaClient metaClient = new HoodieTableMetaClient(
    HoodieCLI.tableMetadata.getHadoopConf(), HoodieCLI.tableMetadata.getBasePath(), true);
FileSystem fs = HoodieCLI.fs;
String globPath = String.format("%s/%s/*", HoodieCLI.tableMetadata.getBasePath(), globRegex);
FileStatus[] statuses = fs.globStatus(new Path(globPath));
Stream<HoodieInstant> instantsStream = null;
HoodieTimeline timeline;
if (readOptimizedOnly) { // flag reconstructed from context: restrict to commit actions only
  timeline = metaClient.getActiveTimeline().getCommitTimeline();
} else if (excludeCompaction) {
  timeline = metaClient.getActiveTimeline().getCommitsTimeline();
} else {
  timeline = metaClient.getActiveTimeline().getCommitsAndCompactionTimeline();
}
instantsStream = timeline.getInstants(); // (additional instant filtering elided in the original excerpt)
HoodieTimeline filteredTimeline = new HoodieDefaultTimeline(instantsStream,
    (Function<HoodieInstant, Optional<byte[]>> & Serializable) metaClient.getActiveTimeline()::getInstantDetails);
return new HoodieTableFileSystemView(metaClient, filteredTimeline, statuses);
@CliCommand(value = "commits refresh", help = "Refresh the commits") public String refreshCommits() throws IOException { HoodieTableMetaClient metadata = new HoodieTableMetaClient(HoodieCLI.conf, HoodieCLI.tableMetadata.getBasePath()); HoodieCLI.setTableMetadata(metadata); return "Metadata for table " + metadata.getTableConfig().getTableName() + " refreshed."; }
protected HoodieActiveTimeline(HoodieTableMetaClient metaClient, Set<String> includedExtensions) {
  // Scan all the files in the meta path, include only the extensions passed in, and
  // convert them into HoodieInstants
  try {
    this.instants = HoodieTableMetaClient.scanHoodieInstantsFromFileSystem(metaClient.getFs(),
        new Path(metaClient.getMetaPath()), includedExtensions);
    log.info("Loaded instants " + instants);
  } catch (IOException e) {
    throw new HoodieIOException("Failed to scan metadata", e);
  }
  this.metaClient = metaClient;
  // Multiple casts will make this lambda serializable -
  // http://docs.oracle.com/javase/specs/jls/se8/html/jls-15.html#jls-15.16
  this.details = (Function<HoodieInstant, Optional<byte[]>> & Serializable) this::getInstantDetails;
}
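The intersection-type cast on the last line is plain Java 8, not Hudi-specific: casting a method reference to Function<...> & Serializable makes the compiler emit a lambda class that also implements Serializable. A self-contained demonstration:

import java.io.Serializable;
import java.util.function.Function;

public class SerializableLambdaDemo {
  public static void main(String[] args) {
    // Without the cast, the lambda's synthetic class does not implement Serializable.
    Function<String, Integer> plain = String::length;
    System.out.println(plain instanceof Serializable); // false

    // The intersection cast (JLS 15.16) makes the compiler target both interfaces.
    Function<String, Integer> serializable =
        (Function<String, Integer> & Serializable) String::length;
    System.out.println(serializable instanceof Serializable); // true
  }
}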
/**
 * Get the compacted commit timeline visible for this table
 */
public HoodieTimeline getCommitTimeline() {
  switch (this.getTableType()) {
    case COPY_ON_WRITE:
    case MERGE_ON_READ:
      // We need to include the parquet files written out in delta commits in tagging
      return getActiveTimeline().getCommitTimeline();
    default:
      throw new HoodieException("Unsupported table type :" + this.getTableType());
  }
}
public AbstractHoodieLogRecordScanner(FileSystem fs, String basePath, List<String> logFilePaths,
    Schema readerSchema, String latestInstantTime, boolean readBlocksLazily, boolean reverseReader,
    int bufferSize) {
  this.readerSchema = readerSchema;
  this.latestInstantTime = latestInstantTime;
  this.hoodieTableMetaClient = new HoodieTableMetaClient(fs.getConf(), basePath);
  // Load class from the payload fully qualified class name
  this.payloadClassFQN = this.hoodieTableMetaClient.getTableConfig().getPayloadClass();
  this.totalLogFiles.addAndGet(logFilePaths.size());
  this.logFilePaths = logFilePaths;
  this.readBlocksLazily = readBlocksLazily;
  this.reverseReader = reverseReader;
  this.fs = fs;
  this.bufferSize = bufferSize;
}
// Excerpt from the compaction admin path that unschedules a compaction plan; the original
// excerpt was truncated, so the trailing arguments and builder chain are marked as elided.
HoodieTableMetaClient metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), basePath);
List<Pair<HoodieLogFile, HoodieLogFile>> renameActions =
    getRenamingActionsForUnschedulingCompactionPlan(metaClient, compactionInstant, parallelism,
        Optional.absent(), false); // trailing arguments reconstructed from context
// ... (rename operations elided in the original excerpt)
// Overwrite the compaction plan with a rewritten one
HoodieCompactionPlan newPlan = HoodieCompactionPlan.newBuilder()
    // ... (builder arguments truncated in the original excerpt)
    .build();
HoodieInstant inflight = new HoodieInstant(State.INFLIGHT, COMPACTION_ACTION, compactionInstant);
Path inflightPath = new Path(metaClient.getMetaPath(), inflight.getFileName());
if (metaClient.getFs().exists(inflightPath)) {
  // ... (handling of the inflight state elided in the original excerpt)
}
metaClient.getActiveTimeline().saveToCompactionRequested(
    new HoodieInstant(State.REQUESTED, COMPACTION_ACTION, compactionInstant),
    AvroUtils.serializeCompactionPlan(newPlan));
@CliCommand(value = "savepoints show", help = "Show the savepoints") public String showSavepoints() throws IOException { HoodieActiveTimeline activeTimeline = HoodieCLI.tableMetadata.getActiveTimeline(); HoodieTimeline timeline = activeTimeline.getSavePointTimeline().filterCompletedInstants(); List<HoodieInstant> commits = timeline.getInstants().collect(Collectors.toList()); String[][] rows = new String[commits.size()][]; Collections.reverse(commits); for (int i = 0; i < commits.size(); i++) { HoodieInstant commit = commits.get(i); rows[i] = new String[] {commit.getTimestamp()}; } return HoodiePrintHelper.print(new String[] {"SavepointTime"}, rows); }
HoodieHiveClient(HiveSyncConfig cfg, HiveConf configuration, FileSystem fs) {
  this.syncConfig = cfg;
  this.fs = fs;
  this.metaClient = new HoodieTableMetaClient(fs.getConf(), cfg.basePath, true);
  this.tableType = metaClient.getTableType();
  LOG.info("Creating hive connection " + cfg.jdbcUrl);
  createHiveConnection();
  try {
    this.client = new HiveMetaStoreClient(configuration);
  } catch (MetaException e) {
    throw new HoodieHiveSyncException("Failed to create HiveMetaStoreClient", e);
  }
  try {
    this.partitionValueExtractor =
        (PartitionValueExtractor) Class.forName(cfg.partitionValueExtractorClass).newInstance();
  } catch (Exception e) {
    throw new HoodieHiveSyncException(
        "Failed to initialize PartitionValueExtractor class " + cfg.partitionValueExtractorClass, e);
  }
  activeTimeline = metaClient.getActiveTimeline().getCommitsTimeline().filterCompletedInstants();
}
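The completed-commits timeline captured on the last line is what sync logic compares against the last instant it synced. A sketch of that pattern; lastCommitSynced is an illustrative checkpoint, and Hudi instant times are strings that compare lexicographically:

// Hypothetical usage sketch: find instants completed after the last synced checkpoint.
String lastCommitSynced = "20190101000000"; // illustrative checkpoint value
List<HoodieInstant> toSync = activeTimeline.getInstants()
    .filter(instant -> instant.getTimestamp().compareTo(lastCommitSynced) > 0)
    .collect(Collectors.toList());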
private void refreshFsView(FileStatus[] statuses) {
  metaClient = new HoodieTableMetaClient(metaClient.getHadoopConf(), basePath, true);
  if (statuses != null) {
    fsView = new HoodieTableFileSystemView(metaClient,
        metaClient.getActiveTimeline().getCommitsAndCompactionTimeline().filterCompletedAndCompactionInstants(),
        statuses);
  } else {
    fsView = new HoodieTableFileSystemView(metaClient,
        metaClient.getActiveTimeline().getCommitsAndCompactionTimeline().filterCompletedAndCompactionInstants());
  }
  roView = (TableFileSystemView.ReadOptimizedView) fsView;
  rtView = (TableFileSystemView.RealtimeView) fsView;
}
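Once refreshed, the read-optimized view can answer per-partition queries. A sketch assuming the pre-0.5 getLatestDataFiles accessor on ReadOptimizedView and an illustrative partition path (both assumptions):

// Hypothetical usage sketch: list the latest base files in one partition after a refresh.
refreshFsView(null);
roView.getLatestDataFiles("2019/01/01") // illustrative partition path
    .forEach(dataFile -> System.out.println(dataFile.getPath()));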
private void moveCompactionFromRequestedToInflight(String compactionInstantTime, HoodieWriteClient client,
    HoodieWriteConfig cfg) throws IOException {
  HoodieTableMetaClient metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), cfg.getBasePath());
  HoodieInstant compactionInstant = HoodieTimeline.getCompactionRequestedInstant(compactionInstantTime);
  HoodieCompactionPlan workload = AvroUtils.deserializeCompactionPlan(
      metaClient.getActiveTimeline().getInstantAuxiliaryDetails(compactionInstant).get());
  metaClient.getActiveTimeline().transitionCompactionRequestedToInflight(compactionInstant);
  HoodieInstant instant = metaClient.getActiveTimeline().reload().filterPendingCompactionTimeline().getInstants()
      .filter(in -> in.getTimestamp().equals(compactionInstantTime)).findAny().get();
  assertTrue("Instant must be marked inflight", instant.isInflight());
}
/**
 * Describes table properties
 */
@CliCommand(value = "desc", help = "Describe Hoodie Table properties")
public String descTable() {
  TableHeader header = new TableHeader()
      .addTableHeaderField("Property")
      .addTableHeaderField("Value");
  List<Comparable[]> rows = new ArrayList<>();
  rows.add(new Comparable[]{"basePath", HoodieCLI.tableMetadata.getBasePath()});
  rows.add(new Comparable[]{"metaPath", HoodieCLI.tableMetadata.getMetaPath()});
  rows.add(new Comparable[]{"fileSystem", HoodieCLI.tableMetadata.getFs().getScheme()});
  HoodieCLI.tableMetadata.getTableConfig().getProps().entrySet().forEach(e -> {
    rows.add(new Comparable[]{e.getKey(), e.getValue()});
  });
  return HoodiePrintHelper.print(header, new HashMap<>(), "", false, -1, false, rows);
}
/**
 * Ensure the compaction plan is valid
 *
 * @param compactionInstant Compaction Instant
 * @throws Exception
 */
private void ensureValidCompactionPlan(String compactionInstant) throws Exception {
  metaClient = new HoodieTableMetaClient(metaClient.getHadoopConf(), basePath, true);
  // Ensure the compaction plan is good to begin with
  List<ValidationOpResult> validationResults = client.validateCompactionPlan(metaClient, compactionInstant, 1);
  Assert.assertFalse("Some validations failed",
      validationResults.stream().filter(v -> !v.isSuccess()).findAny().isPresent());
}
/**
 * Return all pending compactions with instant time for clients to decide what to compact next.
 *
 * @return list of (instant time, compaction plan) pairs for all pending compactions
 */
public List<Pair<String, HoodieCompactionPlan>> getPendingCompactions() {
  HoodieTableMetaClient metaClient =
      new HoodieTableMetaClient(jsc.hadoopConfiguration(), hoodieTable.getMetaClient().getBasePath(), true);
  return CompactionUtils.getAllPendingCompactionPlans(metaClient).stream()
      .map(instantWorkloadPair ->
          Pair.of(instantWorkloadPair.getKey().getTimestamp(), instantWorkloadPair.getValue()))
      .collect(Collectors.toList());
}
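A client might scan this list before deciding what to compact. A sketch assuming a writeClient handle exposing the method above; getOperations() is the Avro-generated accessor on the plan:

// Hypothetical usage sketch: inspect pending compactions before choosing one to run.
List<Pair<String, HoodieCompactionPlan>> pending = writeClient.getPendingCompactions();
for (Pair<String, HoodieCompactionPlan> p : pending) {
  int numOperations = p.getValue().getOperations() == null ? 0 : p.getValue().getOperations().size();
  System.out.println("Pending compaction at " + p.getKey() + " with " + numOperations + " operations");
}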
@CliCommand(value = "compaction schedule", help = "Schedule Compaction") public String scheduleCompact( @CliOption(key = "sparkMemory", unspecifiedDefaultValue = "1G", help = "Spark executor memory") final String sparkMemory) throws Exception { boolean initialized = HoodieCLI.initConf(); HoodieCLI.initFS(initialized); // First get a compaction instant time and pass it to spark launcher for scheduling compaction String compactionInstantTime = HoodieActiveTimeline.createNewCommitTime(); if (HoodieCLI.tableMetadata.getTableType() == HoodieTableType.MERGE_ON_READ) { String sparkPropertiesPath = Utils.getDefaultPropertiesFile( scala.collection.JavaConversions.propertiesAsScalaMap(System.getProperties())); SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath); sparkLauncher.addAppArgs(SparkCommand.COMPACT_SCHEDULE.toString(), HoodieCLI.tableMetadata.getBasePath(), HoodieCLI.tableMetadata.getTableConfig().getTableName(), compactionInstantTime, sparkMemory); Process process = sparkLauncher.launch(); InputStreamConsumer.captureOutput(process); int exitCode = process.waitFor(); if (exitCode != 0) { return "Failed to run compaction for " + compactionInstantTime; } return "Compaction successfully completed for " + compactionInstantTime; } else { throw new Exception("Compactions can only be run for table type : MERGE_ON_READ"); } }
// Excerpt from a test validating compaction unscheduling; the original excerpt was truncated,
// so the assignments and the stream terminal operation below are reconstructed from context.
List<Pair<HoodieLogFile, HoodieLogFile>> renameFiles =
    client.getRenamingActionsForUnschedulingCompactionPlan(metaClient, compactionInstant, 1,
        Optional.absent(), false);
metaClient = new HoodieTableMetaClient(metaClient.getHadoopConf(), basePath, true);
final HoodieTableFileSystemView fsView =
    new HoodieTableFileSystemView(metaClient, metaClient.getCommitsAndCompactionTimeline());
// Log files expected to be renamed away by the unscheduling
Set<HoodieLogFile> expLogFilesToBeRenamed =
    fsView.getLatestFileSlices(HoodieTestUtils.DEFAULT_PARTITION_PATHS[0])
        .filter(fs -> fs.getBaseInstantTime().equals(compactionInstant))
        .flatMap(FileSlice::getLogFiles)
        .collect(Collectors.toSet());
// ... (rename execution and verification elided in the original excerpt)
metaClient = new HoodieTableMetaClient(metaClient.getHadoopConf(), basePath, true);
final HoodieTableFileSystemView newFsView =
    new HoodieTableFileSystemView(metaClient, metaClient.getCommitsAndCompactionTimeline());
metaClient = new HoodieTableMetaClient(metaClient.getHadoopConf(), metaClient.getBasePath(), true);
Map<String, Pair<String, HoodieCompactionOperation>> pendingCompactionMap =
    CompactionUtils.getAllPendingCompactionOperations(metaClient);
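The returned map pairs each pending compaction instant with its operation, which makes membership checks cheap; my reading of CompactionUtils is that the map is keyed by file ID, so treat that as an assumption. A sketch with an illustrative file ID:

// Hypothetical usage sketch: check whether a file group already has a compaction pending.
String fileId = "a1b2c3d4-0000-0000-0000-000000000000"; // illustrative file ID
if (pendingCompactionMap.containsKey(fileId)) {
  Pair<String, HoodieCompactionOperation> pending = pendingCompactionMap.get(fileId);
  System.out.println("File " + fileId + " has compaction pending at instant " + pending.getKey());
}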
/**
 * Commit changes performed at the given commitTime marker
 */
public boolean commit(String commitTime, JavaRDD<WriteStatus> writeStatuses,
    Optional<Map<String, String>> extraMetadata) {
  HoodieTableMetaClient metaClient =
      new HoodieTableMetaClient(jsc.hadoopConfiguration(), config.getBasePath(), true);
  return commit(commitTime, writeStatuses, extraMetadata, metaClient.getCommitActionType());
}
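The usual write path wraps this in a start-write-commit cycle. A sketch assuming a configured HoodieWriteClient named writeClient, an input JavaRDD<HoodieRecord> named records, and java.util.Optional for the metadata argument (all assumptions):

// Hypothetical usage sketch: the start-write-commit cycle around this method.
String commitTime = writeClient.startCommit();            // obtain a new commit marker
JavaRDD<WriteStatus> statuses = writeClient.upsert(records, commitTime);
boolean committed = writeClient.commit(commitTime, statuses, Optional.empty());
if (!committed) {
  throw new IllegalStateException("Commit " + commitTime + " failed");
}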
// Excerpt from a CLI table-creation flow; the original excerpt was truncated, so the
// surrounding try block and flag declaration are reconstructed from context.
boolean existing = false;
try {
  // If a meta client can be built, a dataset already exists at this path
  new HoodieTableMetaClient(HoodieCLI.conf, path);
  existing = true;
} catch (DatasetNotFoundException dfe) {
  // Dataset not found; initialize a fresh table of the requested type
  HoodieTableMetaClient.initTableType(HoodieCLI.conf, path, tableType, name, payloadClass);
}
// ... (the original excerpt continues; `existing` is consulted afterwards)
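Once initTableType has laid down the .hoodie metadata, constructing a meta client against the same path should succeed. A sketch of that round trip; the path, table name, and payload class are illustrative:

// Hypothetical usage sketch: initialize a table, then load it back.
String newTablePath = "/tmp/hoodie/new-table"; // illustrative base path
HoodieTableMetaClient.initTableType(HoodieCLI.conf, newTablePath,
    HoodieTableType.COPY_ON_WRITE, "new_table", HoodieAvroPayload.class.getName());
HoodieTableMetaClient created = new HoodieTableMetaClient(HoodieCLI.conf, newTablePath);
System.out.println("Created table of type " + created.getTableType());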
private void deleteInstantFile(HoodieInstant instant) {
  log.info("Deleting instant " + instant);
  Path inFlightCommitFilePath = new Path(metaClient.getMetaPath(), instant.getFileName());
  try {
    boolean result = metaClient.getFs().delete(inFlightCommitFilePath, false);
    if (result) {
      log.info("Removed in-flight " + instant);
    } else {
      throw new HoodieIOException("Could not delete in-flight instant " + instant);
    }
  } catch (IOException e) {
    throw new HoodieIOException("Could not remove inflight commit " + inFlightCommitFilePath, e);
  }
}