// NOTE(review): incomplete/garbled fragment of segment-allocation logic — the enclosing method is
// not visible and several conditions appear spliced together (e.g. a bare boolean expression
// `pending.getVersion().compareTo(max.getVersion()) > 0 || ...` with no `if (` before it).
// NOTE(review): the log.warn format string has four %s placeholders ("dataSource", "interval",
// "maxVersion", "conflicting segment") but only three arguments follow (interval, maxVersion,
// max.getIdentifierAsString()) — the dataSource argument looks missing; confirm against the full file.
TimelineObjectHolder<String, DataSegment> existingHolder = Iterables.getOnlyElement(existingChunks); for (PartitionChunk<DataSegment> existing : existingHolder.getObject()) { if (max == null || max.getShardSpec().getPartitionNum() < existing.getObject() .getShardSpec() .getPartitionNum()) { max = SegmentIdentifier.fromDataSegment(existing.getObject()); pending.getVersion().compareTo(max.getVersion()) > 0 || (pending.getVersion().equals(max.getVersion()) && pending.getShardSpec().getPartitionNum() > max.getShardSpec().getPartitionNum())) { max = pending; return new SegmentIdentifier( dataSource, interval, new NumberedShardSpec(0, 0) ); } else if (!max.getInterval().equals(interval) || max.getVersion().compareTo(maxVersion) > 0) { log.warn( "Cannot allocate new segment for dataSource[%s], interval[%s], maxVersion[%s]: conflicting segment[%s].", interval, maxVersion, max.getIdentifierAsString() ); return null; } else if (max.getShardSpec() instanceof LinearShardSpec) { return new SegmentIdentifier(
/**
 * Registers {@code identifier} in the in-memory segment map (keyed by interval start millis)
 * and announces a shell {@code DataSegment} carrying only identity information (empty load
 * spec, dimensions, and metrics). Announcement failures are reported as alerts rather than
 * propagated, so the segment stays registered locally even if the announce fails.
 */
private void addSegment(final SegmentIdentifier identifier)
{
  segments.put(identifier.getInterval().getStartMillis(), identifier);

  // Placeholder segment: only dataSource/interval/version/shardSpec are meaningful here.
  final DataSegment announcedSegment = new DataSegment(
      identifier.getDataSource(),
      identifier.getInterval(),
      identifier.getVersion(),
      ImmutableMap.of(),
      ImmutableList.of(),
      ImmutableList.of(),
      identifier.getShardSpec(),
      null,
      0
  );

  try {
    segmentAnnouncer.announceSegment(announcedSegment);
  }
  catch (IOException e) {
    // Best-effort announce: alert and continue.
    log.makeAlert(e, "Failed to announce new segment[%s]", identifier.getDataSource())
       .addData("interval", identifier.getInterval())
       .emit();
  }
}
// NOTE(review): two spliced stream-pipeline fragments (note the second `.stream()` mid-chain);
// the receivers and terminal operations are outside this view. The first records each identifier
// in activeSegmentsAlreadySeen; the second filters out identifiers already seen. Do not edit
// without the enclosing method.
.stream() .map(segmentIdentifier -> { activeSegmentsAlreadySeen.add(segmentIdentifier.toString()); return SegmentWithState.newSegment(segmentIdentifier); }) .stream() .filter(segmentIdentifier -> !activeSegmentsAlreadySeen.contains( segmentIdentifier.toString())) .map(segmentIdentifier -> SegmentWithState.newSegment( segmentIdentifier,
/**
 * Records {@code identifier} as the appending segment of its interval (creating the
 * per-interval state holder on first use) and remembers it as the last segment id.
 */
void add(SegmentIdentifier identifier)
{
  final long intervalStart = identifier.getInterval().getStartMillis();
  final SegmentsOfInterval segmentsOfInterval = intervalToSegmentStates.computeIfAbsent(
      intervalStart,
      ignored -> new SegmentsOfInterval(identifier.getInterval())
  );
  segmentsOfInterval.setAppendingSegment(SegmentWithState.newSegment(identifier));
  lastSegmentId = identifier.getIdentifierAsString();
}
// NOTE(review): unbalanced fragment — braces don't close in this view. As written, the
// `.add(identifier)` appears inside the containsKey guard; presumably it belongs after the guard
// (the usual put-if-absent-then-add pattern, i.e. computeIfAbsent) — confirm against the full file.
// The trailing `if (identifiers.contains(...))` belongs to a different, also-incomplete method.
if (!identifiersByDataSource.containsKey(identifier.getDataSource())) { identifiersByDataSource.put(identifier.getDataSource(), Sets.newHashSet()); identifiersByDataSource.get(identifier.getDataSource()).add(identifier); if (identifiers.contains(SegmentIdentifier.fromDataSegment(segment))) { retVal.add(segment);
// NOTE(review): spliced fragment from hydrant/sink bootstrap code — starts with a dangling `);`,
// contains an orphaned `new FireHydrant(...)` whose result is not assigned in this view, and ends
// mid-call in an Iterables.transform. The enclosing method is not visible; do not edit from here.
); final int committedHydrants = committed.getCommittedHydrants(identifier.getIdentifierAsString()); new FireHydrant( new QueryableIndexSegment( identifier.getIdentifierAsString(), indexIO.loadIndex(hydrantDir) ), identifier.getInterval(), schema, identifier.getShardSpec(), identifier.getVersion(), tuningConfig.getMaxRowsInMemory(), maxBytesTuningConfig, currSink.getInterval(), currSink.getVersion(), identifier.getShardSpec().createChunk(currSink) ); Iterables.transform( sinks.keySet(), input -> input.getIdentifierAsString()
/**
 * Returns the {@code Sink} for {@code identifier}, creating, announcing, and registering a new
 * one on first use. A freshly created sink is announced (failures are alerted, not propagated),
 * stored in the sink map, reflected in the sink-count metric, and added to the sink timeline.
 */
private Sink getOrCreateSink(final SegmentIdentifier identifier)
{
  final Sink existing = sinks.get(identifier);
  if (existing != null) {
    return existing;
  }

  final Sink sink = new Sink(
      identifier.getInterval(),
      schema,
      identifier.getShardSpec(),
      identifier.getVersion(),
      tuningConfig.getMaxRowsInMemory(),
      maxBytesTuningConfig,
      tuningConfig.isReportParseExceptions(),
      null
  );

  try {
    segmentAnnouncer.announceSegment(sink.getSegment());
  }
  catch (IOException e) {
    // Best-effort announce: alert and keep the sink.
    log.makeAlert(e, "Failed to announce new segment[%s]", schema.getDataSource())
       .addData("interval", sink.getInterval())
       .emit();
  }

  sinks.put(identifier, sink);
  metrics.setSinkCount(sinks.size());
  sinkTimeline.add(sink.getInterval(), sink.getVersion(), identifier.getShardSpec().createChunk(sink));
  return sink;
}
// Transform function: maps a SegmentIdentifier to its Interval. The trailing "})" closes an
// enclosing anonymous class and call whose opening is outside this view.
@Override public Interval apply(SegmentIdentifier input) { return input.getInterval(); } })
/**
 * Static factory: builds a SegmentIdentifier from an existing DataSegment by copying its
 * dataSource, interval, version, and shardSpec. The final "}" closes the enclosing class.
 */
public static SegmentIdentifier fromDataSegment(final DataSegment segment) { return new SegmentIdentifier( segment.getDataSource(), segment.getInterval(), segment.getVersion(), segment.getShardSpec() ); } }
/**
 * Move a set of identifiers out from "active", making way for newer segments.
 * This method is to support KafkaIndexTask's legacy mode and will be removed in the future.
 * See KafkaIndexTask.runLegacy().
 *
 * @param sequenceName sequence whose active segments are being retired
 * @param identifiers  segments to move out; each must currently be the appending segment of its interval
 *
 * @throws ISE if the sequence is unknown, or if any identifier is not the current appending
 *             segment of its interval
 */
public void moveSegmentOut(final String sequenceName, final List<SegmentIdentifier> identifiers)
{
  synchronized (segments) {
    final SegmentsForSequence activeSegmentsForSequence = segments.get(sequenceName);
    if (activeSegmentsForSequence == null) {
      throw new ISE("WTF?! Asked to remove segments for sequenceName[%s] which doesn't exist...", sequenceName);
    }

    for (final SegmentIdentifier identifier : identifiers) {
      log.info("Moving segment[%s] out of active list.", identifier);
      final long key = identifier.getInterval().getStartMillis();
      final SegmentsOfInterval segmentsOfInterval = activeSegmentsForSequence.get(key);
      // Only the interval's current appending segment may be moved out.
      if (segmentsOfInterval == null ||
          segmentsOfInterval.getAppendingSegment() == null ||
          !segmentsOfInterval.getAppendingSegment().getSegmentIdentifier().equals(identifier)) {
        throw new ISE("WTF?! Asked to remove segment[%s] that didn't exist...", identifier);
      }
      segmentsOfInterval.finishAppendingToCurrentActiveSegment(SegmentWithState::finishAppending);
    }
  }
}
/**
 * Static factory: converts a map keyed by SegmentIdentifier into a Committed whose hydrant
 * counts are keyed by the identifier's string form. {@code metadata} is passed through as-is.
 */
public static Committed create(
    Map<SegmentIdentifier, Integer> hydrants0,
    Object metadata
)
{
  final ImmutableMap.Builder<String, Integer> hydrants = ImmutableMap.builder();
  // Re-key each entry by the identifier's string representation.
  hydrants0.forEach((identifier, count) -> hydrants.put(identifier.getIdentifierAsString(), count));
  return new Committed(hydrants.build(), metadata);
}
/**
 * Drop segments in background. The segments should be pushed (in batch ingestion) or published (in streaming
 * ingestion) before being dropped.
 *
 * @param segmentsAndMetadata result of pushing or publishing
 *
 * @return a future for dropping segments
 */
ListenableFuture<SegmentsAndMetadata> dropInBackground(SegmentsAndMetadata segmentsAndMetadata)
{
  log.info("Dropping segments[%s]", segmentsAndMetadata.getSegments());

  // Kick off an asynchronous drop for every pushed/published segment.
  final ListenableFuture<?> dropFuture = Futures.allAsList(
      segmentsAndMetadata
          .getSegments()
          .stream()
          .map(SegmentIdentifier::fromDataSegment)
          .map(appenderator::drop)
          .collect(Collectors.toList())
  );

  // Once all drops complete, unwrap the caller-supplied metadata from the driver metadata.
  return Futures.transform(
      dropFuture,
      (Function<Object, SegmentsAndMetadata>) x -> {
        final Object metadata = segmentsAndMetadata.getCommitMetadata();
        final Object callerMetadata =
            metadata == null ? null : ((AppenderatorDriverMetadata) metadata).getCallerMetadata();
        return new SegmentsAndMetadata(segmentsAndMetadata.getSegments(), callerMetadata);
      }
  );
}
// NOTE(review): heavily garbled splice of several methods — orphaned `.addData(...).emit()`
// chains with no receiver, a `throw Throwables.propagate(e)` with no enclosing catch, and loose
// constructor-argument lists. Only the first statement pair (readCommit/writeCommit-without) is
// coherent. Do not edit without the full file.
final Committed oldCommit = readCommit(); if (oldCommit != null) { writeCommit(oldCommit.without(identifier.getIdentifierAsString())); .addData("identifier", identifier.getIdentifierAsString()) .emit(); throw Throwables.propagate(e); .addData("identifier", identifier.getIdentifierAsString()) .emit(); sink.getInterval(), sink.getVersion(), identifier.getShardSpec().createChunk(sink) ); for (FireHydrant hydrant : sink) {
// Guard: rejects a row whose segment identifier belongs to a different dataSource than this
// schema. NOTE(review): the closing "}" of this if lies outside the visible fragment.
if (!identifier.getDataSource().equals(schema.getDataSource())) { throw new IAE( "Expected dataSource[%s] but was asked to insert row for dataSource[%s]?!", schema.getDataSource(), identifier.getDataSource() );
// NOTE(review): truncated fragment — the ISE's format arguments and the closing braces are not
// visible. The polarity looks suspicious (throwing a "conflicts with existing segment" error when
// the identifiers ARE equal); confirm against the full method before changing anything.
if (identifier.equals(newSegment)) { throw new ISE( "WTF?! Allocated segment[%s] which conflicts with existing segment[%s].",
// Registers a handoff callback for the segment described by (interval, version, partitionNum),
// executed on the calling thread. NOTE(review): the callback argument and closing ")" of this
// call are outside the visible fragment.
handoffNotifier.registerSegmentHandoffCallback( new SegmentDescriptor( segmentIdentifier.getInterval(), segmentIdentifier.getVersion(), segmentIdentifier.getShardSpec().getPartitionNum() ), MoreExecutors.sameThreadExecutor(),
// NOTE(review): spliced fragment of pending-segment reuse logic — four log-message argument
// lists are run together with their surrounding if/else structure and log.info/warn calls
// missing. The interval comparison (start and end millis) is the only complete statement here.
// Do not edit without the enclosing method.
); if (existingIdentifier.getInterval().getStartMillis() == interval.getStartMillis() && existingIdentifier.getInterval().getEndMillis() == interval.getEndMillis()) { if (previousSegmentId == null) { log.info( "Found existing pending segment [%s] for sequence[%s] in DB", existingIdentifier.getIdentifierAsString(), sequenceName ); log.info( "Found existing pending segment [%s] for sequence[%s] (previous = [%s]) in DB", existingIdentifier.getIdentifierAsString(), sequenceName, previousSegmentId "Cannot use existing pending segment [%s] for sequence[%s] in DB, " + "does not match requested interval[%s]", existingIdentifier.getIdentifierAsString(), sequenceName, interval "Cannot use existing pending segment [%s] for sequence[%s] (previous = [%s]) in DB, " + "does not match requested interval[%s]", existingIdentifier.getIdentifierAsString(), sequenceName, previousSegmentId,
/**
 * Assembles the accumulated per-interval segment state into a SegmentsForSequence, keying each
 * SegmentsOfInterval by its interval's start millis and carrying over lastSegmentId.
 * The final "}" closes the enclosing builder class.
 */
SegmentsForSequence build() { final NavigableMap<Long, SegmentsOfInterval> map = new TreeMap<>(); for (Entry<SegmentIdentifier, Pair<SegmentWithState, List<SegmentWithState>>> entry : intervalToSegments.entrySet()) { map.put( entry.getKey().getInterval().getStartMillis(), new SegmentsOfInterval(entry.getKey().getInterval(), entry.getValue().lhs, entry.getValue().rhs) ); } return new SegmentsForSequence(map, lastSegmentId); } }
// NOTE(review): truncated fragment — this SegmentIdentifier constructor call is missing its
// remaining arguments (version, shardSpec) and the enclosing method is not visible.
return new SegmentIdentifier( dataSource, interval,
/**
 * Returns the on-disk persist directory for the given segment: a subdirectory of the configured
 * base persist directory named after the segment's string identifier.
 */
private File computePersistDir(SegmentIdentifier identifier)
{
  final File baseDir = tuningConfig.getBasePersistDirectory();
  return new File(baseDir, identifier.getIdentifierAsString());
}