final List<DataSegment> finalSegmentsToPublish = Lists.newArrayList();
for (DataSegment segment : segments) {
  List<TimelineObjectHolder<String, DataSegment>> existingChunks = timeline.lookup(segment.getInterval());
  if (existingChunks.size() > 1) {
    // ...
            + "Not possible to append new segment.", dataSource, segment.getInterval(), existingChunks.size()));
  }
  // ...
  for (PartitionChunk<DataSegment> existing : existingHolder.getObject()) {
    if (max == null
        || max.getShardSpec().getPartitionNum() < existing.getObject().getShardSpec().getPartitionNum()) {
      max = SegmentIdentifier.fromDataSegment(existing.getObject());
    }
  }
  // ...
  newShardSpec = segment.getShardSpec();
  newVersion = segment.getVersion();
  // ...
  } else {
    // ...
        dataSegmentPusher);
  finalSegmentsToPublish.add(publishedSegment);
  timeline.add(publishedSegment.getInterval(), publishedSegment.getVersion(),
      publishedSegment.getShardSpec().createChunk(publishedSegment));
  // ...
  batch.add(new ImmutableMap.Builder<String, Object>().put("id", segment.getIdentifier())
      .put("dataSource", segment.getDataSource())
      .put("created_date", new DateTime().toString())
      // ...

existingSegments = Collections.singletonList(createSegment(
    new Path(taskDirPath, DruidStorageHandlerUtils.INDEX_ZIP).toString(),
    new Interval(100, 150, DateTimeZone.UTC), "v0", new LinearShardSpec(1)));
dataSegment = createSegment(
    new Path(taskDirPath, DruidStorageHandlerUtils.INDEX_ZIP).toString(),
    new Interval(100, 150, DateTimeZone.UTC), "v1", new LinearShardSpec(0));
DruidStorageHandlerUtils.writeSegmentDescriptor(localFileSystem, dataSegment, descriptorPath);
druidStorageHandler.commitInsertTable(tableMock, false);
Assert.assertArrayEquals(
    Lists.newArrayList(DATA_SOURCE_NAME).toArray(),
    Lists.newArrayList(DruidStorageHandlerUtils.getAllDataSourceNames(connector, metadataStorageTablesConfig))
        .toArray());
Assert.assertEquals("v0", persistedSegment.getVersion());
Assert.assertTrue(persistedSegment.getShardSpec() instanceof LinearShardSpec);
Assert.assertEquals(2, persistedSegment.getShardSpec().getPartitionNum());
// ...
    dataSegmentPusher.makeIndexPathName(persistedSegment, DruidStorageHandlerUtils.INDEX_ZIP));
Assert.assertEquals(ImmutableMap.of("type", "hdfs", "path", expectedFinalHadoopPath.toString()),
    persistedSegment.getLoadSpec());
Assert.assertEquals("dummySegmentData",
    FileUtils.readFileToString(new File(expectedFinalHadoopPath.toUri())));

private static VersionedIntervalTimeline<String, DataSegment> getTimelineForIntervalWithHandle(
    final Handle handle,
    final String dataSource,
    final Interval interval,
    final MetadataStorageTablesConfig dbTables
) throws IOException
{
  // Select all used segments of the data source that overlap the given interval.
  Query<Map<String, Object>> sql = handle.createQuery(String.format(
      "SELECT payload FROM %s WHERE used = true AND dataSource = ? AND start <= ? AND \"end\" >= ?",
      dbTables.getSegmentsTable()))
      .bind(0, dataSource)
      .bind(1, interval.getEnd().toString())
      .bind(2, interval.getStart().toString());

  final VersionedIntervalTimeline<String, DataSegment> timeline =
      new VersionedIntervalTimeline<>(Ordering.natural());
  try (ResultIterator<byte[]> dbSegments = sql.map(ByteArrayMapper.FIRST).iterator()) {
    while (dbSegments.hasNext()) {
      final byte[] payload = dbSegments.next();
      // Deserialize each segment payload and register it in the versioned timeline.
      DataSegment segment = JSON_MAPPER.readValue(payload, DataSegment.class);
      timeline.add(segment.getInterval(), segment.getVersion(), segment.getShardSpec().createChunk(segment));
    }
  }
  return timeline;
}

      dataSegment.getDataSource(), dataSegment.getIdentifier()));
} catch (MalformedURLException e) {
  Throwables.propagate(e);

fileToUpload = Iterables.getOnlyElement(spilled);
} else {
  List<QueryableIndex> indexes = Lists.newArrayList();
  for (final File oneSpill : spilled) {
    indexes.add(IndexIO.loadIndex(oneSpill));
  }
  // ...
}
// ...
    .withDimensions(ImmutableList.copyOf(mappedSegment.getAvailableDimensions()))
    .withBinaryVersion(SegmentUtils.getVersionFromDir(fileToUpload));
// ...
    segmentToUpload.getIdentifier()
);

List<DruidDataSource> validDataSources = Lists.newArrayList();
for (DruidDataSource dataSource : dataSources) {
  if (dataSource != null) {
    Iterable<DataSegment> segments = dataSource.getSegments();
    for (DataSegment segment : segments) {
      segmentMap.put(segment.getIdentifier(), segment);
    }
    // ...
  }
}
// ...
    ImmutableMap.<String, String>of()
).addSegments(segmentMap);

DataSegment.builder()
    .dataSource(this.getDataSource())
    .interval(getInterval())
    .build();
final File outDir = new File(toolbox.getTaskWorkDir(), segment.getIdentifier());
final File fileToUpload = IndexMerger.merge(Lists.newArrayList(emptyAdapter), new AggregatorFactory[0], outDir);
// ...
    segment.getDataSource(),
    segment.getInterval(),
    segment.getVersion()
);
toolbox.pushSegments(ImmutableList.of(uploadedSegment));

final List<Pair<DateTime, byte[]>> cachedResults = Lists.newArrayList();
final Map<String, CachePopulator> cachePopulatorMap = Maps.newHashMap();
List<TimelineObjectHolder<String, ServerSelector>> serversLookup = Lists.newLinkedList();
// ...
for (Pair<ServerSelector, SegmentDescriptor> segment : segments) {
  final Cache.NamedKey segmentCacheKey = CacheUtil.computeSegmentCacheKey(
      segment.lhs.getSegment().getIdentifier(),
      segment.rhs,
      queryCacheKey
  );
  // ...
}
// ...
  cachedValues = cache.getBulk(cacheKeys.values());
} else {
  cachedValues = ImmutableMap.of();
}
// ...
  cachedResults.add(Pair.of(segmentQueryInterval.getStart(), cachedValue));
} else if (populateCache) {
  final String segmentIdentifier = segment.lhs.getSegment().getIdentifier();
  cachePopulatorMap.put(
      String.format("%s_%s", segmentIdentifier, segmentQueryInterval),
      // ...
  );
}
// ...
descriptors = Lists.newArrayList();
serverSegments.put(server, descriptors);

private static DataSegment computeMergedSegment(
    final String dataSource,
    final String version,
    final List<DataSegment> segments
)
{
  final Interval mergedInterval = computeMergedInterval(segments);
  // Union of dimensions and metrics across all input segments, compared case-insensitively.
  final Set<String> mergedDimensions = Sets.newTreeSet(String.CASE_INSENSITIVE_ORDER);
  final Set<String> mergedMetrics = Sets.newTreeSet(String.CASE_INSENSITIVE_ORDER);

  for (DataSegment segment : segments) {
    mergedDimensions.addAll(segment.getDimensions());
    mergedMetrics.addAll(segment.getMetrics());
  }

  return DataSegment.builder()
                    .dataSource(dataSource)
                    .interval(mergedInterval)
                    .version(version)
                    .binaryVersion(IndexIO.CURRENT_VERSION_ID)
                    .shardSpec(new NoneShardSpec())
                    .dimensions(Lists.newArrayList(mergedDimensions))
                    .metrics(Lists.newArrayList(mergedMetrics))
                    .build();
}
}

"Starting merge of id[%s], segments: %s", getId(), Lists.transform( segments, new Function<DataSegment, String>() mergedSegment.getDataSource(), segments.size(), System.currentTimeMillis() - startTime emitter.emit(builder.build("merger/mergeSize", uploadedSegment.getSize())); toolbox.pushSegments(ImmutableList.of(uploadedSegment)); log.makeAlert(e, "Exception merging[%s]", mergedSegment.getDataSource()) .addData("interval", mergedSegment.getInterval()) .emit();
private List<Pair<Interval, byte[]>> pruneSegmentsWithCachedResults(
    final byte[] queryCacheKey,
    final Set<ServerToSegment> segments
)
{
  if (queryCacheKey == null) {
    return Collections.emptyList();
  }
  final List<Pair<Interval, byte[]>> alreadyCachedResults = Lists.newArrayList();
  Map<ServerToSegment, Cache.NamedKey> perSegmentCacheKeys = computePerSegmentCacheKeys(segments, queryCacheKey);
  // Pull cached segments from cache and remove from set of segments to query
  final Map<Cache.NamedKey, byte[]> cachedValues = computeCachedValues(perSegmentCacheKeys);

  perSegmentCacheKeys.forEach((segment, segmentCacheKey) -> {
    final Interval segmentQueryInterval = segment.getSegmentDescriptor().getInterval();

    final byte[] cachedValue = cachedValues.get(segmentCacheKey);
    if (cachedValue != null) {
      // remove cached segment from set of segments to query
      segments.remove(segment);
      alreadyCachedResults.add(Pair.of(segmentQueryInterval, cachedValue));
    } else if (populateCache) {
      // otherwise, if populating cache, add segment to list of segments to cache
      final String segmentIdentifier = segment.getServer().getSegment().getIdentifier();
      addCachePopulator(segmentCacheKey, segmentIdentifier, segmentQueryInterval);
    }
  });
  return alreadyCachedResults;
}

public static List<LocatedSegmentDescriptor> getTargetLocations(
    TimelineServerView serverView,
    DataSource datasource,
    List<Interval> intervals,
    int numCandidates
)
{
  TimelineLookup<String, ServerSelector> timeline = serverView.getTimeline(datasource);
  if (timeline == null) {
    return Collections.emptyList();
  }
  List<LocatedSegmentDescriptor> located = Lists.newArrayList();
  for (Interval interval : intervals) {
    for (TimelineObjectHolder<String, ServerSelector> holder : timeline.lookup(interval)) {
      for (PartitionChunk<ServerSelector> chunk : holder.getObject()) {
        ServerSelector selector = chunk.getObject();
        final SegmentDescriptor descriptor = new SegmentDescriptor(
            holder.getInterval(), holder.getVersion(), chunk.getChunkNumber()
        );
        long size = selector.getSegment().getSize();
        // Ask the selector for up to numCandidates servers that can serve this chunk.
        List<DruidServerMetadata> candidates = selector.getCandidates(numCandidates);
        located.add(new LocatedSegmentDescriptor(descriptor, size, candidates));
      }
    }
  }
  return located;
}
}

Map<String, Map<String, Object>> retVal = ImmutableMap.of(
    "tiers", tiers,
    "segments", segments
);
// ...
dataSourceSegmentSize += dataSegment.getSize();
if (dataSegment.getInterval().getStartMillis() < minTime) {
  minTime = dataSegment.getInterval().getStartMillis();
}
if (dataSegment.getInterval().getEndMillis() > maxTime) {
  maxTime = dataSegment.getInterval().getEndMillis();
}
// ...
segments.put("minTime", new DateTime(minTime));
segments.put("maxTime", new DateTime(maxTime));

final Map<Interval, Map<String, Object>> retVal = Maps.newTreeMap(comparator);
for (DataSegment dataSegment : dataSource.getSegments()) {
  if (theInterval.contains(dataSegment.getInterval())) {
    Map<String, Object> segments = retVal.get(dataSegment.getInterval());
    if (segments == null) {
      segments = Maps.newHashMap();
      retVal.put(dataSegment.getInterval(), segments);
    }
    Pair<DataSegment, Set<String>> val = getSegment(dataSegment.getIdentifier());
    segments.put(dataSegment.getIdentifier(), ImmutableMap.of("metadata", val.lhs, "servers", val.rhs));
  }
}
// ...
final Map<Interval, Map<String, Object>> retVal = Maps.newHashMap();
for (DataSegment dataSegment : dataSource.getSegments()) {
  if (theInterval.contains(dataSegment.getInterval())) {
    Map<String, Object> properties = retVal.get(dataSegment.getInterval());
    if (properties == null) {
      properties = Maps.newHashMap();
      properties.put("size", dataSegment.getSize());
      properties.put("count", 1);
      retVal.put(dataSegment.getInterval(), properties);
    } else {
      properties.put("size", MapUtils.getLong(properties, "size", 0L) + dataSegment.getSize());
      properties.put("count", MapUtils.getInt(properties, "count", 0) + 1);
    }
  }
}
// ...
if (theInterval.contains(dataSegment.getInterval())) {
  retVal.add(dataSegment.getIdentifier());
}

public DataSegment uploadDataSegment(
    DataSegment segment,
    final int version,
    final File compressedSegmentData,
    final File descriptorFile,
    final Map<String, String> azurePaths
) throws StorageException, IOException, URISyntaxException
{
  // Upload the zipped segment and its descriptor to the configured Azure container.
  azureStorage.uploadBlob(compressedSegmentData, config.getContainer(), azurePaths.get("index"));
  azureStorage.uploadBlob(descriptorFile, config.getContainer(), azurePaths.get("descriptor"));

  final DataSegment outSegment = segment
      .withSize(compressedSegmentData.length())
      .withLoadSpec(
          ImmutableMap.<String, Object>of(
              "type", AzureStorageDruidModule.SCHEME,
              "containerName", config.getContainer(),
              "blobPath", azurePaths.get("index")
          )
      )
      .withBinaryVersion(version);

  log.info("Deleting file [%s]", compressedSegmentData);
  compressedSegmentData.delete();

  log.info("Deleting file [%s]", descriptorFile);
  descriptorFile.delete();

  return outSegment;
}

final QueryableIndex index = pair.lhs;
if (index.getMetadata() == null) {
  throw new RE("Index metadata doesn't exist for segment[%s]", pair.rhs.getIdentifier());
}
// ...
    new NoneGranularity(),
    rollup,
    ImmutableList.of(interval)
);

    handle,
    dataSource,
    ImmutableList.of(interval)
).lookup(interval);
// ...
for (PartitionChunk<DataSegment> existing : existingHolder.getObject()) {
  if (max == null
      || max.getShardSpec().getPartitionNum() < existing.getObject().getShardSpec().getPartitionNum()) {
    max = SegmentIdentifier.fromDataSegment(existing.getObject());
  }
}
// ...
    new NumberedShardSpec(0, 0)
);
} else if (!max.getInterval().equals(interval) || max.getVersion().compareTo(maxVersion) > 0) {
  log.warn(
      "Cannot allocate new segment for dataSource[%s], interval[%s], maxVersion[%s]: conflicting segment[%s].",
      // ...

return ImmutableMap.<String, Object>of(
    KEY_DIMENSIONS, getDatasourceDimensions(dataSourceName, interval),
    KEY_METRICS, getDatasourceMetrics(dataSourceName, interval)
);
// ...
if (interval == null || interval.isEmpty()) {
  DateTime now = getCurrentTime();
  theInterval = new Interval(segmentMetadataQueryConfig.getDefaultHistory(), now);
} else {
  theInterval = Intervals.of(interval);
}
// ...
for (ServerSelector server : partitionHolder.payloads()) {
  final DataSegment segment = server.getSegment();
  dimensions.addAll(segment.getDimensions());
  metrics.addAll(segment.getMetrics());
}
// ...
    ImmutableMap.of(KEY_DIMENSIONS, dimensions, KEY_METRICS, metrics)
);
// ...
for (Map.Entry<Interval, Object> e : servedIntervals.entrySet()) {
  Interval ival = e.getKey();
  if (curr != null && curr.abuts(ival) && cols.equals(e.getValue())) {
    curr = curr.withEnd(ival.getEnd());
  } else {
    if (curr != null) {
      // ...

Map<String, Map<String, Object>> retVal = ImmutableMap.of(
    "tiers", tiers,
    "segments", segments
);
// ...
for (DataSegment dataSegment : druidDataSource.getSegments()) {
  if (!tierDistinctSegments.get(tier).contains(dataSegment.getIdentifier())) {
    dataSourceSegmentSize += dataSegment.getSize();
    tierDistinctSegments.get(tier).add(dataSegment.getIdentifier());
  }
  if (!totalDistinctSegments.contains(dataSegment.getIdentifier())) {
    totalSegmentSize += dataSegment.getSize();
    totalDistinctSegments.add(dataSegment.getIdentifier());
  }
  minTime = DateTimes.min(minTime, dataSegment.getInterval().getStart());
  maxTime = DateTimes.max(maxTime, dataSegment.getInterval().getEnd());
}

public DruidServer addDataSegment(DataSegment segment)
{
  synchronized (lock) {
    final String segmentId = segment.getIdentifier();
    DataSegment shouldNotExist = segments.get(segmentId);

    if (shouldNotExist != null) {
      log.warn("Asked to add data segment that already exists!? server[%s], segment[%s]", getName(), segmentId);
      return this;
    }

    String dataSourceName = segment.getDataSource();
    DruidDataSource dataSource = dataSources.get(dataSourceName);

    if (dataSource == null) {
      // Lazily create the data source the first time one of its segments is added to this server.
      dataSource = new DruidDataSource(dataSourceName, ImmutableMap.of("client", "side"));
      dataSources.put(dataSourceName, dataSource);
    }

    dataSource.addSegment(segment);
    segments.put(segmentId, segment);
    currSize += segment.getSize();
  }
  return this;
}