/**
 * Classifies a partition by the "s" and "i" fields of its partition key.
 *
 * @param partitionDetail the partition to inspect
 * @return {@code SKIP} when the "s" field differs from {@code allowedSField};
 *         {@code STOP} when {@code stopOnI} is set and equals the "i" field;
 *         {@code ACCEPT} otherwise
 */
@Override
public Return accept(PartitionDetail partitionDetail) {
  String partitionS = (String) partitionDetail.getPartitionKey().getField("s");
  boolean allowed = allowedSField.equals(partitionS);
  if (!allowed) {
    return Return.SKIP;
  }

  // The "i" field is only read once the "s" field has matched.
  int partitionI = (int) partitionDetail.getPartitionKey().getField("i");
  boolean reachedStop = stopOnI != null && stopOnI.equals(partitionI);
  return reachedStop ? Return.STOP : Return.ACCEPT;
}
}
partitionsTable.delete(rowKey); if (!isExternal) { Location partitionLocation = partition.getLocation(); try { if (partitionLocation.exists()) { Location dstLocation = getQuarantineLocation().append(partition.getRelativePath()); Location dstParent = Locations.getParent(dstLocation); throw new DataSetException(String.format("Failed to move location %s into quarantine", partitionLocation), ioe); operationsInThisTx.add(new DropPartitionOperation(key, partition.getRelativePath()));
/**
 * Looks up the partition whose key is derived from the given time.
 *
 * @param time the time passed to {@code partitionKeyForTime} to build the lookup key
 * @return a time-partition view of the matching partition, or {@code null} if none exists
 */
@Nullable
@Override
public TimePartitionDetail getPartitionByTime(long time) {
  PartitionDetail found = getPartition(partitionKeyForTime(time));
  if (found == null) {
    return null;
  }
  return new BasicTimePartitionDetail(this,
                                      found.getRelativePath(),
                                      found.getPartitionKey(),
                                      found.getMetadata());
}
// Require that the output partition exists, validate the file output under both partitions' locations,
// then drop both partitions from tpfs.
Assert.assertNotNull("Output partition is null while for running without custom dataset arguments", partition); validateFileOutput(partition.getLocation()); validateFileOutput(customPartition.getLocation()); tpfs.dropPartition(partition.getPartitionKey()); tpfs.dropPartition(customPartition.getPartitionKey());
/**
 * Returns the location of the latest partition.
 *
 * <p>The lock obtained from {@code lock()} is deleted before this method returns,
 * whether it completes normally or throws.
 *
 * @return the latest partition's location, or {@code null} if there is no latest partition
 * @throws IOException declared by the underlying calls
 * @throws InterruptedException declared by the underlying calls
 */
@Nullable
public Location getLocation() throws IOException, InterruptedException {
  Location lock = lock();
  try {
    PartitionDetail latest = getLatestPartition();
    return latest == null ? null : latest.getLocation();
  } finally {
    lock.delete();
  }
}
/**
 * Builds the input format configuration for all input partitions.
 *
 * <p>Each input key is resolved to its partition and then to a location in {@code files}.
 * The configuration produced by {@code files} for those locations is extended with a
 * JSON-encoded mapping from location URI to partition key, stored under
 * {@code PATH_TO_PARTITIONING_MAPPING}.
 *
 * @return the input format configuration, including the path-to-partition-key mapping
 * @throws NullPointerException if an input key has no corresponding partition
 */
@Override
public Map<String, String> getInputFormatConfiguration() {
  Collection<PartitionKey> inputKeys = getInputKeys();
  List<Location> inputLocations = new ArrayList<>(inputKeys.size());
  Map<String, PartitionKey> pathToKey = new HashMap<>(inputKeys.size());
  for (PartitionKey key : inputKeys) {
    PartitionDetail partition = getPartition(key);
    // Name the offending key so a missing partition is diagnosable from the stack trace alone.
    String path = Objects.requireNonNull(partition, "No partition found for input key " + key).getRelativePath();
    Location partitionLocation = files.getLocation(path);
    inputLocations.add(partitionLocation);
    pathToKey.put(partitionLocation.toURI().toString(), key);
  }
  Map<String, String> inputFormatConfiguration = files.getInputFormatConfiguration(inputLocations);
  inputFormatConfiguration.put(PATH_TO_PARTITIONING_MAPPING, GSON.toJson(pathToKey));
  return inputFormatConfiguration;
}
@Override public void apply() throws Exception { PartitionOutput partitionOutput = dataset.getPartitionOutput(PARTITION_KEY); long beforeTime = System.currentTimeMillis(); partitionOutput.addPartition(); long afterTime = System.currentTimeMillis(); PartitionDetail partitionDetail = dataset.getPartition(PARTITION_KEY); Assert.assertNotNull(partitionDetail); long creationTime = partitionDetail.getMetadata().getCreationTime(); long lastModificationTime = partitionDetail.getMetadata().lastModificationTime(); // lastModificationTime time should be equal to creationTime for a partition that has not been appended to Assert.assertEquals(creationTime, lastModificationTime); Assert.assertTrue(creationTime >= beforeTime && creationTime <= afterTime); } });
// Look up the partition for outputKey, require that it exists, validate its file output,
// then drop it from pfs.
PartitionDetail partition = pfs.getPartition(outputKey); Assert.assertNotNull(partition); validateFileOutput(partition.getLocation()); pfs.dropPartition(partition.getPartitionKey());
/**
 * Counts space-separated tokens across the content of all {@code partitions}, writes each
 * token's count to the "counts" dataset, and writes the counts (newline-joined) to a file
 * named "file" in a new partition of the "outputLines" dataset keyed by the current time.
 *
 * @param context the context used to obtain the "counts" and "outputLines" datasets
 * @throws Exception if reading a partition, writing the output, or a dataset operation fails
 */
@Override
public void run(DatasetContext context) throws Exception {
  Map<String, Long> wordCounts = new HashMap<>();
  for (PartitionDetail partition : partitions) {
    Location location = partition.getLocation();
    ByteBuffer content;
    // Close the partition's input stream once it has been fully read; previously it was leaked.
    try (java.io.InputStream inputStream = location.getInputStream()) {
      content = ByteBuffer.wrap(ByteStreams.toByteArray(inputStream));
    }
    String string = Bytes.toString(Bytes.toBytes(content));
    for (String token : string.split(" ")) {
      Long count = Objects.firstNonNull(wordCounts.get(token), 0L);
      wordCounts.put(token, count + 1);
    }
  }

  IncrementingKeyValueTable counts = context.getDataset("counts");
  for (Map.Entry<String, Long> entry : wordCounts.entrySet()) {
    counts.write(Bytes.toBytes(entry.getKey()), entry.getValue());
  }

  PartitionedFileSet outputLines = context.getDataset("outputLines");
  PartitionKey partitionKey = PartitionKey.builder().addLongField("time", System.currentTimeMillis()).build();
  PartitionOutput outputPartition = outputLines.getPartitionOutput(partitionKey);

  Location partitionDir = outputPartition.getLocation();
  partitionDir.mkdirs();
  Location outputLocation = partitionDir.append("file");
  outputLocation.createNew();
  try (OutputStream outputStream = outputLocation.getOutputStream()) {
    outputStream.write(Bytes.toBytes(Joiner.on("\n").join(wordCounts.values())));
  }
  // Register the partition only after its file has been written.
  outputPartition.addPartition();
}
});
/**
 * Builds the input format configuration for all input partitions.
 *
 * <p>Each input key is resolved to its partition and then to a location in {@code files}.
 * The configuration produced by {@code files} for those locations is extended with a
 * JSON-encoded mapping from location URI to partition key, stored under
 * {@code PATH_TO_PARTITIONING_MAPPING}.
 *
 * @return the input format configuration, including the path-to-partition-key mapping
 * @throws NullPointerException if an input key has no corresponding partition
 */
@Override
public Map<String, String> getInputFormatConfiguration() {
  Collection<PartitionKey> inputKeys = getInputKeys();
  List<Location> inputLocations = new ArrayList<>(inputKeys.size());
  Map<String, PartitionKey> pathToKey = new HashMap<>(inputKeys.size());
  for (PartitionKey key : inputKeys) {
    PartitionDetail partition = getPartition(key);
    // Name the offending key so a missing partition is diagnosable from the stack trace alone.
    String path = Objects.requireNonNull(partition, "No partition found for input key " + key).getRelativePath();
    Location partitionLocation = files.getLocation(path);
    inputLocations.add(partitionLocation);
    pathToKey.put(partitionLocation.toURI().toString(), key);
  }
  Map<String, String> inputFormatConfiguration = files.getInputFormatConfiguration(inputLocations);
  inputFormatConfiguration.put(PATH_TO_PARTITIONING_MAPPING, GSON.toJson(pathToKey));
  return inputFormatConfiguration;
}
// Merge originalEntries and updatedMetadata into combinedEntries and compare against the partition's metadata map.
// NOTE(review): these statements look like non-adjacent excerpts (the null check follows a dereference, and the
// expected maps differ between assertions) - confirm the surrounding context before relying on their order.
combinedEntries.putAll(originalEntries); combinedEntries.putAll(updatedMetadata); Assert.assertEquals(combinedEntries, partitionDetail.getMetadata().asMap()); Assert.assertNotNull(partitionDetail); Assert.assertEquals(ImmutableMap.of("key1", "value1", "key2", "value2", "key3", "value4"), partitionDetail.getMetadata().asMap()); Assert.assertEquals(ImmutableMap.of("key1", "value1"), partitionDetail.getMetadata().asMap());
/**
 * Drops every partition matched by a range condition on {@code SNAPSHOT_FIELD} with no
 * lower bound and {@code upperLimit} as the upper bound.
 *
 * <p>Does nothing unless {@code 0 < upperLimit < Long.MAX_VALUE}.
 *
 * @param upperLimit the upper bound passed to the range condition
 * @throws IOException declared by the underlying dataset calls
 */
public void deleteMatchingPartitionsByTime(long upperLimit) throws IOException {
  if (upperLimit <= 0 || upperLimit == Long.MAX_VALUE) {
    return;
  }
  PartitionFilter filter = PartitionFilter.builder()
    .addRangeCondition(SNAPSHOT_FIELD, null, upperLimit)
    .build();
  for (PartitionDetail matched : files.getPartitions(filter)) {
    files.dropPartition(matched.getPartitionKey());
  }
}
/**
 * Looks up the partition whose key is derived from the given time.
 *
 * @param time the time passed to {@code partitionKeyForTime} to build the lookup key
 * @return a time-partition view of the matching partition, or {@code null} if none exists
 */
@Nullable
@Override
public TimePartitionDetail getPartitionByTime(long time) {
  PartitionDetail found = getPartition(partitionKeyForTime(time));
  if (found == null) {
    return null;
  }
  return new BasicTimePartitionDetail(this,
                                      found.getRelativePath(),
                                      found.getPartitionKey(),
                                      found.getMetadata());
}
partitionsTable.delete(rowKey); if (!isExternal) { Location partitionLocation = partition.getLocation(); try { if (partitionLocation.exists()) { Location dstLocation = getQuarantineLocation().append(partition.getRelativePath()); Location dstParent = Locations.getParent(dstLocation); throw new DataSetException(String.format("Failed to move location %s into quarantine", partitionLocation), ioe); operationsInThisTx.add(new DropPartitionOperation(key, partition.getRelativePath()));
@Override public void apply() throws Exception { Assert.assertTrue(pfsBaseLocation.exists()); // attempt to write a new partition - should fail try { pfs.getPartitionOutput(PARTITION_KEY); Assert.fail("External partitioned file set should not allow writing files"); } catch (UnsupportedOperationException e) { // expected } // create an external file and add it as a partition File someFile = new File(absolutePath, "some.file"); OutputStream out = new FileOutputStream(someFile); out.close(); Assert.assertTrue(someFile.exists()); pfs.addPartition(PARTITION_KEY, "some.file"); Assert.assertNotNull(pfs.getPartition(PARTITION_KEY)); Assert.assertTrue(pfs.getPartition(PARTITION_KEY).getLocation().exists()); // now drop the partition and validate the file is still there pfs.dropPartition(PARTITION_KEY); Assert.assertNull(pfs.getPartition(PARTITION_KEY)); Assert.assertTrue(someFile.exists()); } });
/**
 * Asserts that the partition found for {@code time} matches the expectation expressed by
 * {@code path}.
 *
 * @param dataset the dataset to query
 * @param time the partition time to look up
 * @param path the expected relative path, or {@code null} if no partition is expected
 */
private void validateTimePartition(TimePartitionedFileSet dataset, long time, String path) {
  PartitionDetail found = dataset.getPartitionByTime(time);

  // Either both are absent or both are present.
  Assert.assertEquals(path == null, found == null);

  if (path != null && found != null) {
    Assert.assertTrue(path.equals(found.getRelativePath()));
  }
}
/**
 * Adds a partition with metadata attached and checks that reading the partition back
 * returns the same metadata map.
 */
@Override
public void apply() throws Exception {
  ImmutableMap<String, String> expectedMetadata =
    ImmutableMap.of("key1", "value", "key2", "value2", "key3", "value2");

  PartitionKey key = PartitionKey.builder()
    .addIntField("i", 42)
    .addLongField("l", 17L)
    .addStringField("s", "x")
    .build();

  // Metadata is set on the output before the partition is added.
  PartitionOutput output = dataset.getPartitionOutput(key);
  output.setMetadata(expectedMetadata);
  output.addPartition();

  PartitionDetail stored = dataset.getPartition(key);
  Assert.assertNotNull(stored);
  Assert.assertEquals(expectedMetadata, stored.getMetadata().asMap());
}
});
/**
 * Populates the ConsumerWorkingSet by fetching partitions from the given PartitionedFileSet.
 *
 * <p>The number of partitions requested is the configured maximum working set size minus the
 * number of partitions currently held. The key of each returned partition is added to this
 * working set, and the consumer state is replaced by the state returned with the result.
 *
 * @param partitionedFileSet the PartitionedFileSet to fetch partitions from
 * @param configuration the ConsumerConfiguration which defines parameters for consuming
 */
public void populate(PartitionedFileSet partitionedFileSet, ConsumerConfiguration configuration) {
  // 'partitions' here is the instance member, i.e. what the working set currently holds.
  int remainingCapacity = configuration.getMaxWorkingSetSize() - partitions.size();
  Predicate<PartitionDetail> acceptFilter = configuration.getPartitionPredicate();

  co.cask.cdap.api.dataset.lib.PartitionConsumerResult consumeResult =
    partitionedFileSet.consumePartitions(partitionConsumerState, remainingCapacity, acceptFilter);

  List<PartitionDetail> consumed = consumeResult.getPartitions();
  for (PartitionDetail consumedPartition : consumed) {
    addPartition(consumedPartition.getPartitionKey());
  }
  partitionConsumerState = consumeResult.getPartitionConsumerState();
}
/**
 * Serves the content of the "file" entry under the partition identified by the path parameters.
 *
 * <p>Responds with 404 if the partition does not exist, with 200 and content type
 * "text/plain" on success, and with a 400 error if an {@link IOException} occurs.
 *
 * @param request the incoming request
 * @param responder the responder used to send the result
 * @param partition value of the "partition" key field
 * @param subPartition value of the "sub-partition" key field
 */
@GET
@Path("partitions/{partition}/subpartitions/{sub-partition}")
public void read(HttpServiceRequest request, HttpServiceResponder responder,
                 @PathParam("partition") String partition,
                 @PathParam("sub-partition") int subPartition) {
  PartitionKey requestedKey = PartitionKey.builder()
    .addStringField("partition", partition)
    .addIntField("sub-partition", subPartition)
    .build();
  PartitionDetail found = pfs.getPartition(requestedKey);

  if (found != null) {
    try {
      responder.send(200, found.getLocation().append("file"), "text/plain");
    } catch (IOException e) {
      responder.sendError(400, String.format("Unable to read path '%s'", found.getRelativePath()));
    }
    return;
  }
  responder.sendString(404, "Partition not found.", Charsets.UTF_8);
}
// The partition's location must list exactly one entry, and that entry's bytes must equal the expected content.
List<Location> locations = partition.getLocation().list(); Assert.assertEquals(1, locations.size()); Assert.assertArrayEquals(content, ByteStreams.toByteArray(Locations.newInputSupplier(locations.get(0))));