@VisibleForTesting
static PartitionKey partitionKeyForTime(long time) {
  Calendar calendar = Calendar.getInstance();
  calendar.setTimeInMillis(time);
  int year = calendar.get(Calendar.YEAR);
  int month = calendar.get(Calendar.MONTH) + 1; // otherwise January would be 0
  int day = calendar.get(Calendar.DAY_OF_MONTH);
  int hour = calendar.get(Calendar.HOUR_OF_DAY);
  int minute = calendar.get(Calendar.MINUTE);
  return PartitionKey.builder()
    .addIntField(FIELD_YEAR, year)
    .addIntField(FIELD_MONTH, month)
    .addIntField(FIELD_DAY, day)
    .addIntField(FIELD_HOUR, hour)
    .addIntField(FIELD_MINUTE, minute)
    .build();
}
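A worked example of the conversion above, for illustration only: the resulting field values depend on the JVM's default time zone, since Calendar.getInstance() uses it, and the FIELD_* constants are assumed here to be the usual "year"/"month"/"day"/"hour"/"minute" names.

// with a UTC default time zone, the epoch start maps to 1970-01-01 00:00
PartitionKey key = partitionKeyForTime(0L);
// which is equivalent to:
PartitionKey expected = PartitionKey.builder()
  .addIntField("year", 1970)
  .addIntField("month", 1)   // January, because of the +1 adjustment
  .addIntField("day", 1)
  .addIntField("hour", 0)
  .addIntField("minute", 0)
  .build();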
private PartitionDetail getLatestPartition() throws IOException {
  Long latestTime = getLatestSnapshot();
  if (latestTime == null) {
    return null;
  }
  PartitionKey partitionKey = PartitionKey.builder().addLongField(SNAPSHOT_FIELD, latestTime).build();
  PartitionDetail partitionDetail = files.getPartition(partitionKey);
  if (partitionDetail == null) {
    throw new IllegalStateException(String.format(
      "No snapshot files found for latest recorded snapshot from '%d'. "
        + "This can happen if files are deleted manually without updating the state file. "
        + "Please fix the state file to contain the latest snapshot, or delete the file and write another snapshot.",
      latestTime));
  }
  return partitionDetail;
}
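For contrast, a hedged sketch of the matching write path (not taken from the source): it assumes the snapshot partition key is the single long field named by SNAPSHOT_FIELD, and recordLatestSnapshot(...) is a hypothetical helper standing in for whatever updates the state file that getLatestSnapshot() reads.

private void addSnapshotPartition(long snapshotTime, String relativePath) throws IOException {
  PartitionKey key = PartitionKey.builder().addLongField(SNAPSHOT_FIELD, snapshotTime).build();
  // register the files written for this snapshot under the given relative path
  files.addPartition(key, relativePath);
  // hypothetical helper: persist snapshotTime so getLatestSnapshot() can find it later
  recordLatestSnapshot(snapshotTime);
}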
.addStringField("type", "x") .addLongField("time", 150000L) .build(); .addStringField("type", "y") .addLongField("time", 200000L) .build();
for (int l = 0; l < 4; l++) {
  final PartitionKey key = PartitionKey.builder()
    .addField("s", String.format("%c-%d", 'a' + s, s))
    .addField("i", i * 100)
    .addField("l", 15L - 10 * l)
    .build();
  BasicPartition basicPartition = dsFrameworkUtil
    .newTransactionExecutor((TransactionAware) dataset)
    .execute(new Callable<BasicPartition>() {
PartitionKey partitionKey = PartitionKey.builder().addLongField("time", partition).build();
PartitionedFileSet pfs = getContext().getDataset(dataset);
final PartitionOutput partitionOutput = pfs.getPartitionOutput(partitionKey);
PartitionDetail partition = pfs.getPartition(PartitionKey.builder().addLongField("time", 1).build());
verifyDataNotification(notifications.get(0),
                       NamespaceId.DEFAULT.dataset(FileUploadApp.PFS_NAME),
                       Collections.singletonList(PartitionKey.builder().addLongField("time", 1L).build()));
for (int i = 0; i < 10; i++) {
  PartitionKey key = PartitionKey.builder()
    .addIntField("i", i)
    .addLongField("l", 17L)
    .addStringField("s", "partitionKeys1")
    .build();
  partitionKeys1.add(key);
}
for (int i = 0; i < 15; i++) {
  PartitionKey key = PartitionKey.builder()
    .addIntField("i", i)
    .addLongField("l", 17L)
    .addStringField("s", "partitionKeys2")
    .build();
  partitionKeys2.add(key);
}
PartitionedFileSet cleanRecords = cleanRecordsManager.get();
PartitionKey outputPartition = PartitionKey.builder().addLongField("time", 5000).build();
PartitionOutput partitionOutput = cleanRecords.getPartitionOutput(outputPartition);
Location partitionLocation = partitionOutput.getLocation();
.addField("month", 4) .addField("duration", 75 * minute) .addField("market", "asia") .addField("year", 2012) .build())); .addField("month", 7) .addField("duration", 75 * minute) .addField("year", 2012) .build())); .addField("month", 4) .addField("duration", 75 * minute) .addField("year", 2012) .build())); .addField("day", "tue") .addField("month", 4) .addField("duration", 75 * minute) .addField("year", 2012) .addField("market", "asia") .build()));
.addIntField("year", 2014) .addIntField("month", 1) .addIntField("day", 1) .addIntField("hour", 20) .addIntField("minute", 54) .build(); TimePartitionedFileSet ds1 = dsFrameworkUtil.getInstance(TPFS_INSTANCE, args); TimePartitionedFileSetArguments.setOutputPartitionKey(args, key);
// keys that do not match the dataset's partitioning (i: int, l: long, s: string) are rejected
try {
  pfs.getPartitionOutput(
    PartitionKey.builder().addField("i", 1).addField("l", 2L).build());
  Assert.fail("should have thrown exception due to missing field");
} catch (IllegalArgumentException e) {
  // expected
}
try {
  pfs.addPartition(
    PartitionKey.builder().addField("i", 1).addField("l", "2").addField("s", "a").build(),
    "some/location");
  Assert.fail("should have thrown exception due to incompatible field");
} catch (IllegalArgumentException e) {
  // expected
}
try {
  pfs.addPartition(
    PartitionKey.builder().addField("i", 1).addField("l", 2L).addField("s", "a").addField("x", "x").build(),
    "some/location", ImmutableMap.of("a", "b"));
  Assert.fail("should have thrown exception due to extra field");
} catch (IllegalArgumentException e) {
  // expected
}
// a key that matches the partitioning is accepted
pfs.addPartition(
  PartitionKey.builder().addField("i", 1).addField("l", 2L).addField("s", "a").build(),
  "some/location", ImmutableMap.of("a", "b"));
try {
  pfs.addMetadata(
    PartitionKey.builder().addField("i", 1).addField("l", 2L).addField("s", "a").addField("x", "x").build(),
    ImmutableMap.of("abc", "xyz"));
  Assert.fail("should have thrown exception due to extra field");
} catch (IllegalArgumentException e) {
  // expected
}
try {
  pfs.dropPartition(PartitionKey.builder().addField("i", 1).addField("l", 2L).addField("s", 0).build());
  Assert.fail("should have thrown exception due to incompatible field");
} catch (IllegalArgumentException e) {
  // expected
}
.addIntField("i", 42) .addLongField("l", 17L) .addStringField("s", "nonexistent") .build(); dataset.addMetadata(nonexistentPartitionKey, "key2", "value3"); Assert.fail("Expected not to be able to add metadata for a nonexistent partition");
writer.write("1,x\n"); pfs.get().addPartition(PartitionKey.builder().addStringField("x", "1").build(), "some/path"); pfs.flush();
@PathParam("partition") String partition, @PathParam("sub-partition") int subPartition) { PartitionKey key = PartitionKey.builder() .addStringField("partition", partition) .addIntField("sub-partition", subPartition) .build();
DataSetManager<PartitionedFileSet> pfsManager = getDataset("pfs"); PartitionedFileSet pfs = pfsManager.get(); PartitionOutput partitionOutput = pfs.getPartitionOutput(PartitionKey.builder().addStringField("x", "nn").build()); Location location = partitionOutput.getLocation(); prepareFileInput(location); inputArgs, PartitionFilter.builder().addRangeCondition("x", "na", "nx").build()); Map<String, String> outputArgs = new HashMap<>(); PartitionKey outputKey = PartitionKey.builder().addStringField("x", "xx").build(); PartitionedFileSetArguments.setOutputPartitionKey(outputArgs, outputKey); Map<String, String> args = new HashMap<>();
@Override
public void run(DatasetContext context) throws Exception {
  Map<String, Long> wordCounts = new HashMap<>();
  for (PartitionDetail partition : partitions) {
    ByteBuffer content;
    Location location = partition.getLocation();
    content = ByteBuffer.wrap(ByteStreams.toByteArray(location.getInputStream()));
    String string = Bytes.toString(Bytes.toBytes(content));
    for (String token : string.split(" ")) {
      Long count = Objects.firstNonNull(wordCounts.get(token), 0L);
      wordCounts.put(token, count + 1);
    }
  }
  IncrementingKeyValueTable counts = context.getDataset("counts");
  for (Map.Entry<String, Long> entry : wordCounts.entrySet()) {
    counts.write(Bytes.toBytes(entry.getKey()), entry.getValue());
  }
  PartitionedFileSet outputLines = context.getDataset("outputLines");
  PartitionKey partitionKey = PartitionKey.builder().addLongField("time", System.currentTimeMillis()).build();
  PartitionOutput outputPartition = outputLines.getPartitionOutput(partitionKey);
  Location partitionDir = outputPartition.getLocation();
  partitionDir.mkdirs();
  Location outputLocation = partitionDir.append("file");
  outputLocation.createNew();
  try (OutputStream outputStream = outputLocation.getOutputStream()) {
    outputStream.write(Bytes.toBytes(Joiner.on("\n").join(wordCounts.values())));
  }
  outputPartition.addPartition();
}
});
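If the partition written above later needs to be read back, a sketch along these lines could follow; it assumes "outputLines" is partitioned on the single long field "time", as the key above implies, and reuses the same key to look the partition up.

PartitionDetail written = outputLines.getPartition(partitionKey);
if (written != null) {
  // the file name "file" matches what the writer above created inside the partition directory
  Location file = written.getLocation().append("file");
  String contents = new String(ByteStreams.toByteArray(file.getInputStream()), StandardCharsets.UTF_8);
}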
@Override
public void initialize() throws Exception {
  MapReduceContext context = getContext();
  Job job = context.getHadoopJob();
  job.setMapperClass(TokenMapper.class);
  job.setNumReduceTasks(0);
  String inputText = getContext().getRuntimeArguments().get("input.text");
  Preconditions.checkNotNull(inputText);
  KeyValueTable kvTable = getContext().getDataset(INPUT);
  kvTable.write("key", inputText);
  context.addInput(Input.ofDataset(INPUT, kvTable.getSplits(1, null, null)));
  String outputDatasets = getContext().getRuntimeArguments().get("output.datasets");
  outputDatasets = outputDatasets != null ? outputDatasets : PFS;
  for (String outputName : outputDatasets.split(",")) {
    String outputPartition = getContext().getRuntimeArguments().get(outputName + ".output.partition");
    PartitionKey outputPartitionKey = outputPartition == null ? null :
      PartitionKey.builder().addField("number", Integer.parseInt(outputPartition)).build();
    Map<String, String> outputArguments = new HashMap<>();
    if (outputPartitionKey != null) {
      PartitionedFileSetArguments.setOutputPartitionKey(outputArguments, outputPartitionKey);
    } else {
      PartitionedFileSetArguments.setDynamicPartitioner(outputArguments, KeyPartitioner.class);
    }
    context.addOutput(Output.ofDataset(outputName, outputArguments));
  }
}
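For the dynamic-partitioning branch above, KeyPartitioner derives a partition key from each record at write time rather than using one fixed key. A minimal sketch, assuming the mapper emits the partition number as the record key, that CDAP's DynamicPartitioner exposes a getPartitionKey(key, value) callback, and with illustrative key/value types:

public static final class KeyPartitioner extends DynamicPartitioner<Integer, String> {
  @Override
  public PartitionKey getPartitionKey(Integer key, String value) {
    // route each record to the partition named by its key; the "number" field
    // mirrors the static-key branch above
    return PartitionKey.builder().addField("number", key).build();
  }
}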
/**
 * @param arguments the runtime arguments for a partitioned dataset
 * @param partitioning the declared partitioning for the dataset, needed for proper interpretation of values
 * @return the partition key of the output partition to be written; or null if no partition key was found
 */
@Nullable
public static PartitionKey getOutputPartitionKey(Map<String, String> arguments, Partitioning partitioning) {
  // extract the arguments that describe the output partition key
  Map<String, String> keyArguments = FileSetProperties.propertiesWithPrefix(arguments, OUTPUT_PARTITION_KEY_PREFIX);
  if (keyArguments.isEmpty()) {
    return null; // there is no output partition key
  }
  // there is a partition key; now it is required to match the partitioning
  PartitionKey.Builder builder = PartitionKey.builder();
  for (Map.Entry<String, FieldType> entry : partitioning.getFields().entrySet()) {
    String fieldName = entry.getKey();
    FieldType fieldType = entry.getValue();
    String stringValue = keyArguments.get(fieldName);
    Comparable fieldValue = convertFieldValue("key", "value", fieldName, fieldType, stringValue, false);
    builder.addField(fieldName, fieldValue);
  }
  return builder.build();
}
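A round-trip usage sketch for the method above: setOutputPartitionKey stores one runtime argument per key field (internally prefixed with OUTPUT_PARTITION_KEY_PREFIX), and getOutputPartitionKey reads them back against the declared partitioning. The field names and the Partitioning shown here are illustrative, not taken from the source.

Partitioning partitioning = Partitioning.builder()
  .addIntField("year")
  .addStringField("market")
  .build();

Map<String, String> runtimeArgs = new HashMap<>();
PartitionedFileSetArguments.setOutputPartitionKey(
  runtimeArgs,
  PartitionKey.builder().addIntField("year", 2012).addStringField("market", "asia").build());

// each string value is converted back to the declared field type (int for "year", string for "market")
PartitionKey outputKey = PartitionedFileSetArguments.getOutputPartitionKey(runtimeArgs, partitioning);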