@Override
public Tuple2<OUT_KEY, OUT_VAL> apply(KeyValue<OUT_KEY, OUT_VAL> input) {
  return new Tuple2<>(input.getKey(), input.getValue());
}
});
@Override
public void write(NullWritable key, StructuredRecord value) throws IOException, InterruptedException {
  // Convert the StructuredRecord into the delegate writer's key/value types before writing.
  KeyValue<K, V> converted = conversion.apply(value);
  delegate.write(converted.getKey(), converted.getValue());
}
@Override
public void write(String sinkName, KeyValue<KEY_OUT, VAL_OUT> output) throws Exception {
  context.write(output.getKey(), output.getValue());
}
}
Map<String, List<String>> getPerStageJoinKeys() {
  Map<String, List<String>> stageToKey = new HashMap<>();
  if (Strings.isNullOrEmpty(joinKeys)) {
    throw new IllegalArgumentException("Join keys cannot be empty.");
  }

  // Multiple join conditions are separated by '&'; within each condition,
  // 'stage.field' expressions are paired with '='.
  Iterable<String> multipleJoinKeys = Splitter.on('&').trimResults().omitEmptyStrings().split(joinKeys);
  if (Iterables.isEmpty(multipleJoinKeys)) {
    throw new IllegalArgumentException("Join keys cannot be empty.");
  }

  int numJoinKeys = 0;
  for (String singleJoinKey : multipleJoinKeys) {
    KeyValueListParser kvParser = new KeyValueListParser("\\s*=\\s*", "\\.");
    Iterable<KeyValue<String, String>> keyValues = kvParser.parse(singleJoinKey);
    if (numJoinKeys == 0) {
      numJoinKeys = Iterables.size(keyValues);
    } else if (numJoinKeys != Iterables.size(keyValues)) {
      throw new IllegalArgumentException("There should be one join key from each of the stages. Please add join " +
                                           "keys for each stage.");
    }
    for (KeyValue<String, String> keyValue : keyValues) {
      String stageName = keyValue.getKey();
      String joinKey = keyValue.getValue();
      if (!stageToKey.containsKey(stageName)) {
        stageToKey.put(stageName, new ArrayList<String>());
      }
      stageToKey.get(stageName).add(joinKey);
    }
  }
  return stageToKey;
}
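A hedged usage sketch: assuming the "stage.field = stage.field [& ...]" syntax the parser above expects, this self-contained snippet mirrors the same splitting rules with plain Guava. The stage and field names (customers, orders, id, and so on) are illustrative only, not taken from the original code.

import com.google.common.base.Splitter;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class JoinKeyParseSketch {
  public static void main(String[] args) {
    // Two join conditions across the same two (hypothetical) stages.
    String joinKeys = "customers.id = orders.customer_id & customers.region = orders.region";
    Map<String, List<String>> stageToKey = new HashMap<>();
    // '&' separates conditions; "\\s*=\\s*" separates the stage.field pairs within one.
    for (String condition : Splitter.on('&').trimResults().omitEmptyStrings().split(joinKeys)) {
      for (String stageDotField : Splitter.onPattern("\\s*=\\s*").split(condition)) {
        int dot = stageDotField.indexOf('.');
        stageToKey.computeIfAbsent(stageDotField.substring(0, dot), s -> new ArrayList<>())
          .add(stageDotField.substring(dot + 1));
      }
    }
    // Prints {customers=[id, region], orders=[customer_id, region]} (map order may vary).
    System.out.println(stageToKey);
  }
}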
@Override
public void write(String sinkName, KeyValue<KEY_OUT, VAL_OUT> output) throws Exception {
  // A single logical sink can fan out to multiple named outputs.
  for (String outputName : sinkOutputs.get(sinkName).getSinkOutputs()) {
    context.write(outputName, output.getKey(), output.getValue());
  }
}
}
@Override
public void transform(KeyValue<REDUCE_KEY, Iterator<REDUCE_VAL>> input, Emitter<OUT> emitter) throws Exception {
  GROUP_KEY groupKey = keyConversion.fromWritable(input.getKey());
  // Lazily convert each Hadoop Writable value back to the aggregator's value type.
  Iterator<GROUP_VAL> iter = Iterators.transform(input.getValue(), new Function<REDUCE_VAL, GROUP_VAL>() {
    @Nullable
    @Override
    public GROUP_VAL apply(@Nullable REDUCE_VAL input) {
      return valConversion.fromWritable(input);
    }
  });
  aggregator.aggregate(groupKey, iter, emitter);
}
}
@WriteOnly
@Override
public void write(KeyValue<byte[], byte[]> keyValue) throws IOException {
  write(keyValue.getKey(), keyValue.getValue());
}
@Override
protected KeyValue<byte[], T> computeNext() {
  Preconditions.checkState(!closed);
  if (keyValueIterator.hasNext()) {
    KeyValue<byte[], byte[]> row = keyValueIterator.next();
    return new KeyValue<>(row.getKey(), decode(row.getValue()));
  }
  // No more rows: release the underlying scanner. Returning null here assumes
  // the iterator base class treats a null from computeNext() as end-of-data.
  close();
  return null;
}
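For context, a minimal consumption sketch, assuming the surrounding class exposes the usual CloseableIterator contract; the countRows helper and the co.cask-era package names are illustrative assumptions, not part of the original code.

import co.cask.cdap.api.dataset.lib.CloseableIterator;
import co.cask.cdap.api.dataset.lib.KeyValue;

final class ScanExample {
  // 'openScan' stands in for whatever factory method returns the iterator
  // whose computeNext() is shown above; it is a hypothetical name.
  static <T> long countRows(CloseableIterator<KeyValue<byte[], T>> openScan) {
    // try-with-resources guarantees close() runs even if iteration stops early.
    try (CloseableIterator<KeyValue<byte[], T>> it = openScan) {
      long rows = 0;
      while (it.hasNext()) {
        it.next();
        rows++;
      }
      return rows;
    }
  }
}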
@Override
public void transform(KeyValue<REDUCE_KEY, Iterator<TaggedWritable<REDUCE_VALUE>>> input,
                      Emitter<OUT> emitter) throws Exception {
  JOIN_KEY joinKey = keyConversion.fromWritable(input.getKey());
  // Each value is tagged with the stage it came from; rebuild JoinElements lazily.
  Iterator<JoinElement<INPUT_RECORD>> inputIterator =
    Iterators.transform(input.getValue(), new Function<TaggedWritable<REDUCE_VALUE>, JoinElement<INPUT_RECORD>>() {
      @Nullable
      @Override
      public JoinElement<INPUT_RECORD> apply(@Nullable TaggedWritable<REDUCE_VALUE> input) {
        return new JoinElement<>(input.getStageName(), inputConversion.fromWritable(input.getRecord()));
      }
    });
  Join join = new Join<>(joiner, joinKey, inputIterator, numOfInputs, emitter);
  join.joinRecords();
}
}
@Override
public void transform(KeyValue<LongWritable, Object> input, Emitter<StructuredRecord> emitter) throws Exception {
  // The input format hands back a single-entry map of file name to XML record.
  Map<String, String> xmlRecord = (Map<String, String>) input.getValue();
  String fileName = Iterators.getOnlyElement(xmlRecord.keySet().iterator());
  String record = xmlRecord.get(fileName);
  StructuredRecord output = StructuredRecord.builder(DEFAULT_XML_SCHEMA)
    .set("offset", input.getKey().get())
    .set("filename", fileName)
    .set("record", record)
    .build();
  emitter.emit(output);
}
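The shape of DEFAULT_XML_SCHEMA is not shown here, so the following is an inferred sketch only, derived from the three builder calls above (a long offset plus string filename and record fields); the record name "xmlRecord" is a guess.

import co.cask.cdap.api.data.schema.Schema;

public final class XmlSchemaSketch {
  // Inferred shape only; the real constant is defined by the XML reader plugin.
  public static final Schema DEFAULT_XML_SCHEMA = Schema.recordOf(
    "xmlRecord",
    Schema.Field.of("offset", Schema.of(Schema.Type.LONG)),
    Schema.Field.of("filename", Schema.of(Schema.Type.STRING)),
    Schema.Field.of("record", Schema.of(Schema.Type.STRING)));
}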
@Override
public void run(DatasetContext dsContext) throws Exception {
  KeyValueTable stateTable = dsContext.getDataset(ETLRealtimeApplication.STATE_TABLE);
  byte[] startKey = Bytes.toBytes(String.format("%s%s", appName, SEPARATOR));
  // Scan the table for rows with the 'appName:' prefix and remove those that don't
  // match the unique id of this application.
  try (CloseableIterator<KeyValue<byte[], byte[]>> rows =
         stateTable.scan(startKey, Bytes.stopKeyForPrefix(startKey))) {
    while (rows.hasNext()) {
      KeyValue<byte[], byte[]> row = rows.next();
      if (Bytes.compareTo(stateStoreKeyBytes, row.getKey()) != 0) {
        stateTable.delete(row.getKey());
      }
    }
  }
}
}, Exception.class);
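A hedged illustration of the scan bounds used above: Bytes.stopKeyForPrefix derives the exclusive upper bound for a prefix scan by incrementing the last byte of the prefix. The app name and separator below are made up, and the package name assumes the co.cask-era API.

import co.cask.cdap.api.common.Bytes;

public class PrefixScanBounds {
  public static void main(String[] args) {
    byte[] start = Bytes.toBytes("myApp:");
    byte[] stop = Bytes.stopKeyForPrefix(start);
    // ':' (0x3A) bumps to ';' (0x3B), so [start, stop) covers exactly the
    // rows whose keys begin with "myApp:".
    System.out.println(Bytes.toString(stop)); // myApp;
  }
}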
@Override
public void apply() throws Exception {
  records.write(record.getKey(), record.getValue());
}
});
@Override
public void transform(KeyValue<byte[], byte[]> input, Emitter<StructuredRecord> emitter) throws Exception {
  emitter.emit(StructuredRecord.builder(SCHEMA)
                 .set("key", input.getKey())
                 .set("value", input.getValue())
                 .build());
}
}
/**
 * Writes entries to the V2 MetadataTable.
 *
 * @param entries list of entries to be written
 */
public void writeUpgradedRows(List<KeyValue<Long, Object>> entries) {
  Set<MetadataEntity> previouslySeenEntities = new HashSet<>();
  for (KeyValue<Long, Object> kv : entries) {
    if (kv.getKey() == null) {
      // A null key marks a current metadata entry rather than a historical one.
      MetadataEntry entry = (MetadataEntry) kv.getValue();
      boolean isNewEntity = previouslySeenEntities.add(entry.getMetadataEntity());
      Set<Indexer> indexers = getIndexersForKey(entry.getKey(), isNewEntity);
      writeValue(entry, indexers);
      // store indexes for the tags being added
      storeIndexes(entry, indexers);
    } else {
      MetadataV1 metadataV1 = (MetadataV1) kv.getValue();
      // write history with the original timestamp
      writeHistory(new Metadata(metadataV1.getEntityId(), metadataV1.getProperties(), metadataV1.getTags()),
                   kv.getKey());
    }
  }
}
try (CloseableIterator<KeyValue<byte[], byte[]>> itor = resultTable.scan(null, null)) {
  return ImmutableSet.copyOf(Iterators.transform(itor, input -> {
    String word = Bytes.toString(input.getKey());
    LOG.info("{}, {}", word, Bytes.toInt(input.getValue()));
    return word;
  }));
}
@Override
public void apply() throws Exception {
  CloseableIterator<KeyValue<byte[], Record>> results = records.scan("789", null);
  KeyValue<byte[], Record> actualRecord = results.next();
  Assert.assertFalse(results.hasNext());
  Assert.assertArrayEquals(actualRecord.getKey(), recordList.get(2).getKey());
  Assert.assertEquals(actualRecord.getValue(), recordList.get(2).getValue());
  results.close();

  results = records.scan(null, "124");
  actualRecord = results.next();
  Assert.assertFalse(results.hasNext());
  Assert.assertArrayEquals(actualRecord.getKey(), recordList.get(0).getKey());
  Assert.assertEquals(actualRecord.getValue(), recordList.get(0).getValue());
  results.close();

  // The stop key is exclusive, so a scan up to "123" returns nothing.
  results = records.scan(null, "123");
  Assert.assertFalse(results.hasNext());
  results.close();
}
});
private void testSparkWithLocalFiles(Class<? extends Application> appClass,
                                     String sparkProgram, String prefix) throws Exception {
  ApplicationManager applicationManager = deploy(NamespaceId.DEFAULT, appClass);
  URI localFile = createLocalPropertiesFile(prefix);
  SparkManager sparkManager = applicationManager.getSparkManager(sparkProgram)
    .start(Collections.singletonMap(SparkAppUsingLocalFiles.LOCAL_FILE_RUNTIME_ARG, localFile.toString()));
  sparkManager.waitForRun(ProgramRunStatus.RUNNING, 10, TimeUnit.SECONDS);
  sparkManager.waitForStopped(120, TimeUnit.SECONDS);

  DataSetManager<KeyValueTable> kvTableManager = getDataset(SparkAppUsingLocalFiles.OUTPUT_DATASET_NAME);
  KeyValueTable kvTable = kvTableManager.get();
  Map<String, String> expected = ImmutableMap.of("a", "1", "b", "2", "c", "3");
  List<byte[]> deleteKeys = new ArrayList<>();
  try (CloseableIterator<KeyValue<byte[], byte[]>> scan = kvTable.scan(null, null)) {
    for (int i = 0; i < 3; i++) {
      KeyValue<byte[], byte[]> next = scan.next();
      Assert.assertEquals(expected.get(Bytes.toString(next.getKey())), Bytes.toString(next.getValue()));
      deleteKeys.add(next.getKey());
    }
    Assert.assertFalse(scan.hasNext());
  }

  // Cleanup after run
  kvTableManager.flush();
  for (byte[] key : deleteKeys) {
    kvTable.delete(key);
  }
  kvTableManager.flush();
}
KeyValue<byte[], byte[]> next = scan.next();
SparkAppUsingGetDataset.LogKey logKey =
  new Gson().fromJson(Bytes.toString(next.getKey()), SparkAppUsingGetDataset.LogKey.class);
SparkAppUsingGetDataset.LogStats logStats =
  new Gson().fromJson(Bytes.toString(next.getValue()), SparkAppUsingGetDataset.LogStats.class);
private void testSparkWithGetDataset(Class<? extends Application> appClass, String sparkProgram) throws Exception {
  ApplicationManager applicationManager = deploy(appClass);

  DataSetManager<FileSet> filesetManager = getDataset("logs");
  FileSet fileset = filesetManager.get();
  Location location = fileset.getLocation("nn");
  prepareInputFileSetWithLogData(location);

  Map<String, String> inputArgs = new HashMap<>();
  FileSetArguments.setInputPath(inputArgs, "nn");
  Map<String, String> args = new HashMap<>();
  args.putAll(RuntimeArguments.addScope(Scope.DATASET, "logs", inputArgs));
  args.put("input", "logs");
  args.put("output", "logStats");

  SparkManager sparkManager = applicationManager.getSparkManager(sparkProgram);
  sparkManager.startAndWaitForRun(args, ProgramRunStatus.COMPLETED, 2, TimeUnit.MINUTES);

  DataSetManager<KeyValueTable> logStatsManager = getDataset("logStats");
  KeyValueTable logStatsTable = logStatsManager.get();
  validateGetDatasetOutput(logStatsTable);

  // Cleanup after run
  location.delete(true);
  logStatsManager.flush();
  try (CloseableIterator<KeyValue<byte[], byte[]>> scan = logStatsTable.scan(null, null)) {
    while (scan.hasNext()) {
      logStatsTable.delete(scan.next().getKey());
    }
  }
  logStatsManager.flush();
}