/**
 * Pairs the given key bytes and value bytes into a single {@link KeyValue} record.
 *
 * @param key the key bytes
 * @param value the value bytes
 * @return a {@link KeyValue} holding {@code key} and {@code value}
 */
@Override
public KeyValue<byte[], byte[]> makeRecord(byte[] key, byte[] value) {
  KeyValue<byte[], byte[]> record = new KeyValue<>(key, value);
  return record;
}
}
/**
 * Emits the value of the incoming key/value pair; the key is not used.
 *
 * @param input the key/value pair whose value is forwarded
 * @param emitter receives the {@link StructuredRecord} value
 */
@Override
public void transform(KeyValue<NullWritable, StructuredRecord> input, Emitter<StructuredRecord> emitter) {
  StructuredRecord record = input.getValue();
  emitter.emit(record);
}
}
@Override public void run(DatasetContext dsContext) throws Exception { KeyValueTable stateTable = dsContext.getDataset(ETLRealtimeApplication.STATE_TABLE); byte[] startKey = Bytes.toBytes(String.format("%s%s", appName, SEPARATOR)); // Scan the table for appname: prefixes and remove rows which doesn't match the unique id of this application. try (CloseableIterator<KeyValue<byte[], byte[]>> rows = stateTable.scan(startKey, Bytes.stopKeyForPrefix(startKey))) { while (rows.hasNext()) { KeyValue<byte[], byte[]> row = rows.next(); if (Bytes.compareTo(stateStoreKeyBytes, row.getKey()) != 0) { stateTable.delete(row.getKey()); } } } } }, Exception.class);
/**
 * Wraps the incoming record as the value of a {@link KeyValue} keyed by {@link NullWritable} and emits it.
 *
 * @param input the record to wrap
 * @param emitter receives the resulting key/value pair
 */
@Override
public void transform(StructuredRecord input, Emitter<KeyValue<NullWritable, StructuredRecord>> emitter) {
  KeyValue<NullWritable, StructuredRecord> pair = new KeyValue<>(NullWritable.get(), input);
  emitter.emit(pair);
}
/**
 * Deserializes the text value of the incoming pair from JSON into a {@link StructuredRecord}
 * using {@code GSON} and emits it; the key is not used.
 *
 * @param input the key/value pair whose text value holds the JSON
 * @param emitter receives the deserialized record
 * @throws Exception if there's an error during this method invocation
 */
@Override
public void transform(KeyValue<LongWritable, Text> input, Emitter<StructuredRecord> emitter) throws Exception {
  String json = input.getValue().toString();
  StructuredRecord record = GSON.fromJson(json, StructuredRecord.class);
  emitter.emit(record);
}
private void testSparkWithGetDataset(Class<? extends Application> appClass, String sparkProgram) throws Exception { ApplicationManager applicationManager = deploy(appClass); DataSetManager<FileSet> filesetManager = getDataset("logs"); FileSet fileset = filesetManager.get(); Location location = fileset.getLocation("nn"); prepareInputFileSetWithLogData(location); Map<String, String> inputArgs = new HashMap<>(); FileSetArguments.setInputPath(inputArgs, "nn"); Map<String, String> args = new HashMap<>(); args.putAll(RuntimeArguments.addScope(Scope.DATASET, "logs", inputArgs)); args.put("input", "logs"); args.put("output", "logStats"); SparkManager sparkManager = applicationManager.getSparkManager(sparkProgram); sparkManager.startAndWaitForRun(args, ProgramRunStatus.COMPLETED, 2, TimeUnit.MINUTES); DataSetManager<KeyValueTable> logStatsManager = getDataset("logStats"); KeyValueTable logStatsTable = logStatsManager.get(); validateGetDatasetOutput(logStatsTable); // Cleanup after run location.delete(true); logStatsManager.flush(); try (CloseableIterator<KeyValue<byte[], byte[]>> scan = logStatsTable.scan(null, null)) { while (scan.hasNext()) { logStatsTable.delete(scan.next().getKey()); } } logStatsManager.flush(); }
/**
 * Serializes the incoming record to JSON with {@code GSON} and emits it as the value of a
 * {@link KeyValue} keyed by {@link NullWritable}.
 *
 * @param input the record to serialize
 * @param emitter receives the resulting key/value pair
 * @throws Exception if there's an error during this method invocation
 */
@Override
public void transform(StructuredRecord input, Emitter<KeyValue<NullWritable, String>> emitter) throws Exception {
  NullWritable key = NullWritable.get();
  String json = GSON.toJson(input);
  emitter.emit(new KeyValue<>(key, json));
}
/**
 * Forwards the {@link StructuredRecord} value of the incoming pair to the emitter, ignoring the key.
 *
 * @param input the key/value pair to unwrap
 * @param emitter receives the unwrapped record
 */
@Override
public void transform(KeyValue<NullWritable, StructuredRecord> input, Emitter<StructuredRecord> emitter) {
  StructuredRecord value = input.getValue();
  emitter.emit(value);
}
}
Bytes.toBytes("world"), ByteBuffer.wrap(Bytes.toBytes("yo")), UUID.randomUUID()); final List<KeyValue<byte[], Record>> recordList = Lists.newArrayList(); recordList.add(new KeyValue<>(Bytes.toBytes("123"), record1)); recordList.add(new KeyValue<>(Bytes.toBytes("456"), record2)); recordList.add(new KeyValue<>(Bytes.toBytes("789"), record3)); KeyValue<byte[], Record> expected = recordList.get(i); KeyValue<byte[], Record> actual = actualList.get(i); Assert.assertArrayEquals(expected.getKey(), actual.getKey()); Assert.assertEquals(expected.getValue(), actual.getValue());
/**
 * Converts an input received from the previous stage into a {@link KeyValue} pair that the output
 * set in {@link BatchSink#prepareRun} can consume. This default implementation emits the input
 * object itself as both the key and the value.
 * This method is invoked inside the Batch run.
 *
 * @param input the input to transform
 * @param emitter {@link Emitter} to emit data to the next stage
 * @throws Exception if there's an error during this method invocation
 */
@Override
public void transform(IN input, Emitter<KeyValue<KEY_OUT, VAL_OUT>> emitter) throws Exception {
  // The same input object is cast to both type parameters.
  KEY_OUT key = (KEY_OUT) input;
  VAL_OUT value = (VAL_OUT) input;
  emitter.emit(new KeyValue<>(key, value));
}
/**
 * Unwraps the incoming key/value pair and emits only its {@link StructuredRecord} value.
 *
 * @param input the key/value pair to unwrap
 * @param emitter receives the record value
 * @throws Exception if there's an error during this method invocation
 */
@Override
public void transform(KeyValue<NullWritable, StructuredRecord> input,
                      Emitter<StructuredRecord> emitter) throws Exception {
  StructuredRecord record = input.getValue();
  emitter.emit(record);
}
/**
 * Emits the incoming record as the value of a {@link KeyValue} whose key is {@link NullWritable}.
 *
 * @param input the record to emit as the value
 * @param emitter receives the key/value pair
 */
@Override
public void transform(StructuredRecord input, Emitter<KeyValue<NullWritable, StructuredRecord>> emitter) {
  NullWritable key = NullWritable.get();
  emitter.emit(new KeyValue<>(key, input));
}
/**
 * Passes the value of the incoming key/value pair on to the emitter; the key is dropped.
 *
 * @param input the key/value pair whose value is emitted
 * @param emitter receives the {@link StructuredRecord} value
 */
@Override
public void transform(KeyValue<NullWritable, StructuredRecord> input, Emitter<StructuredRecord> emitter) {
  StructuredRecord payload = input.getValue();
  emitter.emit(payload);
}