static <T> IsmRecordCoder<WindowedValue<T>> coderForListLike(
    Coder<? extends BoundedWindow> windowCoder, Coder<T> valueCoder) {
  // TODO: swap to use a variable length long coder which has values which compare
  // the same as their byte representation compare lexicographically within the key coder
  return IsmRecordCoder.of(
      1, // We hash using only the window
      0, // There are no metadata records
      ImmutableList.of(windowCoder, BigEndianLongCoder.of()),
      FullWindowedValueCoder.of(valueCoder, windowCoder));
}
ExactlyOnceWriter(Write<K, V> spec, Coder<KV<K, V>> elemCoder) {
  this.spec = spec;
  this.outOfOrderBufferSpec =
      StateSpecs.bag(KvCoder.of(BigEndianLongCoder.of(), TimestampedValueCoder.of(elemCoder)));
}
static <V> IsmRecordCoder<WindowedValue<V>> coderForMapLike(
    Coder<? extends BoundedWindow> windowCoder, Coder<?> keyCoder, Coder<V> valueCoder) {
  // TODO: swap to use a variable length long coder which has values which compare
  // the same as their byte representation compare lexicographically within the key coder
  return IsmRecordCoder.of(
      1, // We use only the key for hashing when producing value records
      2, // Since the key is not present, we add the window to the hash when
         // producing metadata records
      ImmutableList.of(MetadataKeyCoder.of(keyCoder), windowCoder, BigEndianLongCoder.of()),
      FullWindowedValueCoder.of(valueCoder, windowCoder));
}
@Override
public OutgoingMessage decode(InputStream inStream) throws CoderException, IOException {
  byte[] elementBytes = ByteArrayCoder.of().decode(inStream);
  Map<String, String> attributes = ATTRIBUTES_CODER.decode(inStream);
  long timestampMsSinceEpoch = BigEndianLongCoder.of().decode(inStream);
  @Nullable String recordId = RECORD_ID_CODER.decode(inStream);
  return new OutgoingMessage(elementBytes, attributes, timestampMsSinceEpoch, recordId);
}
@Override
public void encode(OutgoingMessage value, OutputStream outStream)
    throws CoderException, IOException {
  ByteArrayCoder.of().encode(value.elementBytes, outStream);
  ATTRIBUTES_CODER.encode(value.attributes, outStream);
  BigEndianLongCoder.of().encode(value.timestampMsSinceEpoch, outStream);
  RECORD_ID_CODER.encode(value.recordId, outStream);
}
@Override
public List<? extends Coder<?>> getCoderArguments() {
  return ImmutableList.<Coder<?>>builder()
      .addAll(super.getCoderArguments())
      .add(BigEndianLongCoder.of())
      .build();
}
@Test
public void testGloballyOutputCoder() {
  p.enableAbandonedNodeEnforcement(false);

  BigEndianLongCoder inputCoder = BigEndianLongCoder.of();
  PCollection<Long> output =
      p.apply(Create.of(1L, 2L).withCoder(inputCoder)).apply(Latest.globally());

  Coder<Long> outputCoder = output.getCoder();
  assertThat(outputCoder, instanceOf(NullableCoder.class));
  assertEquals(inputCoder, ((NullableCoder<?>) outputCoder).getValueCoder());
}
@Test
public void testMultiCoderCycle() throws Exception {
  LengthPrefixCoder<Long> lengthPrefixedValueCoder =
      LengthPrefixCoder.of(BigEndianLongCoder.of());
  LengthPrefixCoder<byte[]> lengthPrefixedBytesCoder = LengthPrefixCoder.of(ByteArrayCoder.of());

  // [0x08, 0, 0, 0, 0, 0, 0, 0, 0x16]
  byte[] userEncoded = CoderUtils.encodeToByteArray(lengthPrefixedValueCoder, 22L);
  // [0, 0, 0, 0, 0, 0, 0, 0x16]
  byte[] decodedToBytes = CoderUtils.decodeFromByteArray(lengthPrefixedBytesCoder, userEncoded);
  // [0x08, 0, 0, 0, 0, 0, 0, 0, 0x16]
  byte[] reencodedBytes = CoderUtils.encodeToByteArray(lengthPrefixedBytesCoder, decodedToBytes);
  long userDecoded = CoderUtils.decodeFromByteArray(lengthPrefixedValueCoder, reencodedBytes);

  assertFalse(
      "Length-prefix decoding to bytes should drop the length",
      Arrays.equals(userEncoded, decodedToBytes));
  assertArrayEquals(userEncoded, reencodedBytes);
  assertEquals(22L, userDecoded);
}
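// A minimal sketch, not part of the Beam test above: it assumes the same CoderUtils helpers and
// a surrounding test method that declares "throws Exception". It illustrates why the length
// prefix in the comments above is 0x08: BigEndianLongCoder always encodes a long as a fixed
// 8-byte, most-significant-byte-first value.
byte[] encoded = CoderUtils.encodeToByteArray(BigEndianLongCoder.of(), 22L);
// encoded is [0, 0, 0, 0, 0, 0, 0, 0x16]
assertEquals(8, encoded.length);
assertEquals((byte) 0x16, encoded[7]);
assertEquals(22L, (long) CoderUtils.decodeFromByteArray(BigEndianLongCoder.of(), encoded));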
@Test
public void boundedSourceEvaluatorClosesReader() throws Exception {
  TestSource<Long> source = new TestSource<>(BigEndianLongCoder.of(), 1L, 2L, 3L);
  PCollection<Long> pcollection = p.apply(Read.from(source));
  AppliedPTransform<?, ?, ?> sourceTransform = DirectGraphs.getProducer(pcollection);

  UncommittedBundle<Long> output = bundleFactory.createBundle(pcollection);
  when(context.createBundle(pcollection)).thenReturn(output);

  TransformEvaluator<BoundedSourceShard<Long>> evaluator =
      factory.forApplication(
          sourceTransform, bundleFactory.createRootBundle().commit(Instant.now()));
  evaluator.processElement(WindowedValue.valueInGlobalWindow(BoundedSourceShard.of(source)));
  evaluator.finishBundle();

  CommittedBundle<Long> committed = output.commit(Instant.now());
  assertThat(committed.getElements(), containsInAnyOrder(gw(2L), gw(3L), gw(1L)));
  assertThat(TestSource.readerClosed, is(true));
}
@Test
public void boundedSourceEvaluatorNoElementsClosesReader() throws Exception {
  TestSource<Long> source = new TestSource<>(BigEndianLongCoder.of());
  PCollection<Long> pcollection = p.apply(Read.from(source));
  AppliedPTransform<?, ?, ?> sourceTransform = DirectGraphs.getProducer(pcollection);

  UncommittedBundle<Long> output = bundleFactory.createBundle(pcollection);
  when(context.createBundle(pcollection)).thenReturn(output);

  TransformEvaluator<BoundedSourceShard<Long>> evaluator =
      factory.forApplication(
          sourceTransform, bundleFactory.createRootBundle().commit(Instant.now()));
  evaluator.processElement(WindowedValue.valueInGlobalWindow(BoundedSourceShard.of(source)));
  evaluator.finishBundle();

  CommittedBundle<Long> committed = output.commit(Instant.now());
  assertThat(committed.getElements(), emptyIterable());
  assertThat(TestSource.readerClosed, is(true));
}
ContiguousSet<Long> elems =
    ContiguousSet.create(Range.openClosed(0L, (long) numElements), DiscreteDomain.longs());
TestUnboundedSource<Long> source =
    new TestUnboundedSource<>(BigEndianLongCoder.of(), elems.toArray(new Long[0]));
source.advanceWatermarkToInfinity = true;
@Test
public void evaluatorThrowsInCloseRethrows() throws Exception {
  ContiguousSet<Long> elems = ContiguousSet.create(Range.closed(0L, 20L), DiscreteDomain.longs());
  TestUnboundedSource<Long> source =
      new TestUnboundedSource<>(BigEndianLongCoder.of(), elems.toArray(new Long[0]))
          .throwsOnClose();

  PCollection<Long> pcollection = p.apply(Read.from(source));
  AppliedPTransform<?, ?, ?> sourceTransform = DirectGraphs.getGraph(p).getProducer(pcollection);

  when(context.createRootBundle()).thenReturn(bundleFactory.createRootBundle());
  UncommittedBundle<Long> output = bundleFactory.createBundle(pcollection);
  when(context.createBundle(pcollection)).thenReturn(output);

  WindowedValue<UnboundedSourceShard<Long, TestCheckpointMark>> shard =
      WindowedValue.valueInGlobalWindow(
          UnboundedSourceShard.unstarted(source, NeverDeduplicator.create()));
  CommittedBundle<UnboundedSourceShard<Long, TestCheckpointMark>> inputBundle =
      bundleFactory
          .<UnboundedSourceShard<Long, TestCheckpointMark>>createRootBundle()
          .add(shard)
          .commit(Instant.now());
  UnboundedReadEvaluatorFactory factory =
      new UnboundedReadEvaluatorFactory(context, options, 0.0 /* never reuse */);
  TransformEvaluator<UnboundedSourceShard<Long, TestCheckpointMark>> evaluator =
      factory.forApplication(sourceTransform, inputBundle);

  thrown.expect(IOException.class);
  thrown.expectMessage("throws on close");
  evaluator.processElement(shard);
}
@Test
public void testUnboundedSourceSplits() throws Exception {
  int numElements = 1000;
  int numSplits = 10;

  // Coders must be specified explicitly here due to the way the transform
  // is used in the test.
  UnboundedSource<KafkaRecord<Integer, Long>, ?> initial =
      mkKafkaReadTransform(numElements, null)
          .withKeyDeserializerAndCoder(IntegerDeserializer.class, BigEndianIntegerCoder.of())
          .withValueDeserializerAndCoder(LongDeserializer.class, BigEndianLongCoder.of())
          .makeSource();

  List<? extends UnboundedSource<KafkaRecord<Integer, Long>, ?>> splits =
      initial.split(numSplits, p.getOptions());
  assertEquals("Expected exact splitting", numSplits, splits.size());

  long elementsPerSplit = numElements / numSplits;
  assertEquals("Expected even splits", numElements, elementsPerSplit * numSplits);
  PCollectionList<Long> pcollections = PCollectionList.empty(p);
  for (int i = 0; i < splits.size(); ++i) {
    pcollections =
        pcollections.and(
            p.apply("split" + i, Read.from(splits.get(i)).withMaxNumRecords(elementsPerSplit))
                .apply("Remove Metadata " + i, ParDo.of(new RemoveKafkaMetadata<>()))
                .apply("collection " + i, Values.create()));
  }
  PCollection<Long> input = pcollections.apply(Flatten.pCollections());

  addCountingAsserts(input, numElements);
  p.run();
}
ContiguousSet<Long> elems = ContiguousSet.create(Range.closed(0L, 20L), DiscreteDomain.longs());
TestUnboundedSource<Long> source =
    new TestUnboundedSource<>(BigEndianLongCoder.of(), elems.toArray(new Long[0]));
@Test
@Category({ValidatesRunner.class, FlattenWithHeterogeneousCoders.class})
public void testFlattenMultipleCoders() throws CannotProvideCoderException {
  PCollection<Long> bigEndianLongs =
      p.apply(
          "BigEndianLongs",
          Create.of(0L, 1L, 2L, 3L, null, 4L, 5L, null, 6L, 7L, 8L, null, 9L)
              .withCoder(NullableCoder.of(BigEndianLongCoder.of())));

  PCollection<Long> varLongs =
      p.apply("VarLengthLongs", GenerateSequence.from(0).to(5)).setCoder(VarLongCoder.of());

  PCollection<Long> flattened =
      PCollectionList.of(bigEndianLongs)
          .and(varLongs)
          .apply(Flatten.pCollections())
          .setCoder(NullableCoder.of(VarLongCoder.of()));
  PAssert.that(flattened)
      .containsInAnyOrder(
          0L, 0L, 1L, 1L, 2L, 3L, 2L, 4L, 5L, 3L, 6L, 7L, 4L, 8L, 9L, null, null, null);
  p.run();
}
@Test
public void testGetAccumulatorCoderEquals() {
  Combine.BinaryCombineIntegerFn sumIntegerFn = Sum.ofIntegers();
  assertEquals(
      sumIntegerFn.getAccumulatorCoder(STANDARD_REGISTRY, VarIntCoder.of()),
      sumIntegerFn.getAccumulatorCoder(STANDARD_REGISTRY, VarIntCoder.of()));
  assertNotEquals(
      sumIntegerFn.getAccumulatorCoder(STANDARD_REGISTRY, VarIntCoder.of()),
      sumIntegerFn.getAccumulatorCoder(STANDARD_REGISTRY, BigEndianIntegerCoder.of()));

  Combine.BinaryCombineLongFn sumLongFn = Sum.ofLongs();
  assertEquals(
      sumLongFn.getAccumulatorCoder(STANDARD_REGISTRY, VarLongCoder.of()),
      sumLongFn.getAccumulatorCoder(STANDARD_REGISTRY, VarLongCoder.of()));
  assertNotEquals(
      sumLongFn.getAccumulatorCoder(STANDARD_REGISTRY, VarLongCoder.of()),
      sumLongFn.getAccumulatorCoder(STANDARD_REGISTRY, BigEndianLongCoder.of()));

  Combine.BinaryCombineDoubleFn sumDoubleFn = Sum.ofDoubles();
  assertEquals(
      sumDoubleFn.getAccumulatorCoder(STANDARD_REGISTRY, DoubleCoder.of()),
      sumDoubleFn.getAccumulatorCoder(STANDARD_REGISTRY, DoubleCoder.of()));
}
2,
ImmutableList.of(
    MetadataKeyCoder.of(keyCoder), IntervalWindow.getCoder(), BigEndianLongCoder.of()),
FullWindowedValueCoder.of(VarLongCoder.of(), windowCoder));
BigEndianLongCoder customCoder = BigEndianLongCoder.of();
PCollection<Long> elems = complexPipeline.apply(GenerateSequence.from(0L).to(207L));
PCollection<Long> counted = elems.apply(Count.globally()).setCoder(customCoder);
pipeline
    .apply("counts with alternative coder", GenerateSequence.from(0).to(10))
    .setCoder(BigEndianLongCoder.of());
pipeline
    .apply(