/**
 * Maps every input {@link Row} to a string by passing it to {@code beamRow2CsvLine} together with
 * {@code csvFormat}.
 *
 * @param input the rows to convert
 * @return a collection holding one string per input row
 */
@Override
public PCollection<String> expand(PCollection<Row> input) {
  MapElements<Row, String> toCsvLine =
      MapElements.into(TypeDescriptors.strings()).via(row -> beamRow2CsvLine(row, csvFormat));
  return input.apply("rowToCsv", toCsvLine);
}
}
/**
 * Maps every input {@link Row} to the string stored in its field at index 0, followed by a
 * newline character.
 *
 * @param input the rows to convert
 * @return a collection holding one newline-terminated string per input row
 */
@Override
public PCollection<String> expand(PCollection<Row> input) {
  MapElements<Row, String> firstFieldAsLine =
      MapElements.into(TypeDescriptors.strings()).via((Row row) -> row.getString(0) + "\n");
  return input.apply("rowsToLines", firstFieldAsLine);
}
}
/** Checks that {@code sets(strings())} equals a directly captured {@code Set<String>} type. */
@Test
public void testTypeDescriptorsSets() throws Exception {
  TypeDescriptor<Set<String>> fromFactory = sets(strings());
  TypeDescriptor<Set<String>> fromAnonymousSubclass = new TypeDescriptor<Set<String>>() {};

  assertEquals(fromFactory, fromAnonymousSubclass);
}
/**
 * Checks that {@code iterables(strings())} equals a directly captured {@code Iterable<String>}
 * type.
 */
@Test
public void testTypeDescriptorsIterables() throws Exception {
  TypeDescriptor<Iterable<String>> fromFactory = iterables(strings());
  TypeDescriptor<Iterable<String>> fromAnonymousSubclass =
      new TypeDescriptor<Iterable<String>>() {};

  assertEquals(fromFactory, fromAnonymousSubclass);
}
/**
 * Looks up the schema for the given type.
 *
 * @param typeDescriptor the type whose schema is requested
 * @return {@code STRING_SCHEMA} for the string type, {@code INTEGER_SCHEMA} for the integer type,
 *     and {@code null} for every other type
 */
@Override
public <T> Schema schemaFor(TypeDescriptor<T> typeDescriptor) {
  if (typeDescriptor.equals(TypeDescriptors.strings())) {
    return STRING_SCHEMA;
  }
  if (typeDescriptor.equals(TypeDescriptors.integers())) {
    return INTEGER_SCHEMA;
  }
  // Any other type is not handled here.
  return null;
}
/**
 * Checks that a {@link SimpleFunction} wrapping the method reference {@code
 * SimpleFunctionTest::toStringThisThing} reports {@code Integer} as its input type and {@code
 * String} as its output type.
 */
@Test
public void testGoodTypeForMethodRef() throws Exception {
  SimpleFunction<Integer, String> wrappedMethodRef =
      new SimpleFunction<Integer, String>(SimpleFunctionTest::toStringThisThing) {};

  assertThat(wrappedMethodRef.getInputTypeDescriptor(), equalTo(TypeDescriptors.integers()));
  assertThat(wrappedMethodRef.getOutputTypeDescriptor(), equalTo(TypeDescriptors.strings()));
}
/**
 * Checks that a {@link SimpleFunction} wrapping {@code Object::toString} reports {@code Integer}
 * as its input type and {@code String} as its output type.
 */
@Test
public void testGoodTypeForLambda() throws Exception {
  SimpleFunction<Integer, String> wrappedToString =
      new SimpleFunction<Integer, String>(Object::toString) {};

  assertThat(wrappedToString.getInputTypeDescriptor(), equalTo(TypeDescriptors.integers()));
  assertThat(wrappedToString.getOutputTypeDescriptor(), equalTo(TypeDescriptors.strings()));
}
/**
 * Checks that {@code lists(strings())} equals a captured {@code List<String>} type and differs
 * from a captured {@code List<Boolean>} type.
 */
@Test
public void testTypeDescriptorsLists() throws Exception {
  TypeDescriptor<List<String>> fromFactory = lists(strings());

  TypeDescriptor<List<String>> sameElementType = new TypeDescriptor<List<String>>() {};
  assertEquals(fromFactory, sameElementType);

  TypeDescriptor<List<Boolean>> otherElementType = new TypeDescriptor<List<Boolean>>() {};
  assertNotEquals(fromFactory, otherElementType);
}
/**
 * Checks that {@code kvs(strings(), integers())} equals a directly captured {@code KV<String,
 * Integer>} type.
 */
@Test
public void testTypeDescriptorsKV() throws Exception {
  TypeDescriptor<KV<String, Integer>> fromFactory = kvs(strings(), integers());
  TypeDescriptor<KV<String, Integer>> fromAnonymousSubclass =
      new TypeDescriptor<KV<String, Integer>>() {};

  assertEquals(fromFactory, fromAnonymousSubclass);
}
/**
 * Pairs each {@code GameActionInfo} with the key obtained from {@code getKey(field)} and its
 * score, then sums the integer scores per key.
 *
 * @param gameInfo the game actions to aggregate
 * @return one key/sum pair per distinct key
 */
@Override
public PCollection<KV<String, Integer>> expand(PCollection<GameActionInfo> gameInfo) {
  PCollection<KV<String, Integer>> keyedScores =
      gameInfo.apply(
          MapElements.into(
                  TypeDescriptors.kvs(TypeDescriptors.strings(), TypeDescriptors.integers()))
              .via((GameActionInfo action) -> KV.of(action.getKey(field), action.getScore())));

  return keyedScores.apply(Sum.integersPerKey());
}
}
/**
 * Checks the descriptors that {@code extractFooT}, {@code extractBarT} and {@code extractKV}
 * return for anonymous {@code Generic<String, Integer>} subclasses.
 */
@Test
public void testTypeDescriptorsTypeParameterOf() throws Exception {
  // A separate anonymous subclass is created for each extraction.
  Generic<String, Integer> forFoo = new Generic<String, Integer>() {};
  assertEquals(strings(), extractFooT(forFoo));

  Generic<String, Integer> forBar = new Generic<String, Integer>() {};
  assertEquals(integers(), extractBarT(forBar));

  Generic<String, Integer> forKv = new Generic<String, Integer>() {};
  assertEquals(kvs(strings(), integers()), extractKV(forKv));
}
/**
 * Checks that {@code lists(lists(strings()))} equals a captured {@code List<List<String>>} type
 * and differs from captured {@code List<String>} and {@code List<Boolean>} types.
 */
@Test
public void testTypeDescriptorsListsOfLists() throws Exception {
  TypeDescriptor<List<List<String>>> nested = lists(lists(strings()));

  TypeDescriptor<List<List<String>>> sameNesting = new TypeDescriptor<List<List<String>>>() {};
  assertEquals(nested, sameNesting);

  TypeDescriptor<List<String>> flatStrings = new TypeDescriptor<List<String>>() {};
  assertNotEquals(nested, flatStrings);

  TypeDescriptor<List<Boolean>> flatBooleans = new TypeDescriptor<List<Boolean>>() {};
  assertNotEquals(nested, flatBooleans);
}
/**
 * Checks the Java type descriptors that {@code FieldTypeDescriptors.javaTypeForFieldType}
 * returns for a string-to-INT64 map and for a string-to-array-of-INT64 map.
 */
@Test
public void testMapTypeToJavaType() {
  FieldType stringToInt64 = FieldType.map(FieldType.STRING, FieldType.INT64);
  assertEquals(
      TypeDescriptors.maps(TypeDescriptors.strings(), TypeDescriptors.longs()),
      FieldTypeDescriptors.javaTypeForFieldType(stringToInt64));

  FieldType stringToInt64Array =
      FieldType.map(FieldType.STRING, FieldType.array(FieldType.INT64));
  assertEquals(
      TypeDescriptors.maps(
          TypeDescriptors.strings(), TypeDescriptors.lists(TypeDescriptors.longs())),
      FieldTypeDescriptors.javaTypeForFieldType(stringToInt64Array));
}
/**
 * Builds and runs a pipeline that matches files against the input pattern from {@code options},
 * reads them as Parquet using {@code SCHEMA}, and maps each record to a string with {@code
 * GetRecordsFn}.
 *
 * @param options supplies the pipeline configuration and the input file pattern
 */
private static void runReadPipeline(Options options) {
  Pipeline readPipeline = Pipeline.create(options);

  readPipeline
      // Resolve the input pattern to the files it matches.
      .apply("Find files", FileIO.match().filepattern(options.getInput()))
      .apply("Read matched files", FileIO.readMatches())
      // Read the matched files as Parquet records using SCHEMA.
      .apply("Read parquet files", ParquetIO.readFiles(SCHEMA))
      .apply("Map records to strings", MapElements.into(strings()).via(new GetRecordsFn()));

  readPipeline.run();
}
/**
 * Checks the field types that {@code FieldTypeDescriptors.fieldTypeForJavaType} returns for a
 * string-to-long map descriptor and for a string-to-list-of-longs map descriptor.
 */
@Test
public void testMapTypeToFieldType() {
  FieldType expectedSimpleMap = FieldType.map(FieldType.STRING, FieldType.INT64);
  assertEquals(
      expectedSimpleMap,
      FieldTypeDescriptors.fieldTypeForJavaType(
          TypeDescriptors.maps(TypeDescriptors.strings(), TypeDescriptors.longs())));

  FieldType expectedMapOfArrays =
      FieldType.map(FieldType.STRING, FieldType.array(FieldType.INT64));
  assertEquals(
      expectedMapOfArrays,
      FieldTypeDescriptors.fieldTypeForJavaType(
          TypeDescriptors.maps(
              TypeDescriptors.strings(), TypeDescriptors.lists(TypeDescriptors.longs()))));
}
}
/**
 * Checks that providers built with {@code CoderProviders.fromStaticMethods} return the expected
 * coder for {@code String}, {@code Double} and {@code byte[]} when given an empty list of
 * component coders.
 */
@Test
public void testCoderProvidersFromStaticMethodsForParameterlessTypes() throws Exception {
  CoderProvider stringProvider =
      CoderProviders.fromStaticMethods(String.class, StringUtf8Coder.class);
  assertEquals(
      StringUtf8Coder.of(),
      stringProvider.coderFor(TypeDescriptors.strings(), Collections.emptyList()));

  CoderProvider doubleProvider = CoderProviders.fromStaticMethods(Double.class, DoubleCoder.class);
  assertEquals(
      DoubleCoder.of(),
      doubleProvider.coderFor(TypeDescriptors.doubles(), Collections.emptyList()));

  CoderProvider byteArrayProvider =
      CoderProviders.fromStaticMethods(byte[].class, ByteArrayCoder.class);
  assertEquals(
      ByteArrayCoder.of(),
      byteArrayProvider.coderFor(TypeDescriptor.of(byte[].class), Collections.emptyList()));
}
/**
 * Checks the field types that {@code FieldTypeDescriptors.fieldTypeForJavaType} returns for a
 * list of strings, a list of lists of strings, and an anonymous {@code ArrayList<String>}
 * subclass.
 */
@Test
public void testArrayTypeToFieldType() {
  FieldType stringArray = FieldType.array(FieldType.STRING);
  FieldType nestedStringArray = FieldType.array(FieldType.array(FieldType.STRING));

  assertEquals(
      stringArray,
      FieldTypeDescriptors.fieldTypeForJavaType(TypeDescriptors.lists(TypeDescriptors.strings())));

  assertEquals(
      nestedStringArray,
      FieldTypeDescriptors.fieldTypeForJavaType(
          TypeDescriptors.lists(TypeDescriptors.lists(TypeDescriptors.strings()))));

  // The descriptor here is built from the runtime class of an anonymous ArrayList subclass.
  assertEquals(
      stringArray,
      FieldTypeDescriptors.fieldTypeForJavaType(
          TypeDescriptor.of(new ArrayList<String>() {}.getClass())));
}
/**
 * Smoke test: constructs the minimal word-count pipeline against a mock GCS utility and expects
 * construction to complete without throwing.
 */
@Test
public void testMinimalWordCount() throws Exception {
  p.getOptions().as(GcsOptions.class).setGcsUtil(buildMockGcsUtil());

  // Splits each input string on runs of characters that are neither letters nor apostrophes.
  FlatMapElements<String, String> splitIntoWords =
      FlatMapElements.into(TypeDescriptors.strings())
          .via((String line) -> Arrays.asList(line.split("[^a-zA-Z']+")));

  // Renders each count as "<word>: <count>".
  MapElements<KV<String, Long>, String> formatCount =
      MapElements.into(TypeDescriptors.strings())
          .via((KV<String, Long> entry) -> entry.getKey() + ": " + entry.getValue());

  p.apply(TextIO.read().from("gs://apache-beam-samples/shakespeare/*"))
      .apply(splitIntoWords)
      .apply(Filter.by((String token) -> !token.isEmpty()))
      .apply(Count.perElement())
      .apply(formatCount)
      .apply(TextIO.write().to("gs://your-output-bucket/and-output-prefix"));
}
@Test public void testTypeDescriptorsTypeParameterOfErased() throws Exception { Generic<Integer, String> instance = TypeDescriptorsTest.typeErasedGeneric(); TypeDescriptor<Integer> fooT = extractFooT(instance); assertNotNull(fooT); // Using toString() assertions because verifying the contents of a Type is very cumbersome, // and the expected types can not be easily constructed directly. assertEquals("ActualFooT", fooT.toString()); assertEquals(strings(), extractBarT(instance)); TypeDescriptor<KV<Integer, String>> kvT = extractKV(instance); assertNotNull(kvT); assertThat(kvT.toString(), CoreMatchers.containsString("KV<ActualFooT, java.lang.String>")); } }
/**
 * Registers schemas and row conversion functions for the string and integer types on a default
 * {@link SchemaRegistry}, then passes the registry to {@code tryGetters}.
 *
 * @throws NoSuchSchemaException declared by this test; not thrown directly by the visible code
 */
@Test
public void testRegisterForType() throws NoSuchSchemaException {
  SchemaRegistry schemaRegistry = SchemaRegistry.createDefault();

  // String <-> Row conversion using the "string" field.
  schemaRegistry.registerSchemaForType(
      TypeDescriptors.strings(),
      STRING_SCHEMA,
      value -> Row.withSchema(STRING_SCHEMA).addValue(value).build(),
      row -> row.getString("string"));

  // Integer <-> Row conversion using the "integer" field.
  schemaRegistry.registerSchemaForType(
      TypeDescriptors.integers(),
      INTEGER_SCHEMA,
      value -> Row.withSchema(INTEGER_SCHEMA).addValue(value).build(),
      row -> row.getInt32("integer"));

  tryGetters(schemaRegistry);
}