/**
 * Reads one value out of a record by position.
 *
 * @param struct the record to read from
 * @param pos the position handed to {@code struct.get}
 * @return whatever {@code struct.get(pos)} yields
 */
@Override
protected Object get(Record struct, int pos) {
  Object value = struct.get(pos);
  return value;
}
}
/**
 * Stores one value into a record by position.
 *
 * @param struct the record to write into
 * @param pos the position handed to {@code struct.set}
 * @param value the value to store at that position
 */
@Override
protected void set(
    Record struct,
    int pos,
    Object value) {
  // Pure delegation: the record itself decides how the position is interpreted.
  struct.set(pos, value);
}
}
/**
 * Writes a record with an {@code id} and a {@code data} column, reads it back using the very same
 * schema, and checks that both values survive the round trip.
 */
@Test
public void testFullProjection() throws Exception {
  Schema fullSchema = new Schema(
      Types.NestedField.required(0, "id", Types.LongType.get()),
      Types.NestedField.optional(1, "data", Types.StringType.get()));

  Record written = GenericRecord.create(fullSchema.asStruct());
  written.setField("id", 34L);
  written.setField("data", "test");

  // The read schema is the write schema, so no column is dropped.
  Record readBack = writeAndRead("full_projection", fullSchema, fullSchema, written);

  long actualId = (long) readBack.getField("id");
  Assert.assertEquals("Should contain the correct id value", 34L, actualId);

  // The data value is compared as a CharSequence rather than with String.equals.
  boolean sameData =
      Comparators.charSequences().compare("test", (CharSequence) readBack.getField("data")) == 0;
  Assert.assertTrue("Should contain the correct data value", sameData);
}
/**
 * Writes a record as (id, data) and reads it back with a schema that lists the same two fields in
 * the opposite order, then checks the values by position in the read schema.
 */
@Test
public void testReorderedFullProjection() throws Exception {
  Schema writeSchema = new Schema(
      Types.NestedField.required(0, "id", Types.LongType.get()),
      Types.NestedField.optional(1, "data", Types.StringType.get()));

  Record written = GenericRecord.create(writeSchema.asStruct());
  written.setField("id", 34L);
  written.setField("data", "test");

  // Same field ids and names as the write schema, but data comes first.
  Schema swapped = new Schema(
      Types.NestedField.optional(1, "data", Types.StringType.get()),
      Types.NestedField.required(0, "id", Types.LongType.get()));

  Record readBack = writeAndRead("full_projection", writeSchema, swapped, written);

  // Position 0 is "data" and position 1 is "id" in the read schema.
  Assert.assertEquals(
      "Should contain the correct 0 value", "test", readBack.get(0).toString());
  Assert.assertEquals(
      "Should contain the correct 1 value", 34L, readBack.get(1));
}
/**
 * Reads the shared table selecting only {@code id} and checks that every returned record has a
 * single field and that the set of ids equals the ids written to the three data files.
 */
@Test
public void testProject() {
  Iterable<Record> results = IcebergGenerics.read(sharedTable).select("id").build();

  // Collect the ids of everything that was written, file by file.
  Set<Long> expectedIds = Sets.newHashSet();
  for (Record written : file1Records) {
    expectedIds.add((Long) written.getField("id"));
  }
  for (Record written : file2Records) {
    expectedIds.add((Long) written.getField("id"));
  }
  for (Record written : file3Records) {
    expectedIds.add((Long) written.getField("id"));
  }

  for (Record row : results) {
    Assert.assertEquals("Record should have one projected field", 1, row.size());
  }

  Assert.assertEquals(
      "Should project only id columns",
      expectedIds,
      Sets.newHashSet(transform(results, record -> (Long) record.getField("id"))));
}
record.copy(ImmutableMap.of("id", 0L, "data", "clarification")), record.copy(ImmutableMap.of("id", 1L, "data", "risky")), record.copy(ImmutableMap.of("id", 2L, "data", "falafel")) ); InputFile file1 = writeFile(sharedTableLocation, format.addExtension("file-1"), file1Records); Record nullData = record.copy(); nullData.setField("id", 11L); nullData.setField("data", null); record.copy(ImmutableMap.of("id", 10L, "data", "clammy")), record.copy(ImmutableMap.of("id", 11L, "data", "evacuate")), record.copy(ImmutableMap.of("id", 12L, "data", "tissue")) ); InputFile file2 = writeFile(sharedTableLocation, format.addExtension("file-2"), file2Records); record.copy(ImmutableMap.of("id", 20L, "data", "ocean")), record.copy(ImmutableMap.of("id", 21L, "data", "holistic")), record.copy(ImmutableMap.of("id", 22L, "data", "preventative")) ); InputFile file3 = writeFile(sharedTableLocation, format.addExtension("file-3"), file3Records);
/**
 * Reads the shared table with two different row filters on {@code id} and compares the returned
 * rows, as sets, against the records of the first data file.
 */
@Test
public void testFilter() {
  // id < 3: expected to be exactly the contents of file 1.
  Iterable<Record> belowThree =
      IcebergGenerics.read(sharedTable).where(lessThan("id", 3)).build();
  Assert.assertEquals(
      "Records should match file 1",
      Sets.newHashSet(file1Records),
      Sets.newHashSet(belowThree));

  // id <= 1: the expectation is built by applying the same predicate to file 1 in memory.
  Iterable<Record> atMostOne =
      IcebergGenerics.read(sharedTable).where(lessThanOrEqual("id", 1)).build();
  Assert.assertEquals(
      "Records should match file 1 without id 2",
      Sets.newHashSet(filter(file1Records, r -> (Long) r.getField("id") <= 1)),
      Sets.newHashSet(atMostOne));
}
/**
 * Encodes a mix of V1 and V2 records, each with the encoder for its own schema, then decodes all
 * buffers with a decoder built for {@code SCHEMA_V2} that has also been told about
 * {@code SCHEMA_V1}. Every decoded record is expected to come back in V2 shape.
 */
@Test
public void testSchemaEvolution() throws Exception {
  List<ByteBuffer> buffers = Lists.newArrayList();
  // Ordering by toString() yields one combined list; presumably this interleaves the two
  // versions so the decoder has to switch schemas between messages (verify against fixtures).
  List<Record> records = Ordering.usingToString().sortedCopy(
      Iterables.concat(V1_RECORDS, V2_RECORDS));

  MessageEncoder<Record> v1Encoder = new IcebergEncoder<>(SCHEMA_V1);
  MessageEncoder<Record> v2Encoder = new IcebergEncoder<>(SCHEMA_V2);

  for (Record record : records) {
    // Compare struct types by value, not by reference: an equal but non-identical struct
    // instance must still be routed to the V1 encoder instead of silently falling through to V2.
    if (SCHEMA_V1.asStruct().equals(record.struct())) {
      buffers.add(v1Encoder.encode(record));
    } else {
      buffers.add(v2Encoder.encode(record));
    }
  }

  // Expected result: all V2 records plus V2-shaped counterparts built with a null third argument.
  Set<Record> allAsV2 = Sets.newHashSet(V2_RECORDS);
  allAsV2.add(v2Record(1L, "m-1", null));
  allAsV2.add(v2Record(2L, "m-2", null));
  allAsV2.add(v2Record(4L, "m-4", null));
  allAsV2.add(v2Record(6L, "m-6", null));

  IcebergDecoder<Record> v2Decoder = new IcebergDecoder<>(SCHEMA_V2);
  v2Decoder.addSchema(SCHEMA_V1);

  Set<Record> decodedUsingV2 = Sets.newHashSet();
  for (ByteBuffer buffer : buffers) {
    decodedUsingV2.add(v2Decoder.decode(buffer));
  }

  Assert.assertEquals(allAsV2, decodedUsingV2);
}
/**
 * Writes a record holding an {@code id} and a list of longs, then reads it back three times:
 * with only {@code id}, with only the list element selected, and with only the list selected.
 */
@Test
public void testListProjection() throws IOException {
  Schema writeSchema = new Schema(
      Types.NestedField.required(0, "id", Types.LongType.get()),
      Types.NestedField.optional(10, "values",
          Types.ListType.ofOptional(11, Types.LongType.get())));

  List<Long> values = ImmutableList.of(56L, 57L, 58L);

  Record written = GenericRecord.create(writeSchema.asStruct());
  written.setField("id", 34L);
  written.setField("values", values);

  // Case 1: project the id column alone.
  Schema idOnly = new Schema(
      Types.NestedField.required(0, "id", Types.LongType.get()));
  Record idProjection = writeAndRead("id_only", writeSchema, idOnly, written);
  Assert.assertEquals(
      "Should contain the correct id value", 34L, (long) idProjection.getField("id"));
  Assert.assertNull("Should not project values list", idProjection.getField("values"));

  // Case 2: select just the list's element; the whole list is still expected back.
  Schema elementOnly = writeSchema.select("values.element");
  Record elementProjection = writeAndRead("element_only", writeSchema, elementOnly, written);
  Assert.assertNull("Should not project id", elementProjection.getField("id"));
  Assert.assertEquals(
      "Should project entire list", values, elementProjection.getField("values"));

  // Case 3: select the list column itself.
  Schema listOnly = writeSchema.select("values");
  Record listProjection = writeAndRead("list_only", writeSchema, listOnly, written);
  Assert.assertNull("Should not project id", listProjection.getField("id"));
  Assert.assertEquals(
      "Should project entire list", values, listProjection.getField("values"));
}
/**
 * Writes an (id, data) record and reads it with a schema that places {@code data} between two
 * fields that were never written, expecting nulls around the one real value.
 */
@Test
public void testReorderedProjection() throws Exception {
  Schema writeSchema = new Schema(
      Types.NestedField.required(0, "id", Types.LongType.get()),
      Types.NestedField.optional(1, "data", Types.StringType.get()));

  Record written = GenericRecord.create(writeSchema.asStruct());
  written.setField("id", 34L);
  written.setField("data", "test");

  // Field ids 2 and 3 do not occur in the write schema; only id 1 ("data") does.
  Schema readSchema = new Schema(
      Types.NestedField.optional(2, "missing_1", Types.StringType.get()),
      Types.NestedField.optional(1, "data", Types.StringType.get()),
      Types.NestedField.optional(3, "missing_2", Types.LongType.get()));

  Record readBack = writeAndRead("full_projection", writeSchema, readSchema, written);

  Assert.assertNull(
      "Should contain the correct 0 value", readBack.get(0));
  Assert.assertEquals(
      "Should contain the correct 1 value", "test", readBack.get(1).toString());
  Assert.assertNull(
      "Should contain the correct 2 value", readBack.get(2));
}
/**
 * Reads the shared table selecting only {@code data} while filtering on {@code id}, a column that
 * is not part of the selection, and checks the returned data values against the written records.
 */
@Test
public void testProjectWithMissingFilterColumn() {
  Iterable<Record> results = IcebergGenerics.read(sharedTable)
      .where(Expressions.greaterThanOrEqual("id", 1))
      .where(Expressions.lessThan("id", 21))
      .select("data")
      .build();

  // Apply the same 1 <= id < 21 window to the written records in memory.
  Set<String> expectedData = Sets.newHashSet();
  for (Record written : concat(file1Records, file2Records, file3Records)) {
    long id = (Long) written.getField("id");
    if (id < 1 || id >= 21) {
      continue;
    }
    expectedData.add(written.getField("data").toString());
  }

  // Two fields are expected although only "data" was selected; presumably the filter column
  // is carried along in the projection (confirm against the reader implementation).
  for (Record row : results) {
    Assert.assertEquals("Record should have two projected fields", 2, row.size());
  }

  Assert.assertEquals(
      "Should project correct rows",
      expectedData,
      Sets.newHashSet(transform(results, record -> record.getField("data").toString())));
}
record.setField("id", 34L); Record p1 = GenericRecord.create(writeSchema.findType("points").asListType().elementType().asStructType()); p1.setField("x", 1); p1.setField("y", 2); Record p2 = GenericRecord.create(writeSchema.findType("points").asListType().elementType().asStructType()); p2.setField("x", 3); p2.setField("y", null); record.setField("points", ImmutableList.of(p1, p2)); Assert.assertEquals("Should contain the correct id value", 34L, (long) projected.getField("id")); Assert.assertNull("Should not project points list", projected.getField("points")); Assert.assertNull("Should not project id", projected.getField("id")); Assert.assertEquals("Should project points list", record.getField("points"), projected.getField("points")); Assert.assertNull("Should not project id", projected.getField("id")); Assert.assertNotNull("Should project points list", projected.getField("points")); List<Record> points = (List<Record>) projected.getField("points"); Assert.assertEquals("Should read 2 points", 2, points.size()); Record projectedP1 = points.get(0); Assert.assertEquals("Should project x", 1, (int) projectedP1.getField("x")); Assert.assertNull("Should not project y", projectedP1.getField("y")); Record projectedP2 = points.get(1); Assert.assertEquals("Should project x", 3, (int) projectedP2.getField("x")); Assert.assertNull("Should not project y", projectedP2.getField("y")); Assert.assertNull("Should not project id", projected.getField("id")); Assert.assertNotNull("Should project points list", projected.getField("points"));
@Test public void testEmptyProjection() throws Exception { Schema schema = new Schema( Types.NestedField.required(0, "id", Types.LongType.get()), Types.NestedField.optional(1, "data", Types.StringType.get()) ); Record record = GenericRecord.create(schema.asStruct()); record.setField("id", 34L); record.setField("data", "test"); Record projected = writeAndRead("empty_projection", schema, schema.select(), record); Assert.assertNotNull("Should read a non-null record", projected); try { projected.get(0); Assert.fail("Should not retrieve value with ordinal 0"); } catch (ArrayIndexOutOfBoundsException e) { // this is expected because there are no values } }
/**
 * Fetches the value a record holds at the given index.
 *
 * @param struct the record to read from
 * @param index the index handed to {@code struct.get}
 * @return whatever {@code struct.get(index)} yields
 */
@Override
protected Object get(Record struct, int index) {
  Object found = struct.get(index);
  return found;
}
}
/**
 * Writes a value into a record at the given position.
 *
 * @param struct the record being populated
 * @param pos the position handed to {@code struct.set}
 * @param value the value to place at that position
 */
@Override
protected void set(
    Record struct,
    int pos,
    Object value) {
  // Nothing is validated or converted here; the call is forwarded unchanged.
  struct.set(pos, value);
}
}
record.setField("id", 34L); Record location = GenericRecord.create(writeSchema.findType("location").asStructType()); location.setField("lat", 52.995143f); location.setField("long", -1.539054f); record.setField("location", location); Record projectedLocation = (Record) projected.getField("location"); Assert.assertEquals("Should contain the correct id value", 34L, (long) projected.getField("id")); Assert.assertNull("Should not project location", projectedLocation); projectedLocation = (Record) projected.getField("location"); Assert.assertNull("Should not project id", projected.getField("id")); Assert.assertNotNull("Should project location", projected.getField("location")); Assert.assertNull("Should not project longitude", projectedLocation.getField("long")); Assert.assertEquals("Should project latitude", 52.995143f, (float) projectedLocation.getField("lat"), 0.000001f); projectedLocation = (Record) projected.getField("location"); Assert.assertNull("Should not project id", projected.getField("id")); Assert.assertNotNull("Should project location", projected.getField("location")); Assert.assertNull("Should not project latitutde", projectedLocation.getField("lat")); Assert.assertEquals("Should project longitude", -1.539054f, (float) projectedLocation.getField("long"), 0.000001f); projectedLocation = (Record) projected.getField("location"); Assert.assertNull("Should not project id", projected.getField("id")); Assert.assertNotNull("Should project location", projected.getField("location")); Assert.assertEquals("Should project latitude", 52.995143f, (float) projectedLocation.getField("lat"), 0.000001f);
/**
 * Returns the value held at {@code pos} in the intermediate record.
 *
 * @param intermediate the record to read from
 * @param pos the position handed to {@code intermediate.get}
 * @return whatever {@code intermediate.get(pos)} yields
 */
@Override
protected Object getField(Record intermediate, int pos) {
  // The result is returned as a plain Object with no cast, so there is no unchecked operation
  // to suppress; the former @SuppressWarnings("unchecked") was dropped so it cannot hide a
  // real warning if this body changes later.
  return intermediate.get(pos);
}
/**
 * Builds a record for the given struct type and fills it positionally from the field results.
 *
 * @param struct the struct type the new record is created for
 * @param fieldResults one value per field, consumed in iteration order
 * @return a new record whose position {@code i} holds the {@code i}-th field result
 */
@Override
public Record struct(Types.StructType struct, Iterable<Object> fieldResults) {
  Record result = GenericRecord.create(struct);

  // Materialize the results first, then copy them over one position at a time.
  List<Object> fieldValues = Lists.newArrayList(fieldResults);
  int pos = 0;
  for (Object fieldValue : fieldValues) {
    result.set(pos, fieldValue);
    pos += 1;
  }

  return result;
}