/**
 * Creates an unpartitioned table for the given identifier.
 *
 * <p>Delegates to the overload that takes a partition spec, passing
 * {@code PartitionSpec.unpartitioned()}.
 *
 * @param schema the schema of the new table
 * @param tableIdentifier the identifier of the table to create
 * @return the table produced by the spec-accepting overload
 */
@Override
public Table create(Schema schema, String tableIdentifier) {
  PartitionSpec noPartitioning = PartitionSpec.unpartitioned();
  return create(schema, noPartitioning, tableIdentifier);
}
/**
 * Creates an unpartitioned table identified by a database and table name.
 *
 * <p>Delegates to the overload that takes a partition spec, passing
 * {@code PartitionSpec.unpartitioned()}.
 *
 * @param schema the schema of the new table
 * @param database the database name, forwarded unchanged
 * @param table the table name, forwarded unchanged
 * @return the table produced by the spec-accepting overload
 */
public Table create(Schema schema, String database, String table) {
  PartitionSpec noPartitioning = PartitionSpec.unpartitioned();
  return create(schema, noPartitioning, database, table);
}
/**
 * Creates an unpartitioned table with no table properties.
 *
 * <p>Delegates to the four-argument overload, passing
 * {@code PartitionSpec.unpartitioned()} and an empty immutable map.
 *
 * @param schema the schema of the new table
 * @param tableIdentifier the identifier of the table to create
 * @return the table produced by the four-argument overload
 */
default Table create(Schema schema, String tableIdentifier) {
  PartitionSpec noPartitioning = PartitionSpec.unpartitioned();
  return create(schema, noPartitioning, ImmutableMap.of(), tableIdentifier);
}
/**
 * Creates an unpartitioned table through a fresh {@code HadoopTables} built from {@code CONF}.
 *
 * @param schema the schema of the new table
 * @param location the table location; its string form is passed to {@code HadoopTables}
 * @return the created table
 */
private Table createTable(Schema schema, File location) {
  return new HadoopTables(CONF)
      .create(schema, PartitionSpec.unpartitioned(), location.toString());
}
/**
 * Runs the shared {@code testFilterFiles} check against a table named "test" that is
 * created in {@code tableDir} with an unpartitioned spec.
 */
@Test
public void testFilterFilesUnpartitionedTable() {
  Table unpartitionedTable =
      TestTables.create(tableDir, "test", schema, PartitionSpec.unpartitioned());
  testFilterFiles(unpartitionedTable);
}
/**
 * Writes 1000 generated records into data files of at most 200 records each, appends
 * them to a new unpartitioned table in a single commit, and checks that reading the
 * table back yields the same set of records.
 */
@Test
public void testRandomData() throws IOException {
  List<Record> expected = RandomGenericData.generate(SCHEMA, 1000, 435691832918L);

  // The folder is created and then removed; the assertion guards that the delete succeeded.
  File location = temp.newFolder(format.name());
  Assert.assertTrue(location.delete());
  String tableLocation = location.toString();

  Table table = TABLES.create(
      SCHEMA,
      PartitionSpec.unpartitioned(),
      ImmutableMap.of(TableProperties.DEFAULT_FILE_FORMAT, format.name()),
      tableLocation);

  AppendFiles append = table.newAppend();

  int recordsPerFile = 200;
  Iterator<Record> remaining = expected.iterator();
  for (int fileNum = 0; remaining.hasNext(); fileNum += 1) {
    String fileName = format.addExtension("file-" + fileNum);
    Path path = new Path(tableLocation, fileName);

    // Collect the next batch: up to recordsPerFile records, fewer if the input runs out.
    List<Record> batch = Lists.newArrayList();
    while (batch.size() < recordsPerFile && remaining.hasNext()) {
      batch.add(remaining.next());
    }
    int numRecords = batch.size();

    writeFile(tableLocation, fileName, batch);
    append.appendFile(fromInputFile(HadoopInputFile.fromPath(path, CONF), numRecords));
  }

  append.commit();

  Set<Record> actual = Sets.newHashSet(IcebergGenerics.read(table).build());
  Assert.assertEquals("Should produce correct number of records",
      expected.size(), actual.size());
  Assert.assertEquals("Random record set should match",
      Sets.newHashSet(expected), actual);
}
// Create an unpartitioned table at `location`, then start building a DataFile against an
// unpartitioned spec: record count 100, file size taken from avroFile.length().
Table table = tables.create(schema, PartitionSpec.unpartitioned(), location.toString()); DataFile file = DataFiles.builder(PartitionSpec.unpartitioned()) .withRecordCount(100) .withFileSizeInBytes(avroFile.length())
/**
 * Starts a create transaction for "test_conflict", then creates a table with the same
 * name and directory directly, and expects the transaction commit to fail with a
 * {@code CommitFailedException}.
 */
@Test
public void testCreateTransactionConflict() throws IOException {
  String tableName = "test_conflict";

  File tableDir = temp.newFolder();
  Assert.assertTrue(tableDir.delete());

  Transaction createTxn = TestTables.beginCreate(tableDir, tableName, SCHEMA, SPEC);

  Assert.assertNull("Starting a create transaction should not commit metadata",
      TestTables.readMetadata(tableName));
  Assert.assertNull("Should have no metadata version",
      TestTables.metadataVersion(tableName));

  // Created outside the transaction, and with an unpartitioned spec rather than SPEC.
  Table conflictingTable = TestTables.create(tableDir, tableName, SCHEMA, unpartitioned());

  Assert.assertEquals("Table schema should match with reassigned IDs",
      assignFreshIds(SCHEMA).asStruct(), conflictingTable.schema().asStruct());
  Assert.assertEquals("Table spec should match conflict table, not transaction table",
      unpartitioned(), conflictingTable.spec());
  Assert.assertFalse("Table should not have any snapshots",
      conflictingTable.snapshots().iterator().hasNext());

  AssertHelpers.assertThrows("Transaction commit should fail",
      CommitFailedException.class,
      "Commit failed: table was updated",
      createTxn::commitTransaction);
}
@Test public void testCreateDetectsUncommittedChange() throws IOException { File tableDir = temp.newFolder(); Assert.assertTrue(tableDir.delete()); Transaction t = TestTables.beginCreate(tableDir, "uncommitted_change", SCHEMA, unpartitioned()); Assert.assertNull("Starting a create transaction should not commit metadata", TestTables.readMetadata("uncommitted_change")); Assert.assertNull("Should have no metadata version", TestTables.metadataVersion("uncommitted_change")); t.updateProperties().set("test-property", "test-value"); // not committed AssertHelpers.assertThrows("Should reject commit when last operation has not committed", IllegalStateException.class, "Cannot create new DeleteFiles: last operation has not committed", t::newDelete); }
@Test public void testCreateDetectsUncommittedChangeOnCommit() throws IOException { File tableDir = temp.newFolder(); Assert.assertTrue(tableDir.delete()); Transaction t = TestTables.beginCreate(tableDir, "uncommitted_change", SCHEMA, unpartitioned()); Assert.assertNull("Starting a create transaction should not commit metadata", TestTables.readMetadata("uncommitted_change")); Assert.assertNull("Should have no metadata version", TestTables.metadataVersion("uncommitted_change")); t.updateProperties().set("test-property", "test-value"); // not committed AssertHelpers.assertThrows("Should reject commit when last operation has not committed", IllegalStateException.class, "Cannot commit transaction: last operation has not committed", t::commitTransaction); }
/**
 * Verifies that a create transaction publishes no metadata until it is committed, and
 * that the committed metadata is at version 0 with no manifests, an unpartitioned spec,
 * and no snapshots.
 */
@Test
public void testCreateTransaction() throws IOException {
  String tableName = "test_create";

  File tableDir = temp.newFolder();
  Assert.assertTrue(tableDir.delete());

  Transaction createTxn = TestTables.beginCreate(tableDir, tableName, SCHEMA, unpartitioned());

  // Nothing is visible before the transaction commits.
  Assert.assertNull("Starting a create transaction should not commit metadata",
      TestTables.readMetadata(tableName));
  Assert.assertNull("Should have no metadata version",
      TestTables.metadataVersion(tableName));

  createTxn.commitTransaction();

  TableMetadata committed = TestTables.readMetadata(tableName);
  Assert.assertNotNull("Table metadata should be created after transaction commits", committed);
  Assert.assertEquals("Should have metadata version 0",
      0, (int) TestTables.metadataVersion(tableName));
  Assert.assertEquals("Should have 0 manifest files",
      0, listManifestFiles(tableDir).size());

  Assert.assertEquals("Table schema should match with reassigned IDs",
      assignFreshIds(SCHEMA).asStruct(), committed.schema().asStruct());
  Assert.assertEquals("Table spec should match",
      unpartitioned(), committed.spec());
  Assert.assertEquals("Table should not have any snapshots",
      0, committed.snapshots().size());
}
// Assert that tableDir was deleted, then begin the "test_properties" create transaction
// with an unpartitioned spec. The assertions expect the metadata to have an unpartitioned
// spec, no snapshots, and exactly one table property.
Assert.assertTrue(tableDir.delete()); Transaction t = TestTables.beginCreate(tableDir, "test_properties", SCHEMA, unpartitioned()); Assert.assertEquals("Table spec should match", unpartitioned(), meta.spec()); Assert.assertEquals("Table should not have any snapshots", 0, meta.snapshots().size()); Assert.assertEquals("Should have one table property", 1, meta.properties().size());
// Assert that tableDir was deleted, then begin the "test_properties" create transaction
// with an unpartitioned spec. The assertions expect the metadata to have an unpartitioned
// spec, no snapshots, and exactly one table property.
Assert.assertTrue(tableDir.delete()); Transaction t = TestTables.beginCreate(tableDir, "test_properties", SCHEMA, unpartitioned()); Assert.assertEquals("Table spec should match", unpartitioned(), meta.spec()); Assert.assertEquals("Table should not have any snapshots", 0, meta.snapshots().size()); Assert.assertEquals("Should have one table property", 1, meta.properties().size());
/**
 * Verifies that an append made inside a create transaction stays unpublished until the
 * transaction commits, and that the committed table is at version 0 with one manifest
 * file and a single snapshot validated against FILE_A and FILE_B.
 */
@Test
public void testCreateAndAppendWithTransaction() throws IOException {
  String tableName = "test_append";

  File tableDir = temp.newFolder();
  Assert.assertTrue(tableDir.delete());

  Transaction createTxn = TestTables.beginCreate(tableDir, tableName, SCHEMA, unpartitioned());

  Assert.assertNull("Starting a create transaction should not commit metadata",
      TestTables.readMetadata(tableName));
  Assert.assertNull("Should have no metadata version",
      TestTables.metadataVersion(tableName));

  createTxn.newAppend()
      .appendFile(FILE_A)
      .appendFile(FILE_B)
      .commit();

  // The append is committed to the transaction only; no metadata is visible yet.
  Assert.assertNull("Appending in a transaction should not commit metadata",
      TestTables.readMetadata(tableName));
  Assert.assertNull("Should have no metadata version",
      TestTables.metadataVersion(tableName));

  createTxn.commitTransaction();

  TableMetadata committed = TestTables.readMetadata(tableName);
  Assert.assertNotNull("Table metadata should be created after transaction commits", committed);
  Assert.assertEquals("Should have metadata version 0",
      0, (int) TestTables.metadataVersion(tableName));
  Assert.assertEquals("Should have 1 manifest file",
      1, listManifestFiles(tableDir).size());

  Assert.assertEquals("Table schema should match with reassigned IDs",
      assignFreshIds(SCHEMA).asStruct(), committed.schema().asStruct());
  Assert.assertEquals("Table spec should match",
      unpartitioned(), committed.spec());
  Assert.assertEquals("Table should have one snapshot",
      1, committed.snapshots().size());

  validateSnapshot(null, committed.currentSnapshot(), FILE_A, FILE_B);
}
// Begin a replace transaction for "test_append" with an unpartitioned spec. The
// assertions expect the metadata to have an unpartitioned spec and exactly one snapshot.
Transaction replace = TestTables.beginReplace(tableDir, "test_append", SCHEMA, unpartitioned()); Assert.assertEquals("Table spec should match", unpartitioned(), meta.spec()); Assert.assertEquals("Table should have one snapshot", 1, meta.snapshots().size());
@Test public void testReplaceWithUnpartitionedTable() throws IOException { File tableDir = temp.newFolder(); Assert.assertTrue(tableDir.delete()); Table unpartitioned = TestTables.create( tableDir, "unpartitioned", SCHEMA, PartitionSpec.unpartitioned()); Assert.assertEquals("Table version should be 0", 0, (long) TestTables.metadataVersion("unpartitioned")); unpartitioned.newAppend() .appendFile(FILE_A) .commit(); // make sure the data was successfully added Assert.assertEquals("Table version should be 1", 1, (long) TestTables.metadataVersion("unpartitioned")); validateSnapshot(null, TestTables.readMetadata("unpartitioned").currentSnapshot(), FILE_A); unpartitioned.newReplacePartitions() .addFile(FILE_B) .commit(); Assert.assertEquals("Table version should be 2", 2, (long) TestTables.metadataVersion("unpartitioned")); TableMetadata replaceMetadata = TestTables.readMetadata("unpartitioned"); long replaceId = replaceMetadata.currentSnapshot().snapshotId(); Assert.assertEquals("Table should have 2 manifests", 2, replaceMetadata.currentSnapshot().manifests().size()); validateManifestEntries(replaceMetadata.currentSnapshot().manifests().get(0), ids(replaceId), files(FILE_B), statuses(Status.ADDED)); validateManifestEntries(replaceMetadata.currentSnapshot().manifests().get(1), ids(replaceId), files(FILE_A), statuses(Status.DELETED)); }
@Test public void testReplaceAndMergeWithUnpartitionedTable() throws IOException { File tableDir = temp.newFolder(); Assert.assertTrue(tableDir.delete()); Table unpartitioned = TestTables.create( tableDir, "unpartitioned", SCHEMA, PartitionSpec.unpartitioned()); // ensure the overwrite results in a merge unpartitioned.updateProperties().set(TableProperties.MANIFEST_MIN_MERGE_COUNT, "1").commit(); Assert.assertEquals("Table version should be 1", 1, (long) TestTables.metadataVersion("unpartitioned")); unpartitioned.newAppend() .appendFile(FILE_A) .commit(); // make sure the data was successfully added Assert.assertEquals("Table version should be 2", 2, (long) TestTables.metadataVersion("unpartitioned")); validateSnapshot(null, TestTables.readMetadata("unpartitioned").currentSnapshot(), FILE_A); unpartitioned.newReplacePartitions() .addFile(FILE_B) .commit(); Assert.assertEquals("Table version should be 3", 3, (long) TestTables.metadataVersion("unpartitioned")); TableMetadata replaceMetadata = TestTables.readMetadata("unpartitioned"); long replaceId = replaceMetadata.currentSnapshot().snapshotId(); Assert.assertEquals("Table should have 1 manifest", 1, replaceMetadata.currentSnapshot().manifests().size()); validateManifestEntries(replaceMetadata.currentSnapshot().manifests().get(0), ids(replaceId, replaceId), files(FILE_B, FILE_A), statuses(Status.ADDED, Status.DELETED)); }
/**
 * Replaces the "test" table using its current schema and an unpartitioned spec.
 *
 * <p>After FILE_A is appended (version 1), the replace transaction is committed and the
 * table refreshed. The table is then expected to be at version 2 with no current
 * snapshot, the same schema struct as before the replace, and a spec with no fields.
 */
@Test
public void testReplaceWithNewPartitionSpec() {
  PartitionSpec newSpec = PartitionSpec.unpartitioned();

  Snapshot start = table.currentSnapshot();
  Schema schema = table.schema();

  table.newAppend()
      .appendFile(FILE_A)
      .commit();

  Assert.assertEquals("Version should be 1", 1L, (long) version());

  validateSnapshot(start, table.currentSnapshot(), FILE_A);

  Transaction replace = TestTables.beginReplace(tableDir, "test", table.schema(), newSpec);
  replace.commitTransaction();

  table.refresh();

  Assert.assertEquals("Version should be 2", 2L, (long) version());
  Assert.assertNull("Table should not have a current snapshot", table.currentSnapshot());
  // The replace was started with the table's own schema, so the schema must be unchanged;
  // the message states that rather than the wording of the incompatible-schema test.
  Assert.assertEquals("Schema should match previous schema",
      schema.asStruct(), table.schema().asStruct());
  Assert.assertEquals("Table should have new unpartitioned spec",
      0, table.spec().fields().size());
}
/**
 * Replaces the "test" table with a schema that declares "id" and "data" under field ids
 * 4 and 5, using an unpartitioned spec.
 *
 * <p>After the replace commits, the table is expected to be at version 2 with no current
 * snapshot, a schema struct equal to the one captured before the replace, and a spec
 * with no fields.
 */
@Test
public void testReplaceTransaction() {
  Schema replacementSchema = new Schema(
      required(4, "id", Types.IntegerType.get()),
      required(5, "data", Types.StringType.get()));

  Snapshot initialSnapshot = table.currentSnapshot();
  Schema originalSchema = table.schema();

  table.newAppend()
      .appendFile(FILE_A)
      .commit();

  Assert.assertEquals("Version should be 1", 1L, (long) version());
  validateSnapshot(initialSnapshot, table.currentSnapshot(), FILE_A);

  Transaction replaceTxn =
      TestTables.beginReplace(tableDir, "test", replacementSchema, unpartitioned());
  replaceTxn.commitTransaction();

  table.refresh();

  Assert.assertEquals("Version should be 2", 2L, (long) version());
  Assert.assertNull("Table should not have a current snapshot", table.currentSnapshot());
  Assert.assertEquals("Schema should match previous schema",
      originalSchema.asStruct(), table.schema().asStruct());
  Assert.assertEquals("Partition spec should have no fields",
      0, table.spec().fields().size());
}
/**
 * Replaces the "test" table with a schema holding a single required integer column
 * "obj_id" declared under field id 4, using an unpartitioned spec.
 *
 * <p>After the replace commits, the table is expected to be at version 2 with no current
 * snapshot, and its schema struct is expected to equal a schema whose "obj_id" column
 * has field id 1.
 */
@Test
public void testReplaceWithIncompatibleSchemaUpdate() {
  Schema replacementSchema = new Schema(required(4, "obj_id", Types.IntegerType.get()));

  Snapshot initialSnapshot = table.currentSnapshot();

  table.newAppend()
      .appendFile(FILE_A)
      .commit();

  Assert.assertEquals("Version should be 1", 1L, (long) version());
  validateSnapshot(initialSnapshot, table.currentSnapshot(), FILE_A);

  Transaction replaceTxn =
      TestTables.beginReplace(tableDir, "test", replacementSchema, unpartitioned());
  replaceTxn.commitTransaction();

  table.refresh();

  Assert.assertEquals("Version should be 2", 2L, (long) version());
  Assert.assertNull("Table should not have a current snapshot", table.currentSnapshot());

  Schema expectedSchema = new Schema(required(1, "obj_id", Types.IntegerType.get()));
  Assert.assertEquals("Schema should use new schema, not compatible with previous",
      expectedSchema.asStruct(), table.schema().asStruct());
}