@Test public void testCommaSeparatedTextFile() throws Exception { TextFileConfig fileConfig = new TextFileConfig(); fileConfig.setFieldDelimiter(","); fileConfig.setLineDelimiter("\n"); fileConfig.setName("comma.txt"); String fileUrlPath = getUrlPath("/datasets/text/comma.txt"); String fileParentUrlPath = getUrlPath("/datasets/text/"); doc("preview data for source file"); JobDataFragment data = expectSuccess(getBuilder(getAPIv2().path("/source/dacfs_test/file_preview" + fileUrlPath)).buildPost(Entity.json(fileConfig)), JobDataFragment.class); assertEquals(4, data.getReturnedRowCount()); assertEquals(3, data.getColumns().size()); checkCounts(fileParentUrlPath, "comma.txt", false /* false because we have not saved dataset yet */, 0, 0, 0); // previews are internal queries }
fileConfig.setName("comma.txt");
/**
 * Previews the pipe-delimited {@code tinyacq.txt} file with header extraction enabled and
 * checks that 23 columns come back and that the returned row count equals the configured
 * {@code FormatTools.TARGET_RECORDS} option value.
 */
@Test
public void testPreviewTinyAcqWithHeader() throws Exception {
  final TextFileConfig textConfig = new TextFileConfig();
  textConfig.setFieldDelimiter("|");
  textConfig.setLineDelimiter("\n");
  textConfig.setName("tinyacq.txt");
  textConfig.setExtractHeader(true);

  final String previewPath = "/source/dacfs_test/file_preview/" + getUrlPath("/datasets/tinyacq.txt");

  doc("preview data for source file");
  final JobDataFragment preview = expectSuccess(
      getBuilder(getAPIv2().path(previewPath)).buildPost(Entity.json(textConfig)),
      JobDataFragment.class);

  assertEquals(23, preview.getColumns().size());

  // The expected row count is read from the running daemon's option manager rather than
  // hard-coded, so the test follows whatever TARGET_RECORDS is set to.
  final SabotContext sabotContext =
      getCurrentDremioDaemon().getBindingProvider().lookup(SabotContext.class);
  assertEquals(
      sabotContext.getOptionManager().getOption(FormatTools.TARGET_RECORDS),
      preview.getReturnedRowCount());
}
@Test public void testCommaSeparatedCsvTrimHeader() throws Exception { TextFileConfig fileConfig = new TextFileConfig(); fileConfig.setFieldDelimiter(","); // we set the wrong delimiter to test the header trimming fileConfig.setLineDelimiter("\n"); fileConfig.setTrimHeader(true); fileConfig.setExtractHeader(true); fileConfig.setName("comma_windows_lineseparator.csv"); String fileUrlPath = getUrlPath("/datasets/csv/comma_windows_lineseparator.csv"); JobDataFragment data = expectSuccess(getBuilder(getAPIv2().path("/source/dacfs_test/file_preview/" + fileUrlPath)).buildPost(Entity.json(fileConfig)), JobDataFragment.class); assertEquals(3, data.getReturnedRowCount()); assertEquals(3, data.getColumns().size()); // the column name would be address\r if trimHeader was false assertEquals("address", data.getColumns().get(2).getName()); fileConfig.setTrimHeader(false); JobDataFragment data2 = expectSuccess(getBuilder(getAPIv2().path("/source/dacfs_test/file_preview/" + fileUrlPath)).buildPost(Entity.json(fileConfig)), JobDataFragment.class); assertEquals(3, data2.getReturnedRowCount()); assertEquals(3, data2.getColumns().size()); // with header trimming turned off, we should see the \r character assertEquals("address\r", data2.getColumns().get(2).getName()); }
fileConfig.setName("fff"); fileConfig.setVersion(null);
/**
 * Saves a pipe-delimited, header-extracting format for {@code tinyacq.txt} through the
 * {@code file_format} endpoint, then submits a query against the file and checks that the
 * result truncated to 500 rows has 23 columns and 500 returned rows.
 */
@Test
public void testQueryTinyAcqWithHeader() throws Exception {
  final JobsService jobsService = l(JobsService.class);

  final TextFileConfig textConfig = new TextFileConfig();
  textConfig.setFieldDelimiter("|");
  textConfig.setLineDelimiter("\n");
  textConfig.setName("tinyacq.txt");
  textConfig.setExtractHeader(true);

  // Store the format settings on the file before querying it.
  final String formatPath = "/source/dacfs_test/file_format/" + getUrlPath("/datasets/tinyacq.txt");
  expectSuccess(getBuilder(getAPIv2().path(formatPath)).buildPut(Entity.json(textConfig)));

  final JobUI submitted = new JobUI(
      jobsService.submitJob(
          JobRequest.newBuilder()
              .setSqlQuery(createQuery("/datasets/tinyacq.txt"))
              .build(),
          NoOpJobStatusListener.INSTANCE));

  // Only the first 500 rows of the job output are inspected.
  final JobDataFragment firstRows = submitted.getData().truncate(500);
  assertEquals(23, firstRows.getColumns().size());
  assertEquals(500, firstRows.getReturnedRowCount());
}
TextFileConfig fileConfig = new TextFileConfig(); fileConfig.setFieldDelimiter(","); fileConfig.setName("comma_windows_lineseparator.csv"); String fileUrlPath = getUrlPath("/datasets/csv/comma_windows_lineseparator.csv"); String fileParentUrlPath = getUrlPath("/datasets/csv/");
fileConfig.setFieldDelimiter(","); fileConfig.setLineDelimiter("\n"); fileConfig.setName("comma.csv"); String fileUrlPath = getUrlPath("/datasets/text/comma.txt"); String fileParentUrlPath = getUrlPath("/datasets/text/");
fileConfig.setFieldDelimiter("|"); fileConfig.setExtractHeader(true); fileConfig.setName("fff");