/**
 * Sets a tab character as the line delimiter on a {@link TextFileConfig} and
 * checks that the generated table options contain the matching
 * {@code lineDelimiter} entry.
 */
@Test
public void testLineDelimiterTextFile() throws Exception {
  final TextFileConfig config = new TextFileConfig();
  config.setLineDelimiter("\t");

  // The delimiter is expected to appear wrapped in single quotes.
  assertContains("lineDelimiter => '\t'", config.toTableOptions());
}
/**
 * Sets a line delimiter that itself contains a single quote ({@code 'a}) and
 * checks that the generated table options contain the delimiter with that
 * quote doubled inside the single-quoted value.
 */
@Test
public void testLineDelimiterTextFileWithSingleQuote() throws Exception {
  final TextFileConfig config = new TextFileConfig();
  config.setLineDelimiter("'a");

  // Input 'a is expected to be rendered as '''a' (quote doubled, then wrapped).
  assertContains("lineDelimiter => '''a'", config.toTableOptions());
}
// Copy the header, first-line, line-delimiter and column-naming options
// from the parsing settings onto the text file config, one setter per option.
textFileConfig.setExtractHeader(settings.isHeaderExtractionEnabled());
textFileConfig.setSkipFirstLine(settings.isSkipFirstLine());
// NOTE(review): new String(...) is called without an explicit charset. If
// getNewLineDelimiter() returns byte[], this decodes using the platform
// default charset — confirm the return type and consider passing
// StandardCharsets.UTF_8.
textFileConfig.setLineDelimiter(new String(settings.getNewLineDelimiter()));
textFileConfig.setAutoGenerateColumnNames(settings.isAutoGenerateColumnNames());
textFileConfig.setTrimHeader(settings.isTrimHeader());
@Test public void testCommaSeparatedTextFile() throws Exception { TextFileConfig fileConfig = new TextFileConfig(); fileConfig.setFieldDelimiter(","); fileConfig.setLineDelimiter("\n"); fileConfig.setName("comma.txt"); String fileUrlPath = getUrlPath("/datasets/text/comma.txt"); String fileParentUrlPath = getUrlPath("/datasets/text/"); doc("preview data for source file"); JobDataFragment data = expectSuccess(getBuilder(getAPIv2().path("/source/dacfs_test/file_preview" + fileUrlPath)).buildPost(Entity.json(fileConfig)), JobDataFragment.class); assertEquals(4, data.getReturnedRowCount()); assertEquals(3, data.getColumns().size()); checkCounts(fileParentUrlPath, "comma.txt", false /* false because we have not saved dataset yet */, 0, 0, 0); // previews are internal queries }
@Test public void testCommaSeparatedCsvTrimHeader() throws Exception { TextFileConfig fileConfig = new TextFileConfig(); fileConfig.setFieldDelimiter(","); // we set the wrong delimiter to test the header trimming fileConfig.setLineDelimiter("\n"); fileConfig.setTrimHeader(true); fileConfig.setExtractHeader(true); fileConfig.setName("comma_windows_lineseparator.csv"); String fileUrlPath = getUrlPath("/datasets/csv/comma_windows_lineseparator.csv"); JobDataFragment data = expectSuccess(getBuilder(getAPIv2().path("/source/dacfs_test/file_preview/" + fileUrlPath)).buildPost(Entity.json(fileConfig)), JobDataFragment.class); assertEquals(3, data.getReturnedRowCount()); assertEquals(3, data.getColumns().size()); // the column name would be address\r if trimHeader was false assertEquals("address", data.getColumns().get(2).getName()); fileConfig.setTrimHeader(false); JobDataFragment data2 = expectSuccess(getBuilder(getAPIv2().path("/source/dacfs_test/file_preview/" + fileUrlPath)).buildPost(Entity.json(fileConfig)), JobDataFragment.class); assertEquals(3, data2.getReturnedRowCount()); assertEquals(3, data2.getColumns().size()); // with header trimming turned off, we should see the \r character assertEquals("address\r", data2.getColumns().get(2).getName()); }
/**
 * Previews the pipe-delimited {@code tinyacq.txt} file with header extraction
 * enabled. Expects 23 columns and a returned row count equal to the current
 * value of the {@code FormatTools.TARGET_RECORDS} option.
 */
@Test
public void testPreviewTinyAcqWithHeader() throws Exception {
  final TextFileConfig format = new TextFileConfig();
  format.setFieldDelimiter("|");
  format.setLineDelimiter("\n");
  format.setName("tinyacq.txt");
  format.setExtractHeader(true);

  final String previewEndpoint =
      "/source/dacfs_test/file_preview/" + getUrlPath("/datasets/tinyacq.txt");

  doc("preview data for source file");
  final JobDataFragment preview = expectSuccess(
      getBuilder(getAPIv2().path(previewEndpoint)).buildPost(Entity.json(format)),
      JobDataFragment.class);

  assertEquals(23, preview.getColumns().size());
  // The expected row count is read from the running daemon's option manager.
  assertEquals(
      getCurrentDremioDaemon()
          .getBindingProvider()
          .lookup(SabotContext.class)
          .getOptionManager()
          .getOption(FormatTools.TARGET_RECORDS),
      preview.getReturnedRowCount());
}
// Set "\n" as the line delimiter on the config, then pass the folder path
// and the config to createPDS and store the result in the existing
// `dataset` variable.
textFileConfig.setLineDelimiter("\n");
dataset = createPDS(folder.getPath(), textFileConfig);
/**
 * Stores a pipe-delimited, header-extracting format for {@code tinyacq.txt}
 * through the {@code file_format} endpoint, then submits a query for that
 * file. The job data truncated to 500 rows is expected to have 23 columns
 * and 500 returned rows.
 */
@Test
public void testQueryTinyAcqWithHeader() throws Exception {
  final JobsService jobs = l(JobsService.class);

  final TextFileConfig format = new TextFileConfig();
  format.setFieldDelimiter("|");
  format.setLineDelimiter("\n");
  format.setName("tinyacq.txt");
  format.setExtractHeader(true);

  final String formatEndpoint =
      "/source/dacfs_test/file_format/" + getUrlPath("/datasets/tinyacq.txt");

  // Save the format settings for the file before querying it.
  expectSuccess(getBuilder(getAPIv2().path(formatEndpoint)).buildPut(Entity.json(format)));

  final JobUI submitted = new JobUI(
      jobs.submitJob(
          JobRequest.newBuilder()
              .setSqlQuery(createQuery("/datasets/tinyacq.txt"))
              .build(),
          NoOpJobStatusListener.INSTANCE));

  final JobDataFragment rows = submitted.getData().truncate(500);
  assertEquals(23, rows.getColumns().size());
  assertEquals(500, rows.getReturnedRowCount());
}
// Set "\n" as the line delimiter on the config, then pass the folder path
// and the config to createPDS, keeping the returned Dataset in a new local.
textFileConfig.setLineDelimiter("\n");
Dataset dataset = createPDS(folder.getPath(), textFileConfig);
// Build a comma-delimited text file config that uses "\n" as the line delimiter.
TextFileConfig fileConfig = new TextFileConfig();
fileConfig.setFieldDelimiter(",");
fileConfig.setLineDelimiter("\n");
// NOTE(review): the config name ends in .csv while the URL path below points
// at comma.txt — confirm this mismatch is intentional.
fileConfig.setName("comma.csv");
String fileUrlPath = getUrlPath("/datasets/text/comma.txt");