/**
 * Creates a new, empty {@link TextFileConfig}.
 *
 * @return a freshly constructed {@code TextFileConfig}
 */
public TextFileConfig newMessage() {
  final TextFileConfig message = new TextFileConfig();
  return message;
}
/**
 * Creates a new, empty format configuration matching the given file type.
 *
 * @param type the file type to create a configuration for
 * @return a new configuration instance for the type, or {@code null} when the type is
 *     {@code UNKNOWN} or is not one of the types handled here
 */
public static FileFormat getEmptyConfig(FileType type) {
  final FileFormat emptyConfig;
  switch (type) {
    // All delimited text flavours share the same configuration class.
    case TEXT:
    case CSV:
    case TSV:
    case PSV:
      emptyConfig = new TextFileConfig();
      break;
    case JSON:
      emptyConfig = new JsonFileConfig();
      break;
    case PARQUET:
      emptyConfig = new ParquetFileConfig();
      break;
    case AVRO:
      emptyConfig = new AvroFileConfig();
      break;
    case HTTP_LOG:
      emptyConfig = new HttpLogFileConfig();
      break;
    case EXCEL:
      emptyConfig = new ExcelFileConfig();
      break;
    case XLS:
      emptyConfig = new XlsFileConfig();
      break;
    case UNKNOWN:
    default:
      // No configuration exists for these types; callers receive null.
      emptyConfig = null;
      break;
  }
  return emptyConfig;
}
/**
 * Sets a backslash as the escape character and checks that the generated table options
 * contain the corresponding {@code "escape"} entry.
 */
@Test
public void testEscapeTextFile() throws Exception {
  final TextFileConfig config = new TextFileConfig();
  config.setEscape("\\");

  assertContains("\"escape\" => '\\", config.toTableOptions());
}
/**
 * Sets {@code @} as the field delimiter and checks that it shows up in the generated
 * table options.
 */
@Test
public void testFieldDelimiterTextFile() throws Exception {
  final TextFileConfig config = new TextFileConfig();
  config.setFieldDelimiter("@");

  assertContains("fieldDelimiter => '@'", config.toTableOptions());
}
/**
 * Enables header extraction and checks that the generated table options report
 * {@code extractHeader => true}.
 */
@Test
public void testExtractHeaderTextFile() throws Exception {
  final TextFileConfig config = new TextFileConfig();
  config.setExtractHeader(true);

  assertContains("extractHeader => true", config.toTableOptions());
}
/**
 * Sets {@code $} as the comment character and checks that it shows up in the generated
 * table options.
 */
@Test
public void testCommentTextFile() throws Exception {
  final TextFileConfig config = new TextFileConfig();
  config.setComment("$");

  assertContains("comment => '$'", config.toTableOptions());
}
/**
 * Uses a single quote as the quote character and checks that the generated table options
 * render it doubled inside the single-quoted literal.
 */
@Test
public void testSingleQuoteTextFile() throws Exception {
  final TextFileConfig config = new TextFileConfig();
  config.setQuote("'");

  assertContains("quote => ''''", config.toTableOptions());
}
/**
 * Sets a tab character as the line delimiter and checks that it shows up in the generated
 * table options.
 */
@Test
public void testLineDelimiterTextFile() throws Exception {
  final TextFileConfig config = new TextFileConfig();
  config.setLineDelimiter("\t");

  assertContains("lineDelimiter => '\t'", config.toTableOptions());
}
/**
 * Sets a line delimiter that contains a single quote and checks that the generated table
 * options render that quote doubled inside the single-quoted literal.
 */
@Test
public void testLineDelimiterTextFileWithSingleQuote() throws Exception {
  final TextFileConfig config = new TextFileConfig();
  config.setLineDelimiter("'a");

  assertContains("lineDelimiter => '''a'", config.toTableOptions());
}
/**
 * Sets a double quote as the quote character and checks that it shows up in the generated
 * table options.
 */
@Test
public void testQuoteTextFile() throws Exception {
  final TextFileConfig config = new TextFileConfig();
  config.setQuote("\"");

  assertContains("quote => '\"'", config.toTableOptions());
}
/**
 * Enables skipping of the first line and checks that the generated table options report
 * {@code skipFirstLine => true}.
 */
@Test
public void testsetSkipFirstLineTextFile() throws Exception {
  final TextFileConfig config = new TextFileConfig();
  config.setSkipFirstLine(true);

  assertContains("skipFirstLine => true", config.toTableOptions());
}
/**
 * Disables automatic column name generation and checks that the generated table options
 * report {@code autoGenerateColumnNames => false}.
 */
@Test
public void testAutoGenerateColumnNamesTextFile() throws Exception {
  final TextFileConfig config = new TextFileConfig();
  config.setAutoGenerateColumnNames(false);

  assertContains("autoGenerateColumnNames => false", config.toTableOptions());
}
private static void runTests(HomeFileConf homeFileStore) throws Exception { // text file Path textFile = new Path(FileUtils.getResourceAsFile("/datasets/text/comma.txt").getAbsolutePath()); uploadFile(homeFileStore, textFile, "comma", "txt", new TextFileConfig().setFieldDelimiter(","), null); Path csvFile = new Path(FileUtils.getResourceAsFile("/datasets/csv/comma.csv").getAbsolutePath()); uploadFile(homeFileStore, csvFile, "comma1", "csv", new TextFileConfig().setFieldDelimiter(","), null); Path jsonFile = new Path(FileUtils.getResourceAsFile("/datasets/users.json").getAbsolutePath()); uploadFile(homeFileStore, jsonFile, "users", "json", new JsonFileConfig(), null); Path excelFile = new Path(FileUtils.getResourceAsFile("/testfiles/excel.xlsx").getAbsolutePath()); uploadFile(homeFileStore, excelFile, "excel", "xlsx", new ExcelFileConfig(), null); // query files runQuery("comma", 4, 3, null); runQuery("comma1", 4, 3, null); runQuery("users", 3, 2, null); runQuery("excel", 6, 5, null); // add file to folder FolderPath folderPath = new FolderPath(ImmutableList.of(HomeName.getUserHomePath(DEFAULT_USER_NAME).getName(), "testupload")); newNamespaceService().addOrUpdateFolder(folderPath.toNamespaceKey(), new FolderConfig() .setName("testupload") .setFullPathList(folderPath.toPathList())); uploadFile(homeFileStore, textFile, "comma", "txt", new TextFileConfig().setFieldDelimiter(","), folderPath); runQuery("comma", 4, 3, folderPath); }
@Test public void testCommaSeparatedTextFile() throws Exception { TextFileConfig fileConfig = new TextFileConfig(); fileConfig.setFieldDelimiter(","); fileConfig.setLineDelimiter("\n"); fileConfig.setName("comma.txt"); String fileUrlPath = getUrlPath("/datasets/text/comma.txt"); String fileParentUrlPath = getUrlPath("/datasets/text/"); doc("preview data for source file"); JobDataFragment data = expectSuccess(getBuilder(getAPIv2().path("/source/dacfs_test/file_preview" + fileUrlPath)).buildPost(Entity.json(fileConfig)), JobDataFragment.class); assertEquals(4, data.getReturnedRowCount()); assertEquals(3, data.getColumns().size()); checkCounts(fileParentUrlPath, "comma.txt", false /* false because we have not saved dataset yet */, 0, 0, 0); // previews are internal queries }
@Test public void testCommaSeparatedCsvTrimHeader() throws Exception { TextFileConfig fileConfig = new TextFileConfig(); fileConfig.setFieldDelimiter(","); // we set the wrong delimiter to test the header trimming fileConfig.setLineDelimiter("\n"); fileConfig.setTrimHeader(true); fileConfig.setExtractHeader(true); fileConfig.setName("comma_windows_lineseparator.csv"); String fileUrlPath = getUrlPath("/datasets/csv/comma_windows_lineseparator.csv"); JobDataFragment data = expectSuccess(getBuilder(getAPIv2().path("/source/dacfs_test/file_preview/" + fileUrlPath)).buildPost(Entity.json(fileConfig)), JobDataFragment.class); assertEquals(3, data.getReturnedRowCount()); assertEquals(3, data.getColumns().size()); // the column name would be address\r if trimHeader was false assertEquals("address", data.getColumns().get(2).getName()); fileConfig.setTrimHeader(false); JobDataFragment data2 = expectSuccess(getBuilder(getAPIv2().path("/source/dacfs_test/file_preview/" + fileUrlPath)).buildPost(Entity.json(fileConfig)), JobDataFragment.class); assertEquals(3, data2.getReturnedRowCount()); assertEquals(3, data2.getColumns().size()); // with header trimming turned off, we should see the \r character assertEquals("address\r", data2.getColumns().get(2).getName()); }
/**
 * Checks the table options generated by a {@link TextFileConfig} on which no setter has
 * been called, one expected entry per assertion.
 */
@Test
public void testDefaultTextFileFormatOptions() throws Exception {
  final String defaults = new TextFileConfig().toTableOptions();

  assertContains("type => 'text'", defaults);
  assertContains("fieldDelimiter => ','", defaults);
  assertContains("comment => '#'", defaults);
  assertContains("\"escape\" => '\"'", defaults);
  assertContains("quote => '\"'", defaults);
  assertContains("lineDelimiter => '\r\n'", defaults);
  assertContains("extractHeader => false", defaults);
  assertContains("skipFirstLine => false", defaults);
  assertContains("autoGenerateColumnNames => true", defaults);
}
/**
 * Previews {@code tinyacq.txt} as pipe-delimited text with header extraction and expects 23
 * columns, with the returned row count equal to the value of the
 * {@code FormatTools.TARGET_RECORDS} option.
 */
@Test
public void testPreviewTinyAcqWithHeader() throws Exception {
  final TextFileConfig pipeConfig = new TextFileConfig();
  pipeConfig.setFieldDelimiter("|");
  pipeConfig.setLineDelimiter("\n");
  pipeConfig.setName("tinyacq.txt");
  pipeConfig.setExtractHeader(true);

  final String filePath = getUrlPath("/datasets/tinyacq.txt");

  doc("preview data for source file");
  final String previewPath = "/source/dacfs_test/file_preview/" + filePath;
  final JobDataFragment preview = expectSuccess(
      getBuilder(getAPIv2().path(previewPath)).buildPost(Entity.json(pipeConfig)),
      JobDataFragment.class);

  assertEquals(23, preview.getColumns().size());

  // The expected row count is read from the running daemon's option manager.
  final SabotContext sabotContext =
      getCurrentDremioDaemon().getBindingProvider().lookup(SabotContext.class);
  assertEquals(
      sabotContext.getOptionManager().getOption(FormatTools.TARGET_RECORDS),
      preview.getReturnedRowCount());
}
/**
 * Stores a pipe-delimited, header-extracting format for {@code tinyacq.txt}, submits a
 * query job against the file, and expects 23 columns and 500 rows after truncating the
 * job data to 500.
 */
@Test
public void testQueryTinyAcqWithHeader() throws Exception {
  final JobsService jobs = l(JobsService.class);

  final TextFileConfig pipeConfig = new TextFileConfig();
  pipeConfig.setFieldDelimiter("|");
  pipeConfig.setLineDelimiter("\n");
  pipeConfig.setName("tinyacq.txt");
  pipeConfig.setExtractHeader(true);

  final String filePath = getUrlPath("/datasets/tinyacq.txt");

  // Persist the format before querying the file.
  final String formatPath = "/source/dacfs_test/file_format/" + filePath;
  expectSuccess(getBuilder(getAPIv2().path(formatPath)).buildPut(Entity.json(pipeConfig)));

  final JobRequest request = JobRequest.newBuilder()
      .setSqlQuery(createQuery("/datasets/tinyacq.txt"))
      .build();
  final JobUI submitted = new JobUI(jobs.submitJob(request, NoOpJobStatusListener.INSTANCE));

  final JobDataFragment firstRows = submitted.getData().truncate(500);
  assertEquals(23, firstRows.getColumns().size());
  assertEquals(500, firstRows.getReturnedRowCount());
}
@Test public void testCommaSeparatedCsvWindowsLineEndings() throws Exception { final JobsService jobsService = l(JobsService.class); TextFileConfig fileConfig = new TextFileConfig(); fileConfig.setFieldDelimiter(","); fileConfig.setName("comma_windows_lineseparator.csv");
@Test public void testCommaSeparatedCsv() throws Exception { final JobsService jobsService = l(JobsService.class); TextFileConfig fileConfig = new TextFileConfig(); fileConfig.setFieldDelimiter(","); fileConfig.setLineDelimiter("\n");