/**
 * Checks that a field delimiter configured on a {@link TextFileConfig} appears in the
 * string returned by {@code toTableOptions()}.
 */
@Test
public void testFieldDelimiterTextFile() throws Exception {
  final TextFileConfig textConfig = new TextFileConfig();
  textConfig.setFieldDelimiter("@");

  final String renderedOptions = textConfig.toTableOptions();

  // The rendered options must carry the delimiter as a quoted literal.
  assertContains("fieldDelimiter => '@'", renderedOptions);
}
private static void runTests(HomeFileConf homeFileStore) throws Exception { // text file Path textFile = new Path(FileUtils.getResourceAsFile("/datasets/text/comma.txt").getAbsolutePath()); uploadFile(homeFileStore, textFile, "comma", "txt", new TextFileConfig().setFieldDelimiter(","), null); Path csvFile = new Path(FileUtils.getResourceAsFile("/datasets/csv/comma.csv").getAbsolutePath()); uploadFile(homeFileStore, csvFile, "comma1", "csv", new TextFileConfig().setFieldDelimiter(","), null); Path jsonFile = new Path(FileUtils.getResourceAsFile("/datasets/users.json").getAbsolutePath()); uploadFile(homeFileStore, jsonFile, "users", "json", new JsonFileConfig(), null); Path excelFile = new Path(FileUtils.getResourceAsFile("/testfiles/excel.xlsx").getAbsolutePath()); uploadFile(homeFileStore, excelFile, "excel", "xlsx", new ExcelFileConfig(), null); // query files runQuery("comma", 4, 3, null); runQuery("comma1", 4, 3, null); runQuery("users", 3, 2, null); runQuery("excel", 6, 5, null); // add file to folder FolderPath folderPath = new FolderPath(ImmutableList.of(HomeName.getUserHomePath(DEFAULT_USER_NAME).getName(), "testupload")); newNamespaceService().addOrUpdateFolder(folderPath.toNamespaceKey(), new FolderConfig() .setName("testupload") .setFullPathList(folderPath.toPathList())); uploadFile(homeFileStore, textFile, "comma", "txt", new TextFileConfig().setFieldDelimiter(","), folderPath); runQuery("comma", 4, 3, folderPath); }
// Copy the single-character parser settings into the text file config as one-character
// strings. String.valueOf(char) is used instead of the deprecated new Character(char)
// boxing constructor; the resulting strings are the same.
textFileConfig.setComment(String.valueOf((char) settings.getComment()));
textFileConfig.setEscape(String.valueOf((char) settings.getQuoteEscape()));
textFileConfig.setFieldDelimiter(String.valueOf((char) settings.getDelimiter()));
textFileConfig.setQuote(String.valueOf((char) settings.getQuote()));
// Header extraction is a plain flag and is carried over unchanged.
textFileConfig.setExtractHeader(settings.isHeaderExtractionEnabled());
@Test public void testCommaSeparatedTextFile() throws Exception { TextFileConfig fileConfig = new TextFileConfig(); fileConfig.setFieldDelimiter(","); fileConfig.setLineDelimiter("\n"); fileConfig.setName("comma.txt"); String fileUrlPath = getUrlPath("/datasets/text/comma.txt"); String fileParentUrlPath = getUrlPath("/datasets/text/"); doc("preview data for source file"); JobDataFragment data = expectSuccess(getBuilder(getAPIv2().path("/source/dacfs_test/file_preview" + fileUrlPath)).buildPost(Entity.json(fileConfig)), JobDataFragment.class); assertEquals(4, data.getReturnedRowCount()); assertEquals(3, data.getColumns().size()); checkCounts(fileParentUrlPath, "comma.txt", false /* false because we have not saved dataset yet */, 0, 0, 0); // previews are internal queries }
// Use a comma as the field delimiter and name the config "comma.txt".
fileConfig.setFieldDelimiter(","); fileConfig.setName("comma.txt");
/**
 * Previews {@code tinyacq.txt} with a pipe field delimiter and header extraction enabled.
 * Expects 23 columns and a returned row count equal to the value of the
 * {@code FormatTools.TARGET_RECORDS} option read from the running daemon's option manager.
 */
@Test
public void testPreviewTinyAcqWithHeader() throws Exception {
  final TextFileConfig pipeConfig = new TextFileConfig();
  pipeConfig.setFieldDelimiter("|");
  pipeConfig.setLineDelimiter("\n");
  pipeConfig.setName("tinyacq.txt");
  pipeConfig.setExtractHeader(true);

  final String previewFilePath = getUrlPath("/datasets/tinyacq.txt");

  doc("preview data for source file");
  final String previewEndpoint = "/source/dacfs_test/file_preview/" + previewFilePath;
  final JobDataFragment preview = expectSuccess(
      getBuilder(getAPIv2().path(previewEndpoint)).buildPost(Entity.json(pipeConfig)),
      JobDataFragment.class);

  assertEquals(23, preview.getColumns().size());

  // The expected row count is not hard-coded; it is whatever TARGET_RECORDS is set to.
  assertEquals(
      getCurrentDremioDaemon()
          .getBindingProvider()
          .lookup(SabotContext.class)
          .getOptionManager()
          .getOption(FormatTools.TARGET_RECORDS),
      preview.getReturnedRowCount());
}
@Test public void testCommaSeparatedCsvTrimHeader() throws Exception { TextFileConfig fileConfig = new TextFileConfig(); fileConfig.setFieldDelimiter(","); // we set the wrong delimiter to test the header trimming fileConfig.setLineDelimiter("\n"); fileConfig.setTrimHeader(true); fileConfig.setExtractHeader(true); fileConfig.setName("comma_windows_lineseparator.csv"); String fileUrlPath = getUrlPath("/datasets/csv/comma_windows_lineseparator.csv"); JobDataFragment data = expectSuccess(getBuilder(getAPIv2().path("/source/dacfs_test/file_preview/" + fileUrlPath)).buildPost(Entity.json(fileConfig)), JobDataFragment.class); assertEquals(3, data.getReturnedRowCount()); assertEquals(3, data.getColumns().size()); // the column name would be address\r if trimHeader was false assertEquals("address", data.getColumns().get(2).getName()); fileConfig.setTrimHeader(false); JobDataFragment data2 = expectSuccess(getBuilder(getAPIv2().path("/source/dacfs_test/file_preview/" + fileUrlPath)).buildPost(Entity.json(fileConfig)), JobDataFragment.class); assertEquals(3, data2.getReturnedRowCount()); assertEquals(3, data2.getColumns().size()); // with header trimming turned off, we should see the \r character assertEquals("address\r", data2.getColumns().get(2).getName()); }
// NOTE(review): as visible here the "|" delimiter is overwritten by "," straight away;
// confirm whether statements between the two calls are missing from this excerpt.
// The config then takes over fileFormat1's version, and fileFormat1 is re-derived from
// the config via FileFormat.getForFolder.
fileConfig.setFieldDelimiter("|"); fileConfig.setFieldDelimiter(","); fileConfig.setVersion(fileFormat1.getVersion()); fileFormat1 = FileFormat.getForFolder(fileConfig.asFileConfig());
@Test // DX-5410 public void formatChangeForUploadedHomeFile() throws Exception { FormDataMultiPart form = new FormDataMultiPart(); FormDataBodyPart fileBody = new FormDataBodyPart("file", FileUtils.getResourceAsFile("/datasets/csv/pipe.csv"), MediaType.MULTIPART_FORM_DATA_TYPE); form.bodyPart(fileBody); form.bodyPart(new FormDataBodyPart("fileName", "pipe")); doc("uploading a text file"); File file1 = expectSuccess(getBuilder(getAPIv2().path("home/" + HOME_NAME + "/upload_start/").queryParam("extension", "csv")) .buildPost(Entity.entity(form, form.getMediaType())), File.class); file1 = expectSuccess(getBuilder(getAPIv2().path("home/" + HOME_NAME + "/upload_finish/pipe")) .buildPost(Entity.json(file1.getFileFormat().getFileFormat())), File.class); final FileFormat defaultFileFormat = file1.getFileFormat().getFileFormat(); assertTrue(defaultFileFormat instanceof TextFileConfig); assertEquals(",", ((TextFileConfig)defaultFileFormat).getFieldDelimiter()); doc("change the format settings of uploaded file"); final TextFileConfig newFileFormat = (TextFileConfig)defaultFileFormat; newFileFormat.setFieldDelimiter("|"); final FileFormat updatedFileFormat = expectSuccess(getBuilder(getAPIv2().path("home/" + HOME_NAME + "/file_format/pipe")) .buildPut(Entity.json(newFileFormat)), FileFormatUI.class).getFileFormat(); assertTrue(updatedFileFormat instanceof TextFileConfig); assertEquals("|", ((TextFileConfig)updatedFileFormat).getFieldDelimiter()); }
/**
 * Saves a pipe-delimited, header-extracting format for {@code tinyacq.txt} through the
 * file_format endpoint, submits a query built by {@code createQuery} for that file, and
 * expects the job data truncated to 500 rows to have 23 columns and 500 returned rows.
 */
@Test
public void testQueryTinyAcqWithHeader() throws Exception {
  final JobsService jobs = l(JobsService.class);

  final TextFileConfig pipeConfig = new TextFileConfig();
  pipeConfig.setFieldDelimiter("|");
  pipeConfig.setLineDelimiter("\n");
  pipeConfig.setName("tinyacq.txt");
  pipeConfig.setExtractHeader(true);

  // Store the format on the source file before querying it.
  final String formatFilePath = getUrlPath("/datasets/tinyacq.txt");
  final String formatEndpoint = "/source/dacfs_test/file_format/" + formatFilePath;
  expectSuccess(getBuilder(getAPIv2().path(formatEndpoint)).buildPut(Entity.json(pipeConfig)));

  final JobUI submitted = new JobUI(
      jobs.submitJob(
          JobRequest.newBuilder()
              .setSqlQuery(createQuery("/datasets/tinyacq.txt"))
              .build(),
          NoOpJobStatusListener.INSTANCE));

  final JobDataFragment firstRows = submitted.getData().truncate(500);
  assertEquals(23, firstRows.getColumns().size());
  assertEquals(500, firstRows.getReturnedRowCount());
}
// Write the content of the comma.txt resource to tmpFile using UTF_8, then upload that
// copy through TestHomeFiles.uploadFile as "comma" / "txt" with a comma field delimiter.
Files.write(FileUtils.getResourceAsString("/datasets/text/comma.txt"), tmpFile, UTF_8); Path textFile = new Path(tmpFile.getAbsolutePath()); TestHomeFiles.uploadFile(homeFileStore, textFile, "comma", "txt", new TextFileConfig().setFieldDelimiter(","), null);
// Obtain the JobsService via l(...), build a comma-delimited text config named after
// comma_windows_lineseparator.csv, and resolve that file's URL path.
final JobsService jobsService = l(JobsService.class); TextFileConfig fileConfig = new TextFileConfig(); fileConfig.setFieldDelimiter(","); fileConfig.setName("comma_windows_lineseparator.csv"); String fileUrlPath = getUrlPath("/datasets/csv/comma_windows_lineseparator.csv");
// Obtain the JobsService via l(...) and build a text config for "comma.csv" with a
// comma field delimiter and "\n" as the line delimiter.
final JobsService jobsService = l(JobsService.class); TextFileConfig fileConfig = new TextFileConfig(); fileConfig.setFieldDelimiter(","); fileConfig.setLineDelimiter("\n"); fileConfig.setName("comma.csv");
// Pipe-delimited config with header extraction enabled, named "fff".
fileConfig.setFieldDelimiter("|"); fileConfig.setExtractHeader(true); fileConfig.setName("fff");