/**
 * Returns the operator type for this scan, as resolved by
 * {@code EasyGroupScan.getEasyScanOperatorType} from the type of {@code fileConfig}.
 *
 * @return the operator type code produced by the lookup
 */
@Override
public int getOperatorType() {
  final int operatorType = EasyGroupScan.getEasyScanOperatorType(fileConfig.getType());
  return operatorType;
}
} // closes an enclosing scope whose opening brace lies outside this excerpt
/**
 * Tells whether the format settings of the given table metadata declare the Parquet file type.
 *
 * @param datasetPointer table metadata whose format settings are inspected
 * @return {@code true} when the format type is {@code FileType.PARQUET}, {@code false} otherwise
 */
private static boolean isParquetDataset(TableMetadata datasetPointer) {
  return FileType.PARQUET == datasetPointer.getFormatSettings().getType();
}
} // closes an enclosing scope whose opening brace lies outside this excerpt
/**
 * Returns the operator type for this scan, as resolved by {@code getEasyScanOperatorType}
 * from the type declared in the dataset's format settings.
 *
 * @return the operator type code produced by the lookup
 */
@Override
public int getOperatorType() {
  final int scanOperatorType = getEasyScanOperatorType(dataset.getFormatSettings().getType());
  return scanOperatorType;
}
private Collection<FsPermissionTask> getSplitPermissiomTasks(DatasetConfig datasetConfig, FileSystemWrapper userFs, String user) { final SplitsPointer splitsPointer = DatasetSplitsPointer.of(context.getNamespaceService(user), datasetConfig); final boolean isParquet = datasetConfig.getPhysicalDataset().getFormatSettings().getType() == FileType.PARQUET; final List<FsPermissionTask> fsPermissionTasks = Lists.newArrayList(); final List<Path> batch = Lists.newArrayList(); for (DatasetSplit split: splitsPointer.getSplitIterable()) { final Path filePath; if (isParquet) { filePath = new Path(PARQUET_DATASET_SPLIT_XATTR_SERIALIZER.revert(split.getExtendedProperty().toByteArray()).getPath()); } else { filePath = new Path(EASY_DATASET_SPLIT_XATTR_SERIALIZER.revert(split.getExtendedProperty().toByteArray()).getPath()); } batch.add(filePath); if (batch.size() == PERMISSION_CHECK_TASK_BATCH_SIZE) { // make a copy of batch fsPermissionTasks.add(new FsPermissionTask(userFs, new ArrayList<>(batch), FsAction.READ)); batch.clear(); } } if (!batch.isEmpty()) { fsPermissionTasks.add(new FsPermissionTask(userFs, batch, FsAction.READ)); } return fsPermissionTasks; }
@Override public boolean matches(RelOptRuleCall call) { FilesystemScanDrel scan = call.rel(scanIndex); if(scan.getFilter() != null){ return false; } // we only support accurate counts when using Parquet, everything else is executed normally. return scan.getPluginId().getType().equals(type) && scan.getTableMetadata().getFormatSettings().getType() == FileType.PARQUET; }
/**
 * Looks up the dataset stored under the given path of this home space and returns its
 * {@code File} representation.
 *
 * <p>The file's format is derived from the dataset's file config, and one flag passed to
 * {@code newFile} records whether that config's type differs from {@code FileType.UNKNOWN}.
 *
 * @param path URL path of the file, relative to the home space
 * @return the file built by {@code newFile} for the resolved dataset
 * @throws FileNotFoundException when the namespace lookup reports the entry as missing
 * @throws Exception any other failure raised by the calls made here
 */
@GET
@Path("file/{path: .*}")
@Produces(MediaType.APPLICATION_JSON)
public File getFile(@PathParam("path") String path) throws Exception {
  final FilePath target = FilePath.fromURLPath(homeName, path);
  try {
    final DatasetConfig dataset = namespaceService.getDataset(target.toNamespaceKey());
    final FileConfig config = toFileConfig(dataset);
    return newFile(
        dataset.getId().getId(),
        target,
        FileFormat.getForFile(config),
        datasetService.getJobsCount(target.toNamespaceKey()),
        false,
        true,
        config.getType() != FileType.UNKNOWN,
        DatasetType.PHYSICAL_DATASET_HOME_FILE);
  } catch (NamespaceNotFoundException nfe) {
    // Surface a missing namespace entry as a file-level "not found", keeping the cause.
    throw new FileNotFoundException(target, nfe);
  }
}
/**
 * Builds the {@code File} representation of a source file that is a physical dataset.
 *
 * <p>Note that the format settings object obtained from the dataset config is modified in
 * place: its owner is set to {@code owner} and its tag to the dataset config's tag.
 *
 * @param source source name (not read by this method)
 * @param filePath path of the file within the source
 * @param owner owner recorded on the format settings
 * @return the file created by {@code File.newInstance}
 * @throws PhysicalDatasetNotFoundException declared by this method; presumably raised by the
 *     dataset lookup (confirm against {@code getFilesystemPhysicalDataset})
 * @throws NamespaceException declared by this method; presumably raised by the namespace-backed
 *     calls (confirm against callees)
 */
public File getFileDataset(SourceName source, final SourceFilePath filePath, String owner)
    throws PhysicalDatasetNotFoundException, NamespaceException {
  final PhysicalDatasetConfig dataset =
      getFilesystemPhysicalDataset(filePath, DatasetType.PHYSICAL_DATASET_SOURCE_FILE);

  final FileConfig formatSettings = dataset.getFormatSettings();
  formatSettings.setOwner(owner);
  formatSettings.setTag(dataset.getTag());

  return File.newInstance(
      dataset.getId(),
      filePath,
      FileFormat.getForFile(formatSettings),
      datasetService.getJobsCount(filePath.toNamespaceKey()),
      false,
      false,
      formatSettings.getType() != FileType.UNKNOWN,
      null);
}
private static FileFormat get(FileConfig fileConfig) { // TODO (Amit H) Remove after defining classes for tsv, csv, and psv FileType fileType = fileConfig.getType(); if (fileType == FileType.CSV || fileType == FileType.TSV || fileType == FileType.PSV) { fileType = FileType.TEXT; } final Class<? extends FileFormat> fileFormatClass = FileFormatDefinitions.CLASS_TYPES.get(fileType); final Schema<FileFormat> schema = (Schema<FileFormat>) FileFormatDefinitions.SCHEMAS.get(fileFormatClass); final FileFormat fileFormat = schema.newMessage(); if (fileConfig.getExtendedConfig() != null) { ProtobufIOUtil.mergeFrom(fileConfig.getExtendedConfig().toByteArray(), fileFormat, schema); } fileFormat.setCtime(fileConfig.getCtime()); fileFormat.setName(fileConfig.getName()); fileFormat.setOwner(fileConfig.getOwner()); fileFormat.setFullPath(fileConfig.getFullPathList()); fileFormat.setVersion(fileConfig.getTag()); fileFormat.setLocation(fileConfig.getLocation()); return fileFormat; }
assert extensions != null : "TextFormatConfig.extensions should never be null"; switch (fileConfig.getType()) { case TEXT: case CSV:
assertEquals(fileConfig.getFieldDelimiter(), format1.getFieldDelimiter()); assertEquals(fileConfig.getExtractHeader(), format1.getExtractHeader()); assertEquals(fileConfig.asFileConfig().getType(), format1.asFileConfig().getType()); assertEquals(fileConfig.asFileConfig().getOwner(), format1.asFileConfig().getOwner());
fileConfig.setTag(physicalDatasetConfig.getTag()); addFolderTableToNamespaceTree(ns, folderPath, folderConfig, FileFormat.getForFolder(fileConfig), fileConfig.getType() != FileType.UNKNOWN);