/**
 * Resolves each glob spec against {@code fileSystem} and reports what it matched.
 *
 * <p>Exactly one {@link MatchResult} is produced per entry of {@code specs}, in the same order:
 *
 * <ul>
 *   <li>{@code Status.NOT_FOUND} with an empty list when {@code globStatus} returns {@code null};
 *   <li>{@code Status.ERROR} wrapping the exception when an {@link IOException} is thrown;
 *   <li>{@code Status.OK} otherwise, with one {@code Metadata} per matched entry for which
 *       {@code isFile()} is true (other entries are left out).
 * </ul>
 *
 * @param specs the glob patterns to expand
 * @return the per-spec match results
 */
@Override
protected List<MatchResult> match(List<String> specs) {
  ImmutableList.Builder<MatchResult> results = ImmutableList.builder();
  for (String spec : specs) {
    MatchResult outcome;
    try {
      FileStatus[] globbed = fileSystem.globStatus(new Path(spec));
      if (globbed == null) {
        outcome = MatchResult.create(Status.NOT_FOUND, Collections.emptyList());
      } else {
        List<Metadata> files = new ArrayList<>();
        for (FileStatus candidate : globbed) {
          if (!candidate.isFile()) {
            continue;
          }
          String rawUri = candidate.getPath().toUri().toString();
          URI cleanedUri = dropEmptyAuthority(rawUri);
          Metadata entry =
              Metadata.builder()
                  .setResourceId(new HadoopResourceId(cleanedUri))
                  .setIsReadSeekEfficient(true)
                  .setSizeBytes(candidate.getLen())
                  .build();
          files.add(entry);
        }
        outcome = MatchResult.create(Status.OK, files);
      }
    } catch (IOException e) {
      // A failure on one spec is recorded in its own result; the remaining specs still run.
      outcome = MatchResult.create(Status.ERROR, e);
    }
    results.add(outcome);
  }
  return results.build();
}
/**
 * Turns one matched resource into a {@code ReadableFile} and emits it.
 *
 * <p>A directory is handled per {@code spec.getDirectoryTreatment()}: {@code SKIP} emits nothing,
 * {@code PROHIBIT} throws {@link IllegalArgumentException}, and any other value throws
 * {@link UnsupportedOperationException}.
 *
 * <p>When the configured compression is {@code Compression.AUTO}, the compression is detected
 * from the resource's filename. The emitted metadata is marked read-seek-efficient only if the
 * input metadata was and the resolved compression is {@code Compression.UNCOMPRESSED}.
 */
@ProcessElement
public void process(ProcessContext c) {
  MatchResult.Metadata matched = c.element();

  if (matched.resourceId().isDirectory()) {
    // Every branch below leaves the method: directories are never emitted.
    switch (spec.getDirectoryTreatment()) {
      case SKIP:
        return;
      case PROHIBIT:
        throw new IllegalArgumentException(
            "Trying to read " + matched.resourceId() + " which is a directory");
      default:
        throw new UnsupportedOperationException(
            "Unknown DirectoryTreatment: " + spec.getDirectoryTreatment());
    }
  }

  final Compression compression;
  if (spec.getCompression() == Compression.AUTO) {
    compression = Compression.detect(matched.resourceId().getFilename());
  } else {
    compression = spec.getCompression();
  }

  boolean seekEfficient =
      matched.isReadSeekEfficient() && compression == Compression.UNCOMPRESSED;
  MatchResult.Metadata emitted =
      MatchResult.Metadata.builder()
          .setResourceId(matched.resourceId())
          .setSizeBytes(matched.sizeBytes())
          .setIsReadSeekEfficient(seekEfficient)
          .build();
  c.output(new ReadableFile(emitted, compression));
}
} // closes the enclosing class, whose header lies outside this excerpt
/**
 * Builds the {@code MatchResult.Metadata} for an S3 object.
 *
 * <p>The result is marked read-seek-efficient unless {@code contentEncoding} is contained in
 * {@code NON_READ_SEEK_EFFICIENT_ENCODINGS}.
 *
 * @param path the resource id; its size must be present (checked via {@code checkArgument})
 * @param contentEncoding the object's content encoding (checked via {@code checkNotNull})
 * @return metadata carrying {@code path}, its size, and the seek-efficiency flag
 */
private static MatchResult.Metadata createBeamMetadata(
    S3ResourceId path, String contentEncoding) {
  // Validate both inputs before anything is read from them.
  checkArgument(path.getSize().isPresent(), "path has size");
  checkNotNull(contentEncoding, "contentEncoding");

  return MatchResult.Metadata.builder()
      .setIsReadSeekEfficient(!NON_READ_SEEK_EFFICIENT_ENCODINGS.contains(contentEncoding))
      .setResourceId(path)
      .setSizeBytes(path.getSize().get())
      .build();
}
/**
 * Creates a {@code MatchResult.Metadata} for a non-directory resource at {@code path}.
 *
 * <p>The resource id comes from {@code FileSystems.matchNewResource} applied to the path's string
 * form, and the result is always flagged as read-seek-efficient.
 *
 * @param path location of the resource
 * @param size value stored as the size in bytes
 * @return the assembled metadata
 */
private static MatchResult.Metadata metadata(Path path, int size) {
  String location = path.toString();
  return MatchResult.Metadata.builder()
      .setResourceId(FileSystems.matchNewResource(location, false /* isDirectory */))
      .setIsReadSeekEfficient(true)
      .setSizeBytes(size)
      .build();
}
private Metadata toMetadata(StorageObject storageObject) { // TODO: Address https://issues.apache.org/jira/browse/BEAM-1494 // It is incorrect to set IsReadSeekEfficient true for files with content encoding set to gzip. Metadata.Builder ret = Metadata.builder() .setIsReadSeekEfficient(true) .setResourceId(GcsResourceId.fromGcsPath(GcsPath.fromObject(storageObject))); BigInteger size = firstNonNull(storageObject.getSize(), BigInteger.ZERO); ret.setSizeBytes(size.longValue()); return ret.build(); }
/**
 * Describes a local {@link File} as Beam {@code Metadata}.
 *
 * <p>The resource id is built from the file's path and its {@code isDirectory()} flag, the size
 * is {@code file.length()}, and the result is always flagged as read-seek-efficient.
 *
 * @param file the local file to describe
 * @return the assembled metadata
 */
private Metadata toMetadata(File file) {
  Metadata.Builder builder = Metadata.builder();
  builder.setResourceId(LocalResourceId.fromPath(file.toPath(), file.isDirectory()));
  builder.setIsReadSeekEfficient(true);
  builder.setSizeBytes(file.length());
  return builder.build();
}
} // closes the enclosing class, whose header lies outside this excerpt
/**
 * Decodes a {@code Metadata} value from {@code is}.
 *
 * <p>Fields are read in a fixed order: the resource id, then an integer flag (the value
 * {@code 1} means read-seek-efficient, anything else means not), then the size in bytes.
 *
 * @param is the stream to read from
 * @return the decoded metadata
 * @throws IOException if one of the underlying coders fails to read
 */
@Override
public Metadata decode(InputStream is) throws IOException {
  // The three reads below must stay in this order: they consume the stream sequentially.
  ResourceId decodedId = RESOURCE_ID_CODER.decode(is);
  boolean seekEfficient = INT_CODER.decode(is) == 1;
  long decodedSize = LONG_CODER.decode(is);

  return Metadata.builder()
      .setResourceId(decodedId)
      .setIsReadSeekEfficient(seekEfficient)
      .setSizeBytes(decodedSize)
      .build();
}