List<Metadata> metadata =
    FileSystems.match(Collections.singletonList(pattern)).get(0).metadata();
for (Metadata meta : metadata) {
  outputFiles.add(new File(meta.resourceId().toString()));
}
@Override
protected List<MatchResult> match(List<String> specs) {
  ImmutableList.Builder<MatchResult> resultsBuilder = ImmutableList.builder();
  for (String spec : specs) {
    try {
      FileStatus[] fileStatuses = fileSystem.globStatus(new Path(spec));
      if (fileStatuses == null) {
        resultsBuilder.add(MatchResult.create(Status.NOT_FOUND, Collections.emptyList()));
        continue;
      }
      List<Metadata> metadata = new ArrayList<>();
      for (FileStatus fileStatus : fileStatuses) {
        if (fileStatus.isFile()) {
          URI uri = dropEmptyAuthority(fileStatus.getPath().toUri().toString());
          metadata.add(
              Metadata.builder()
                  .setResourceId(new HadoopResourceId(uri))
                  .setIsReadSeekEfficient(true)
                  .setSizeBytes(fileStatus.getLen())
                  .build());
        }
      }
      resultsBuilder.add(MatchResult.create(Status.OK, metadata));
    } catch (IOException e) {
      resultsBuilder.add(MatchResult.create(Status.ERROR, e));
    }
  }
  return resultsBuilder.build();
}
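For context, a minimal sketch of calling the filesystem-agnostic entry point that dispatches to implementations like the one above; the glob and the options setup here are illustrative assumptions, not from the original:

import java.io.IOException;
import java.util.Collections;
import java.util.List;
import org.apache.beam.sdk.io.FileSystems;
import org.apache.beam.sdk.io.fs.MatchResult;
import org.apache.beam.sdk.io.fs.MatchResult.Metadata;
import org.apache.beam.sdk.options.PipelineOptionsFactory;

public class MatchExample {
  public static void main(String[] args) throws IOException {
    // Register the available FileSystem implementations with default options.
    FileSystems.setDefaultPipelineOptions(PipelineOptionsFactory.create());

    // Match a glob; each spec in the input list yields one MatchResult.
    List<MatchResult> results =
        FileSystems.match(Collections.singletonList("/tmp/data/part-*")); // assumed path
    for (MatchResult result : results) {
      // metadata() throws if the match failed; inspect result.status() to avoid that.
      for (Metadata metadata : result.metadata()) {
        System.out.printf("%s (%d bytes)%n", metadata.resourceId(), metadata.sizeBytes());
      }
    }
  }
}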
List<FileBasedSource<T>> splitResults = new ArrayList<>(expandedFiles.size());
for (Metadata metadata : expandedFiles) {
  FileBasedSource<T> split = createForSubrangeOfFile(metadata, 0, metadata.sizeBytes());
  verify(
      split.getMode() == Mode.SINGLE_FILE_OR_SUBRANGE,
      "%s.createForSubrangeOfFile must return a source in mode %s",
      split,
      Mode.SINGLE_FILE_OR_SUBRANGE);
  splitResults.add(split);
}
// First assertion: the uncompressed file (the PAssert subject is elided in the original).
.satisfies(
    input -> {
      assertEquals(path, input.getMetadata().resourceId().toString());
      assertEquals("Hello world".length(), input.getMetadata().sizeBytes());
      assertEquals(Compression.UNCOMPRESSED, input.getCompression());
      assertTrue(input.getMetadata().isReadSeekEfficient());
      try {
        assertEquals("Hello world", input.readFullyAsUTF8String());
      } catch (IOException e) {
        throw new RuntimeException(e);
      }
      return null;
    });
// Second assertion: the gzipped copy of the same content.
.satisfies(
    input -> {
      assertEquals(pathGZ, input.getMetadata().resourceId().toString());
      assertFalse(input.getMetadata().sizeBytes() == "Hello world".length());
      assertEquals(Compression.GZIP, input.getCompression());
      assertFalse(input.getMetadata().isReadSeekEfficient());
      try {
        assertEquals("Hello world", input.readFullyAsUTF8String());
      } catch (IOException e) {
        throw new RuntimeException(e);
      }
      return null;
    });
MatchResult singleMatch =
    Iterables.getOnlyElement(
        FileSystems.match(Collections.singletonList(tempDir.toString() + "*")));
for (Metadata matchResult : singleMatch.metadata()) {
  if (allMatches.add(matchResult.resourceId())) {
    LOG.info("Will also remove unknown temporary file {}", matchResult.resourceId());
  }
}
long srcSize = srcMeta.sizeBytes();
boolean shouldDownload = true;
if (Files.exists(dst)) {
  // Assumed completion of the truncated snippet: skip the download when the
  // local copy already has the same size as the source.
  shouldDownload = Files.size(dst) != srcSize;
}
List<MatchResult> matches = FileSystems.match(Collections.singletonList(pattern));
for (Metadata expectedFile : Iterables.getOnlyElement(matches).metadata()) {
  expectedFiles.add(new File(expectedFile.resourceId().toString()));
}
List<String> files = new ArrayList<>(strs.size());
for (Metadata match : matches) {
  String filename = match.resourceId().toString();
  files.add(filename);
  CharBuffer buf = CharBuffer.allocate((int) new File(filename).length());
  // Assumed completion of the truncated snippet: read the whole file into the buffer.
  try (Reader reader = Files.newBufferedReader(Paths.get(filename), StandardCharsets.UTF_8)) {
    reader.read(buf);
  }
}
.metadata()
.stream()
.filter(metadata -> metadata.resourceId().getFilename().endsWith(".js"))
.map(Metadata::resourceId)
// Assumed completion of the truncated chain: collect the resource IDs as strings.
.map(ResourceId::toString)
.collect(Collectors.toList());
@Override
public final BoundedReader<T> createReader(PipelineOptions options) throws IOException {
  // Validate the current source prior to creating a reader for it.
  this.validate();
  String fileOrPattern = fileOrPatternSpec.get();

  if (mode == Mode.FILEPATTERN) {
    long startTime = System.currentTimeMillis();
    List<Metadata> fileMetadata =
        FileSystems.match(fileOrPattern, emptyMatchTreatment).metadata();
    LOG.info("Matched {} files for pattern {}", fileMetadata.size(), fileOrPattern);
    List<FileBasedReader<T>> fileReaders = new ArrayList<>();
    for (Metadata metadata : fileMetadata) {
      long endOffset = metadata.sizeBytes();
      fileReaders.add(
          createForSubrangeOfFile(metadata, 0, endOffset).createSingleFileReader(options));
    }
    LOG.debug(
        "Creating a reader for file pattern {} took {} ms",
        fileOrPattern,
        System.currentTimeMillis() - startTime);
    if (fileReaders.size() == 1) {
      return fileReaders.get(0);
    }
    return new FilePatternReader(this, fileReaders);
  } else {
    return createSingleFileReader(options);
  }
}
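As a rough usage sketch of the pattern-expansion path above, here is how a concrete FileBasedSource (AvroSource, which ships with Beam) can be read directly; the path and schema string are assumptions:

// Sketch only: reading a concrete FileBasedSource outside a pipeline.
static void readAvro() throws IOException {
  AvroSource<GenericRecord> source =
      AvroSource.from("/tmp/data/*.avro").withSchema(SCHEMA_STRING); // SCHEMA_STRING is assumed
  try (BoundedSource.BoundedReader<GenericRecord> reader =
      source.createReader(PipelineOptionsFactory.create())) {
    // In FILEPATTERN mode this reader concatenates one per-file reader per match.
    for (boolean more = reader.start(); more; more = reader.advance()) {
      GenericRecord record = reader.getCurrent();
      // ... process record ...
    }
  }
}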
@ProcessElement
public void processElement(ProcessContext context) {
  ResourceId inputFile = context.element().resourceId();
  Compression compression = compressionValue.get();

  // Add the compression extension to the output filename. Example: demo.txt -> demo.txt.gz
  String outputFilename = inputFile.getFilename() + compression.getSuggestedSuffix();

  // Resolve the necessary resources to perform the transfer.
  ResourceId outputDir = FileSystems.matchNewResource(destinationLocation.get(), true);
  ResourceId outputFile =
      outputDir.resolve(outputFilename, StandardResolveOptions.RESOLVE_FILE);
  ResourceId tempFile =
      outputDir.resolve("temp-" + outputFilename, StandardResolveOptions.RESOLVE_FILE);

  // Perform the copy of the compressed channel to the destination.
  try (ReadableByteChannel readerChannel = FileSystems.open(inputFile)) {
    try (WritableByteChannel writerChannel =
        compression.writeCompressed(FileSystems.create(tempFile, MimeTypes.BINARY))) {
      // Execute the copy to the temporary file.
      ByteStreams.copy(readerChannel, writerChannel);
    }

    // Rename the temporary file to the output file.
    FileSystems.rename(ImmutableList.of(tempFile), ImmutableList.of(outputFile));

    // Output the path to the compressed file.
    context.output(outputFile.toString());
  } catch (IOException e) {
    LOG.error("Error occurred during compression of {}", inputFile.toString(), e);
    context.output(DEADLETTER_TAG, KV.of(inputFile.toString(), e.getMessage()));
  }
}
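A hedged sketch of how a DoFn like this is typically wired up. The transform names, the CompressFn class name and constructor, and OUTPUT_TAG are assumptions; DEADLETTER_TAG appears in the DoFn above:

// Sketch only: the DoFn consumes MatchResult.Metadata, so FileIO.match() feeds it directly.
PCollectionTuple results =
    pipeline
        .apply("MatchFiles", FileIO.match().filepattern(inputPattern))
        .apply(
            "CompressFiles",
            ParDo.of(new CompressFn(destinationLocation, compressionValue)) // assumed ctor
                .withOutputTags(OUTPUT_TAG, TupleTagList.of(DEADLETTER_TAG)));
results.get(OUTPUT_TAG); // paths of compressed files
results.get(DEADLETTER_TAG); // KV<input path, error message>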
/**
 * Create a {@code FileBasedSource} based on a single file. This constructor must be used when
 * creating a new {@code FileBasedSource} for a subrange of a single file. Additionally, this
 * constructor must be used to create new {@code FileBasedSource}s when subclasses implement the
 * method {@link #createForSubrangeOfFile}.
 *
 * <p>See {@link OffsetBasedSource} for detailed descriptions of {@code minBundleSize}, {@code
 * startOffset}, and {@code endOffset}.
 *
 * @param fileMetadata specification of the file represented by the {@link FileBasedSource}, in
 *     suitable form for use with {@link FileSystems#match(List)}.
 * @param minBundleSize minimum bundle size in bytes.
 * @param startOffset starting byte offset.
 * @param endOffset ending byte offset. If the specified value {@code >= #getMaxEndOffset()} it
 *     implies {@code #getMaxEndOffset()}.
 */
protected FileBasedSource(
    Metadata fileMetadata, long minBundleSize, long startOffset, long endOffset) {
  super(startOffset, endOffset, minBundleSize);
  mode = Mode.SINGLE_FILE_OR_SUBRANGE;
  this.singleFileMetadata = checkNotNull(fileMetadata, "fileMetadata");
  this.fileOrPatternSpec = StaticValueProvider.of(fileMetadata.resourceId().toString());
  // This field will be unused in this mode.
  this.emptyMatchTreatment = EmptyMatchTreatment.DISALLOW;
}
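Per the contract in this Javadoc, a subclass's createForSubrangeOfFile typically just forwards to this constructor. A minimal sketch, where LineSource is a hypothetical subclass, not from the original:

// Hypothetical subclass method showing the intended delegation.
@Override
protected FileBasedSource<String> createForSubrangeOfFile(
    Metadata fileMetadata, long start, long end) {
  // Reuse this source's minimum bundle size; read only [start, end) of the file.
  return new LineSource(fileMetadata, getMinBundleSize(), start, end);
}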
@Override
protected void startReading(ReadableByteChannel channel) throws IOException {
  try {
    metadata = readMetadataFromFile(getCurrentSource().getSingleFileMetadata().resourceId());
  } catch (IOException e) {
    throw new RuntimeException(
        "Error reading metadata from file " + getCurrentSource().getSingleFileMetadata(), e);
  }

  long startOffset = getCurrentSource().getStartOffset();
  byte[] syncMarker = metadata.getSyncMarker();
  long syncMarkerLength = syncMarker.length;

  if (startOffset != 0) {
    // Rewind in order to find the sync marker ending the previous block.
    long position = Math.max(0, startOffset - syncMarkerLength);
    ((SeekableByteChannel) channel).position(position);
    startOffset = position;
  }

  // Satisfy the postcondition.
  stream = createStream(channel);
  countStream = new CountingInputStream(stream);
  synchronized (progressLock) {
    currentBlockOffset = startOffset + advancePastNextSyncMarker(stream, syncMarker);
    currentBlockSizeBytes = 0;
  }
}
@ProcessElement
public void processElement(ProcessContext c) throws Exception {
  ReadableFile file = c.element();
  InputStream stream = Channels.newInputStream(file.open());
  try (InputStream tikaStream = TikaInputStream.get(stream)) {
    Parser parser =
        tikaConfig == null ? new AutoDetectParser() : new AutoDetectParser(tikaConfig);
    ParseContext context = new ParseContext();
    context.set(Parser.class, parser);

    Metadata tikaMetadata =
        spec.getInputMetadata() != null ? spec.getInputMetadata() : new Metadata();
    if (spec.getContentTypeHint() != null) {
      tikaMetadata.set(Metadata.CONTENT_TYPE, spec.getContentTypeHint());
    }

    String location = file.getMetadata().resourceId().toString();
    ParseResult res;
    ContentHandler tikaHandler = new ToTextContentHandler();
    try {
      parser.parse(tikaStream, tikaHandler, tikaMetadata, context);
      res = ParseResult.success(location, tikaHandler.toString(), tikaMetadata);
    } catch (Exception e) {
      res = ParseResult.failure(location, tikaHandler.toString(), tikaMetadata, e);
    }
    c.output(res);
  }
}
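For reference, a minimal sketch of reaching this parser through TikaIO's public API; the filepattern is an assumption:

// Sketch only: parse matched files with Tika and collect ParseResults.
PCollection<ParseResult> results =
    pipeline
        .apply(FileIO.match().filepattern("/path/to/docs/*")) // assumed pattern
        .apply(FileIO.readMatches().withCompression(Compression.UNCOMPRESSED))
        .apply(TikaIO.parseFiles());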
@ProcessElement
public void process(ProcessContext c) {
  MatchResult.Metadata metadata = c.element();
  if (metadata.resourceId().isDirectory()) {
    switch (spec.getDirectoryTreatment()) {
      case SKIP:
        return;
      case PROHIBIT:
        throw new IllegalArgumentException(
            "Trying to read " + metadata.resourceId() + " which is a directory");
      default:
        throw new UnsupportedOperationException(
            "Unknown DirectoryTreatment: " + spec.getDirectoryTreatment());
    }
  }

  Compression compression =
      (spec.getCompression() == Compression.AUTO)
          ? Compression.detect(metadata.resourceId().getFilename())
          : spec.getCompression();
  c.output(
      new ReadableFile(
          MatchResult.Metadata.builder()
              .setResourceId(metadata.resourceId())
              .setSizeBytes(metadata.sizeBytes())
              .setIsReadSeekEfficient(
                  metadata.isReadSeekEfficient() && compression == Compression.UNCOMPRESSED)
              .build(),
          compression));
}
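This DoFn is the heart of FileIO.readMatches(); callers normally reach it through the transform rather than directly. A minimal usage sketch (the pattern is an assumption):

// Sketch only: match files and convert each Metadata into a ReadableFile.
PCollection<FileIO.ReadableFile> files =
    pipeline
        .apply(FileIO.match().filepattern("gs://bucket/logs/*.gz")) // assumed pattern
        .apply(
            FileIO.readMatches()
                .withCompression(Compression.AUTO) // detect by extension, as above
                .withDirectoryTreatment(FileIO.ReadMatches.DirectoryTreatment.SKIP));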
/**
 * Checks whether the total number of files is correct by comparing it with the number parsed
 * from a shard name using a name template. If no template is specified, "SSSS-of-NNNN" is used
 * as the default, and "NNNN" is the expected total number of files.
 *
 * @return {@code true} if at least one shard name matches the template and the total number of
 *     given files equals the number parsed from the shard name.
 */
@VisibleForTesting
boolean checkTotalNumOfFiles(Collection<Metadata> files) {
  for (Metadata fileMetadata : files) {
    String fileName = fileMetadata.resourceId().getFilename();

    if (fileName == null) {
      // This path has zero elements.
      continue;
    }
    Matcher matcher = shardTemplate.matcher(fileName);
    if (!matcher.matches()) {
      // The shard name doesn't match the pattern; check the next shard.
      continue;
    }
    // Once matched, extract the total number of shards and compare it to the file count.
    return files.size() == Integer.parseInt(matcher.group("numshards"));
  }
  return false;
}
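A small sketch of the kind of shard template this check relies on. The exact Pattern below is an assumption (the class compiles its own from the default "SSSS-of-NNNN" template), but the named group "numshards" matches the code above:

// Assumed equivalent of the compiled default template "SSSS-of-NNNN".
Pattern shardTemplate = Pattern.compile(".*-(?<shardnum>\\d+)-of-(?<numshards>\\d+)");
Matcher matcher = shardTemplate.matcher("output-0002-of-0004");
if (matcher.matches()) {
  int expectedTotal = Integer.parseInt(matcher.group("numshards")); // 4
  // checkTotalNumOfFiles compares this against files.size().
}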
/**
 * Reads all the lines of all the files.
 *
 * <p>Not suitable for use except in testing of small data, since the data size may be far more
 * than can be reasonably processed serially, in-memory, by a single thread.
 */
@VisibleForTesting
List<String> readLines(Collection<Metadata> files) throws IOException {
  List<String> allLines = Lists.newArrayList();
  int i = 1;
  for (Metadata file : files) {
    try (Reader reader =
        Channels.newReader(FileSystems.open(file.resourceId()), StandardCharsets.UTF_8.name())) {
      List<String> lines = CharStreams.readLines(reader);
      allLines.addAll(lines);
      LOG.debug("[{} of {}] Read {} lines from file: {}", i, files.size(), lines.size(), file);
    }
    i++;
  }
  return allLines;
}
@Test
public void testSchemaStringIsInterned() throws Exception {
  List<Bird> birds = createRandomRecords(100);
  String filename =
      generateTestFile(
          "tmp.avro",
          birds,
          SyncBehavior.SYNC_DEFAULT,
          0,
          AvroCoder.of(Bird.class),
          DataFileConstants.NULL_CODEC);
  Metadata fileMetadata = FileSystems.matchSingleFileSpec(filename);
  String schema = AvroSource.readMetadataFromFile(fileMetadata.resourceId()).getSchemaString();
  // Add "" to the schema to make sure it is not interned.
  AvroSource<GenericRecord> sourceA = AvroSource.from(filename).withSchema("" + schema);
  AvroSource<GenericRecord> sourceB = AvroSource.from(filename).withSchema("" + schema);
  assertSame(sourceA.getReaderSchemaString(), sourceB.getReaderSchemaString());

  // Ensure that deserialization still goes through interning.
  AvroSource<GenericRecord> sourceC = SerializableUtils.clone(sourceB);
  assertSame(sourceA.getReaderSchemaString(), sourceC.getReaderSchemaString());
}