/** * Check if total number of files is correct by comparing with the number that is parsed from * shard name using a name template. If no template is specified, "SSSS-of-NNNN" will be used as * default, and "NNNN" will be the expected total number of files. * * @return {@code true} if at least one shard name matches template and total number of given * files equals the number that is parsed from shard name. */ @VisibleForTesting boolean checkTotalNumOfFiles(Collection<Metadata> files) { for (Metadata fileMedadata : files) { String fileName = fileMedadata.resourceId().getFilename(); if (fileName == null) { // this path has zero elements continue; } Matcher matcher = shardTemplate.matcher(fileName); if (!matcher.matches()) { // shard name doesn't match the pattern, check with the next shard continue; } // once match, extract total number of shards and compare to file list return files.size() == Integer.parseInt(matcher.group("numshards")); } return false; } }
/**
 * Builds the file name prefix for the given window.
 *
 * <p>The result has the form {@code <name>-<start>-<end>}, where {@code <start>} and
 * {@code <end>} are the window bounds printed with {@code formatter}. {@code <name>} is the file
 * name of {@code prefix}, or the empty string when {@code prefix} is a directory or reports no
 * file name.
 *
 * @param window the interval window the files are written for
 * @return the window-specific file name prefix
 */
public String filenamePrefixForWindow(IntervalWindow window) {
  String filePrefix = "";
  if (!prefix.isDirectory()) {
    String name = prefix.getFilename();
    // getFilename() may return null; without this guard String.format would render the
    // literal text "null" into the generated file name.
    if (name != null) {
      filePrefix = name;
    }
  }
  return String.format(
      "%s-%s-%s", filePrefix, formatter.print(window.start()), formatter.print(window.end()));
}
/**
 * Returns the file name of {@code input}, or the empty string when {@code input} is a directory
 * or has no file name.
 *
 * @param input the resource whose file name is wanted
 * @return the file name, never {@code null}
 */
private static String extractFilename(ResourceId input) {
  return input.isDirectory() ? "" : firstNonNull(input.getFilename(), "");
}
}
/**
 * Builds the file name prefix for the given window.
 *
 * <p>The result is {@code <name>-<start>-<end>}: the file name of {@code baseFilename} (empty
 * when it is a directory or has no file name) followed by the window bounds printed with
 * {@code FORMATTER}.
 *
 * @param window the interval window the files are written for
 * @return the window-specific file name prefix
 */
public String filenamePrefixForWindow(IntervalWindow window) {
  String namePart;
  if (baseFilename.isDirectory()) {
    namePart = "";
  } else {
    namePart = firstNonNull(baseFilename.getFilename(), "");
  }
  return String.format(
      "%s-%s-%s", namePart, FORMATTER.print(window.start()), FORMATTER.print(window.end()));
}
/**
 * Builds the file name prefix for a window: the file name of {@code baseFilename} (empty when
 * it is a directory or has no file name), then the window start and end printed with
 * {@code FORMATTER}, all joined by dashes.
 *
 * @param window interval window
 * @return file name prefix.
 */
String filenamePrefixForWindow(final IntervalWindow window) {
  final String namePart;
  if (baseFilename.isDirectory()) {
    namePart = "";
  } else {
    namePart = firstNonNull(baseFilename.getFilename(), "");
  }
  return String.format(
      "%s-%s-%s", namePart, FORMATTER.print(window.start()), FORMATTER.print(window.end()));
}

@Override
/**
 * Prints the location of the matched resource to standard output as
 * {@code <scheme>://<current directory><file name>} and passes the metadata through unchanged.
 *
 * @param metadata the matched file metadata
 * @return the same {@code metadata} instance
 */
public MatchResult.Metadata apply(MatchResult.Metadata metadata) {
  ResourceId matched = metadata.resourceId();
  StringBuilder location = new StringBuilder();
  location
      .append(matched.getScheme())
      .append("://")
      .append(matched.getCurrentDirectory())
      .append(matched.getFilename());
  System.out.println(location.toString());
  return metadata;
}
}));
try { ResourceId sourceResourceId = FileSystems.matchNewResource(jarPath, false); File destFile = Paths.get(destRoot, sourceResourceId.getFilename()).toFile(); ResourceId destResourceId = FileSystems.matchNewResource(destFile.getAbsolutePath(), false);
.metadata() .stream() .filter(metadata -> metadata.resourceId().getFilename().endsWith(".js")) .map(Metadata::resourceId) .map(
/**
 * Builds the file name prefix for the given window.
 *
 * <p>The result is the file name of {@code baseFilename} (empty when it is a directory or has no
 * file name) immediately followed by the window start, a dash, and the window end, both printed
 * with {@code FORMATTER}. No separator is inserted between the file name and the window start.
 *
 * @param window the interval window the files are written for
 * @return the window-specific file name prefix
 */
public String filenamePrefixForWindow(IntervalWindow window) {
  String namePart;
  if (baseFilename.isDirectory()) {
    namePart = "";
  } else {
    namePart = firstNonNull(baseFilename.getFilename(), "");
  }
  return String.format(
      "%s%s-%s", namePart, FORMATTER.print(window.start()), FORMATTER.print(window.end()));
}
/**
 * Builds the output file for one shard when windowing is not used.
 *
 * <p>The file name is {@code <name>-<shard>-of-<total><hint suffix><suffix>}, where
 * {@code <name>} is the file name of {@code baseFilename} (empty when it is a directory or has no
 * file name) and the shard number and shard total are zero-padded to at least four digits. The
 * file is resolved against the current directory of {@code baseFilename}.
 *
 * @param shardNumber number of the shard being written
 * @param numShards total number of shards
 * @param outputFileHints hints supplying the suggested file name suffix
 * @return the resource the shard is written to
 */
@Override
public ResourceId unwindowedFilename(
    int shardNumber, int numShards, OutputFileHints outputFileHints) {
  String namePart;
  if (baseFilename.isDirectory()) {
    namePart = "";
  } else {
    namePart = firstNonNull(baseFilename.getFilename(), "");
  }

  DecimalFormat fourDigits = new DecimalFormat("0000");
  String shardText = fourDigits.format(shardNumber);
  String totalText = fourDigits.format(numShards);

  String filename =
      String.format(
          "%s-%s-of-%s%s%s",
          namePart,
          shardText,
          totalText,
          outputFileHints.getSuggestedFilenameSuffix(),
          suffix);

  ResourceId directory = baseFilename.getCurrentDirectory();
  return directory.resolve(filename, StandardResolveOptions.RESOLVE_FILE);
}
}
@ProcessElement public void processElement(ProcessContext context) { ResourceId inputFile = context.element().resourceId(); Compression compression = compressionValue.get(); // Add the compression extension to the output filename. Example: demo.txt -> demo.txt.gz String outputFilename = inputFile.getFilename() + compression.getSuggestedSuffix(); // Resolve the necessary resources to perform the transfer ResourceId outputDir = FileSystems.matchNewResource(destinationLocation.get(), true); ResourceId outputFile = outputDir.resolve(outputFilename, StandardResolveOptions.RESOLVE_FILE); ResourceId tempFile = outputDir.resolve("temp-" + outputFilename, StandardResolveOptions.RESOLVE_FILE); // Perform the copy of the compressed channel to the destination. try (ReadableByteChannel readerChannel = FileSystems.open(inputFile)) { try (WritableByteChannel writerChannel = compression.writeCompressed(FileSystems.create(tempFile, MimeTypes.BINARY))) { // Execute the copy to the temporary file ByteStreams.copy(readerChannel, writerChannel); } // Rename the temporary file to the output file FileSystems.rename(ImmutableList.of(tempFile), ImmutableList.of(outputFile)); // Output the path to the uncompressed file context.output(outputFile.toString()); } catch (IOException e) { LOG.error("Error occurred during compression of {}", inputFile.toString(), e); context.output(DEADLETTER_TAG, KV.of(inputFile.toString(), e.getMessage())); } } }
/**
 * Turns a matched file into a {@code ReadableFile}.
 *
 * <p>Directories are handled according to the spec's directory treatment: skipped silently,
 * rejected with an {@link IllegalArgumentException}, or, for any other treatment, rejected with
 * an {@link UnsupportedOperationException}. For regular files the compression is taken from the
 * spec, or detected from the file name when the spec says {@code AUTO}. The emitted metadata
 * copies the resource id and size, and is marked read-seek-efficient only if the input was and
 * the file is uncompressed.
 *
 * @param c process context supplying the matched metadata and receiving the readable file
 */
@ProcessElement
public void process(ProcessContext c) {
  MatchResult.Metadata matched = c.element();

  if (matched.resourceId().isDirectory()) {
    switch (spec.getDirectoryTreatment()) {
      case SKIP:
        return;

      case PROHIBIT:
        throw new IllegalArgumentException(
            "Trying to read " + matched.resourceId() + " which is a directory");

      default:
        throw new UnsupportedOperationException(
            "Unknown DirectoryTreatment: " + spec.getDirectoryTreatment());
    }
  }

  Compression compression;
  if (spec.getCompression() == Compression.AUTO) {
    compression = Compression.detect(matched.resourceId().getFilename());
  } else {
    compression = spec.getCompression();
  }

  // Seeking is only efficient when the bytes on disk are the bytes being read.
  boolean seekEfficient =
      matched.isReadSeekEfficient() && compression == Compression.UNCOMPRESSED;

  MatchResult.Metadata outputMetadata =
      MatchResult.Metadata.builder()
          .setResourceId(matched.resourceId())
          .setSizeBytes(matched.sizeBytes())
          .setIsReadSeekEfficient(seekEfficient)
          .build();

  c.output(new ReadableFile(outputMetadata, compression));
}
}
/**
 * Validates the write configuration and applies the XML file write to {@code input}.
 *
 * <p>Record class, root element, filename prefix and charset must all be set. A JAXB context is
 * created for the record class up front so binding problems surface here as a
 * {@link RuntimeException}. The filename prefix is split into its directory, which becomes the
 * output location, and its file name, which becomes the file prefix. Files get the
 * {@code .xml} suffix and windowing is ignored.
 *
 * @param input the collection of records to write
 * @return a {@code PDone} in the pipeline of {@code input}
 */
@Override
public PDone expand(PCollection<T> input) {
  checkArgument(getRecordClass() != null, "withRecordClass() is required");
  checkArgument(getRootElement() != null, "withRootElement() is required");
  checkArgument(getFilenamePrefix() != null, "to() is required");
  checkArgument(getCharset() != null, "withCharset() is required");

  // Fail fast if the record class cannot be bound by JAXB.
  try {
    JAXBContext.newInstance(getRecordClass());
  } catch (JAXBException e) {
    throw new RuntimeException("Error binding classes to a JAXB Context.", e);
  }

  ResourceId prefix = FileSystems.matchNewResource(getFilenamePrefix(), false /* isDirectory */);
  String outputDirectory = prefix.getCurrentDirectory().toString();
  String filePrefix = prefix.getFilename();
  Charset charset = Charset.forName(getCharset());

  input.apply(
      FileIO.<T>write()
          .via(sink(getRecordClass()).withCharset(charset).withRootElement(getRootElement()))
          .to(outputDirectory)
          .withPrefix(filePrefix)
          .withSuffix(".xml")
          .withIgnoreWindowing());

  return PDone.in(input.getPipeline());
}
/**
 * Builds the output file for one shard of one pane of a window.
 *
 * <p>The file name is
 * {@code <name>-<start>-<end>-<shard>-of-<total>-pane-<index>[-last]<hint suffix>.avro}, where
 * {@code <name>} is the file name of {@code outputFilePrefix} (empty when it is a directory or
 * has no file name). The window is cast to {@link IntervalWindow}. The file is resolved against
 * the current directory of {@code outputFilePrefix}.
 *
 * @param shardNumber number of the shard being written
 * @param numShards total number of shards
 * @param window the window being written; cast to {@link IntervalWindow}
 * @param paneInfo pane supplying the index and the last-pane flag
 * @param outputFileHints hints supplying the suggested file name suffix
 * @return the resource the shard is written to
 */
@Override
public ResourceId windowedFilename(
    int shardNumber,
    int numShards,
    BoundedWindow window,
    PaneInfo paneInfo,
    OutputFileHints outputFileHints) {
  String namePart;
  if (outputFilePrefix.isDirectory()) {
    namePart = "";
  } else {
    namePart = firstNonNull(outputFilePrefix.getFilename(), "");
  }

  IntervalWindow interval = (IntervalWindow) window;
  String windowStr = interval.start().toString() + "-" + interval.end().toString();

  String lastMarker = paneInfo.isLast() ? "-last" : "";
  String filename =
      String.format(
          "%s-%s-%s-of-%s-pane-%s%s%s.avro",
          namePart,
          windowStr,
          shardNumber,
          numShards,
          paneInfo.getIndex(),
          lastMarker,
          outputFileHints.getSuggestedFilenameSuffix());

  ResourceId directory = outputFilePrefix.getCurrentDirectory();
  return directory.resolve(filename, RESOLVE_FILE);
}