@Override
public Location getLocation() {
  // Resolve this partition's relative path against the underlying embedded file set.
  return partitionedFileSetDataset
    .getEmbeddedFileSet()
    .getLocation(relativePath);
}
@Override
public Location getLocation() {
  // Resolve this partition's relative path against the underlying embedded file set.
  return partitionedFileSetDataset
    .getEmbeddedFileSet()
    .getLocation(relativePath);
}
@Override
public Map<String, String> getInputFormatConfiguration() {
  // Map every selected partition key to the location of its files, remembering
  // the URI->key association so the input format can recover the key per split.
  Collection<PartitionKey> keys = getInputKeys();
  List<Location> locations = new ArrayList<>(keys.size());
  Map<String, PartitionKey> keyByUri = new HashMap<>(keys.size());
  for (PartitionKey key : keys) {
    PartitionDetail detail = getPartition(key);
    String relativePath = Objects.requireNonNull(detail).getRelativePath();
    Location location = files.getLocation(relativePath);
    locations.add(location);
    keyByUri.put(location.toURI().toString(), key);
  }
  // Delegate to the embedded file set, then attach the serialized path mapping.
  Map<String, String> config = files.getInputFormatConfiguration(locations);
  config.put(PATH_TO_PARTITIONING_MAPPING, GSON.toJson(keyByUri));
  return config;
}
@Override
public Map<String, String> getInputFormatConfiguration() {
  // Map every selected partition key to the location of its files, remembering
  // the URI->key association so the input format can recover the key per split.
  Collection<PartitionKey> keys = getInputKeys();
  List<Location> locations = new ArrayList<>(keys.size());
  Map<String, PartitionKey> keyByUri = new HashMap<>(keys.size());
  for (PartitionKey key : keys) {
    PartitionDetail detail = getPartition(key);
    String relativePath = Objects.requireNonNull(detail).getRelativePath();
    Location location = files.getLocation(relativePath);
    locations.add(location);
    keyByUri.put(location.toURI().toString(), key);
  }
  // Delegate to the embedded file set, then attach the serialized path mapping.
  Map<String, String> config = files.getInputFormatConfiguration(locations);
  config.put(PATH_TO_PARTITIONING_MAPPING, GSON.toJson(keyByUri));
  return config;
}
@VisibleForTesting
public void addPartitionToExplore(PartitionKey key, String path) {
  // No-op unless explore integration is enabled and a facade is available.
  if (!exploreEnabled) {
    return;
  }
  ExploreFacade exploreFacade = exploreFacadeProvider.get();
  if (exploreFacade == null) {
    return;
  }
  try {
    exploreFacade.addPartition(datasetInstanceId, spec, key, files.getLocation(path).toURI().getPath());
  } catch (Exception e) {
    throw new DataSetException(String.format(
      "Unable to add partition for key %s with path %s to explore table.", key.toString(), path), e);
  }
}
@VisibleForTesting
public void addPartitionToExplore(PartitionKey key, String path) {
  // No-op unless explore integration is enabled and a facade is available.
  if (!exploreEnabled) {
    return;
  }
  ExploreFacade exploreFacade = exploreFacadeProvider.get();
  if (exploreFacade == null) {
    return;
  }
  try {
    exploreFacade.addPartition(datasetInstanceId, spec, key, files.getLocation(path).toURI().getPath());
  } catch (Exception e) {
    throw new DataSetException(String.format(
      "Unable to add partition for key %s with path %s to explore table.", key.toString(), path), e);
  }
}
@ReadOnly byte[] assertNotExists(PartitionKey key, boolean supportNonTransactional) { byte[] rowKey = generateRowKey(key, partitioning); if (tx == null && supportNonTransactional) { if (LOG.isWarnEnabled()) { StringBuilder sb = new StringBuilder(); for (StackTraceElement stackTraceElement : Thread.currentThread().getStackTrace()) { sb.append("\n\tat ").append(stackTraceElement.toString()); } SAMPLING_LOG.warn("Operation should be performed within a transaction. " + "This operation may require a transaction in the future. {}", sb); } // to handle backwards compatibility (user might have called PartitionedFileSet#getPartitionOutput outside // of a transaction), we can't check partition existence via the partitionsTable. As an fallback approach, // check the filesystem. Location partitionLocation = files.getLocation(getOutputPath(key)); if (exists(partitionLocation)) { throw new DataSetException(String.format("Location %s for partition key %s already exists: ", partitionLocation, key)); } } else { Row row = partitionsTable.get(rowKey); if (!row.isEmpty()) { throw new PartitionAlreadyExistsException(getName(), key); } } return rowKey; }
@ReadOnly byte[] assertNotExists(PartitionKey key, boolean supportNonTransactional) { byte[] rowKey = generateRowKey(key, partitioning); if (tx == null && supportNonTransactional) { if (LOG.isWarnEnabled()) { StringBuilder sb = new StringBuilder(); for (StackTraceElement stackTraceElement : Thread.currentThread().getStackTrace()) { sb.append("\n\tat ").append(stackTraceElement.toString()); } SAMPLING_LOG.warn("Operation should be performed within a transaction. " + "This operation may require a transaction in the future. {}", sb); } // to handle backwards compatibility (user might have called PartitionedFileSet#getPartitionOutput outside // of a transaction), we can't check partition existence via the partitionsTable. As an fallback approach, // check the filesystem. Location partitionLocation = files.getLocation(getOutputPath(key)); if (exists(partitionLocation)) { throw new DataSetException(String.format("Location %s for partition key %s already exists: ", partitionLocation, key)); } } else { Row row = partitionsTable.get(rowKey); if (!row.isEmpty()) { throw new PartitionAlreadyExistsException(getName(), key); } } return rowKey; }
private void undoPartitionCreate(AddPartitionOperation operation) throws Exception {
  // Best-effort rollback: attempt both undo steps even if one fails, remembering
  // the first failure and attaching any later one as suppressed.
  Exception failure = null;
  if (operation.isExplorePartitionCreated()) {
    try {
      dropPartitionFromExplore(operation.getPartitionKey());
    } catch (Exception e) {
      failure = e;
    }
  }
  if (operation.isFilesCreated()) {
    try {
      Location location = files.getLocation(operation.getRelativePath());
      if (location.exists() && !location.delete(true)) {
        throw new IOException(String.format("Failed to delete location %s.", location));
      }
    } catch (Exception e) {
      if (failure == null) {
        failure = e;
      } else {
        failure.addSuppressed(e);
      }
    }
  }
  if (failure != null) {
    throw failure;
  }
}
private void undoPartitionCreate(AddPartitionOperation operation) throws Exception {
  // Best-effort rollback: attempt both undo steps even if one fails, remembering
  // the first failure and attaching any later one as suppressed.
  Exception failure = null;
  if (operation.isExplorePartitionCreated()) {
    try {
      dropPartitionFromExplore(operation.getPartitionKey());
    } catch (Exception e) {
      failure = e;
    }
  }
  if (operation.isFilesCreated()) {
    try {
      Location location = files.getLocation(operation.getRelativePath());
      if (location.exists() && !location.delete(true)) {
        throw new IOException(String.format("Failed to delete location %s.", location));
      }
    } catch (Exception e) {
      if (failure == null) {
        failure = e;
      } else {
        failure.addSuppressed(e);
      }
    }
  }
  if (failure != null) {
    throw failure;
  }
}
@Override
public void run() {
  try {
    // Write two CSV lines to the location given by the "input.path" runtime argument.
    String path = getContext().getRuntimeArguments().get("input.path");
    FileSet fileSet = getContext().getDataset(CSV_FILESET_DATASET);
    Location location = fileSet.getLocation(path);
    try (PrintWriter writer = new PrintWriter(location.getOutputStream())) {
      writer.println("this,text,has");
      writer.write("two,words,text,inside");
      metrics.gauge("num.lines", 2);
    }
  } catch (Throwable t) {
    LOG.error("Exception occurred while running custom action ", t);
  }
}
}
private void testSparkWithFileSet(ApplicationManager applicationManager, String sparkProgram) throws Exception { DataSetManager<FileSet> filesetManager = getDataset("fs"); FileSet fileset = filesetManager.get(); Location location = fileset.getLocation("nn"); prepareFileInput(location); Map<String, String> inputArgs = new HashMap<>(); FileSetArguments.setInputPath(inputArgs, "nn"); Map<String, String> outputArgs = new HashMap<>(); FileSetArguments.setOutputPath(inputArgs, "xx"); Map<String, String> args = new HashMap<>(); args.putAll(RuntimeArguments.addScope(Scope.DATASET, "fs", inputArgs)); args.putAll(RuntimeArguments.addScope(Scope.DATASET, "fs", outputArgs)); args.put("input", "fs"); args.put("output", "fs"); SparkManager sparkManager = applicationManager.getSparkManager(sparkProgram).start(args); sparkManager.waitForRun(ProgramRunStatus.COMPLETED, 1, TimeUnit.MINUTES); validateFileOutput(fileset.getLocation("xx"), "custom:"); // Cleanup paths after running test fileset.getLocation("nn").delete(true); fileset.getLocation("xx").delete(true); }
HttpServiceResponder responder) throws IOException { Location tmpLocation = Transactionals.execute(getContext(), context -> { return context.<FileSet>getDataset("wordcount").getLocation(UUID.randomUUID().toString()); });
@Override
public void run() throws TransactionFailureException {
  // Write to the table inside an explicit transaction.
  getContext().execute(new TxRunnable() {
    @Override
    public void run(DatasetContext context) throws Exception {
      table.write("hello", "world");
    }
  });

  // Write a single byte to the custom file set.
  FileSet fileSet = getContext().getDataset(CUSTOM_FILESET);
  try (OutputStream out = fileSet.getLocation("test").getOutputStream()) {
    out.write(42);
  } catch (IOException e) {
    throw Throwables.propagate(e);
  }

  // If the companion service is discoverable, verify its response.
  URL serviceURL = getContext().getServiceURL(CUSTOM_SERVICE);
  if (serviceURL == null) {
    return;
  }
  try {
    HttpURLConnection connection = (HttpURLConnection) new URL(serviceURL, "service").openConnection();
    try (BufferedReader reader = new BufferedReader(new InputStreamReader(connection.getInputStream()))) {
      Assert.assertEquals("service", CharStreams.toString(reader));
    } finally {
      connection.disconnect();
    }
  } catch (IOException e) {
    throw Throwables.propagate(e);
  }
}
}
private void undoPartitionDelete(DropPartitionOperation operation) throws Exception { // move from quarantine, back to original location Location srcLocation = getQuarantineLocation().append(operation.getRelativePath()); if (srcLocation.exists()) { srcLocation.renameTo(files.getLocation(operation.getRelativePath())); } // recreating the partition in Hive only makes sense if the rename succeeds addPartitionToExplore(operation.getPartitionKey(), operation.getRelativePath()); }
private void undoPartitionDelete(DropPartitionOperation operation) throws Exception { // move from quarantine, back to original location Location srcLocation = getQuarantineLocation().append(operation.getRelativePath()); if (srcLocation.exists()) { srcLocation.renameTo(files.getLocation(operation.getRelativePath())); } // recreating the partition in Hive only makes sense if the rename succeeds addPartitionToExplore(operation.getPartitionKey(), operation.getRelativePath()); }
private List<String> readFromOutput(FileSet fileSet, String relativePath) throws IOException { // small amount of data, so expect all data from just 1 file Location location = fileSet.getLocation(relativePath).append("part-m-00000"); return CharStreams.readLines(CharStreams.newReaderSupplier(Locations.newInputSupplier(location), Charsets.UTF_8)); }
@Test public void testPermissions() throws Exception { // validate that the fileset permissions and group were applied to the embedded fileset (just sanity test) PartitionedFileSet pfs = dsFrameworkUtil.getInstance(pfsInstance); Location loc = pfs.getEmbeddedFileSet().getLocation("some/random/path"); loc.getOutputStream().close(); Assert.assertEquals(fsPermissions, loc.getPermissions()); Assert.assertEquals(group, loc.getGroup()); Map<String, String> props = dsFrameworkUtil.getSpec(pfsInstance).getSpecification("partitions").getProperties(); Assert.assertEquals(tablePermissions, TableProperties.getTablePermissions(props)); }
private void testSparkWithGetDataset(Class<? extends Application> appClass, String sparkProgram) throws Exception { ApplicationManager applicationManager = deploy(appClass); DataSetManager<FileSet> filesetManager = getDataset("logs"); FileSet fileset = filesetManager.get(); Location location = fileset.getLocation("nn"); prepareInputFileSetWithLogData(location); Map<String, String> inputArgs = new HashMap<>(); FileSetArguments.setInputPath(inputArgs, "nn"); Map<String, String> args = new HashMap<>(); args.putAll(RuntimeArguments.addScope(Scope.DATASET, "logs", inputArgs)); args.put("input", "logs"); args.put("output", "logStats"); SparkManager sparkManager = applicationManager.getSparkManager(sparkProgram); sparkManager.startAndWaitForRun(args, ProgramRunStatus.COMPLETED, 2, TimeUnit.MINUTES); DataSetManager<KeyValueTable> logStatsManager = getDataset("logStats"); KeyValueTable logStatsTable = logStatsManager.get(); validateGetDatasetOutput(logStatsTable); // Cleanup after run location.delete(true); logStatsManager.flush(); try (CloseableIterator<KeyValue<byte[], byte[]>> scan = logStatsTable.scan(null, null)) { while (scan.hasNext()) { logStatsTable.delete(scan.next().getKey()); } } logStatsManager.flush(); }
@Category(SlowTests.class)
@Test
public void testCustomActionDatasetAccess() throws Exception {
  // Set up the datasets the custom action writes to, then run service + workflow.
  addDatasetInstance("keyValueTable", DatasetWithCustomActionApp.CUSTOM_TABLE);
  addDatasetInstance("fileSet", DatasetWithCustomActionApp.CUSTOM_FILESET);

  ApplicationManager appManager = deployApplication(DatasetWithCustomActionApp.class);
  ServiceManager serviceManager =
    appManager.getServiceManager(DatasetWithCustomActionApp.CUSTOM_SERVICE).start();
  serviceManager.waitForRun(ProgramRunStatus.RUNNING, 10, TimeUnit.SECONDS);
  WorkflowManager workflowManager =
    appManager.getWorkflowManager(DatasetWithCustomActionApp.CUSTOM_WORKFLOW).start();
  workflowManager.waitForRun(ProgramRunStatus.COMPLETED, 2, TimeUnit.MINUTES);
  appManager.stopAll();

  // Verify the values the custom action wrote into the table.
  DataSetManager<KeyValueTable> outTableManager = getDataset(DatasetWithCustomActionApp.CUSTOM_TABLE);
  KeyValueTable outputTable = outTableManager.get();
  Assert.assertEquals("world", Bytes.toString(outputTable.read("hello")));
  Assert.assertEquals("service", Bytes.toString(outputTable.read("hi")));
  Assert.assertEquals("another.world", Bytes.toString(outputTable.read("another.hello")));

  // Verify the single byte written into the file set.
  DataSetManager<FileSet> outFileSetManager = getDataset(DatasetWithCustomActionApp.CUSTOM_FILESET);
  FileSet fs = outFileSetManager.get();
  try (InputStream in = fs.getLocation("test").getInputStream()) {
    Assert.assertEquals(42, in.read());
  }
}