private String getMetadataFileNameForBranch(WorkUnitState state, int branchId) { // Note: This doesn't follow the pattern elsewhere in Gobblin where we have branch specific config // parameters! Leaving this way for backwards compatibility. String filePrefix = state.getProp(ConfigurationKeys.DATA_PUBLISHER_METADATA_OUTPUT_FILE); return ForkOperatorUtils.getPropertyNameForBranch(filePrefix, this.numBranches, branchId); }
/**
 * Builds the default {@link Path} used for the {@link ConfigurationKeys#WRITER_FILE_PATH} key.
 *
 * @param state the state to derive the path from; must be a {@link WorkUnitState} or a {@link WorkUnit}
 * @param numBranches the total number of branches for the given {@link State}
 * @param branchId the id of the branch that the {@link org.apache.gobblin.writer.DataWriter} will write to
 * @return a {@link Path} for the directory the {@link org.apache.gobblin.writer.DataWriter} will write to
 * @throws RuntimeException if {@code state} is neither a {@link WorkUnitState} nor a {@link WorkUnit}
 */
public static Path getDefaultWriterFilePath(State state, int numBranches, int branchId) {
  if (state instanceof WorkUnitState) {
    final WorkUnitState taskState = (WorkUnitState) state;
    final String branchPath =
        ForkOperatorUtils.getPathForBranch(taskState, taskState.getOutputFilePath(), numBranches, branchId);
    return new Path(branchPath);
  }

  if (state instanceof WorkUnit) {
    final WorkUnit unit = (WorkUnit) state;
    final String branchPath =
        ForkOperatorUtils.getPathForBranch(unit, unit.getOutputFilePath(), numBranches, branchId);
    return new Path(branchPath);
  }

  // Any other State subtype carries no output file path to fall back on.
  throw new RuntimeException("In order to get the default value for " + ConfigurationKeys.WRITER_FILE_PATH
      + " the given state must be of type " + WorkUnitState.class.getName() + " or " + WorkUnit.class.getName());
}
// Configures `state` for a two-branch fork: for branch ids 0 and 1 it sets the branch name, the
// writer file system URI (ConfigurationKeys.LOCAL_FS_URI), the writer staging dir and the writer
// output dir, each under the key returned by ForkOperatorUtils.getPropertyNameForBranch(key, 2, id).
// It then calls fs.mkdirs(...) on the staging and output paths of both branches.
// NOTE(review): the four ForkOperatorUtils.getPathForBranch(...) statements have an extra closing
// parenthesis and the writerStagingPath*/writerOutputPath* declarations are not visible here --
// this excerpt looks truncated (presumably `Path x = new Path(dir, ...` prefixes were lost);
// confirm against the full test source before relying on it.
state.setProp(ForkOperatorUtils.getPropertyNameForBranch(ConfigurationKeys.FORK_BRANCH_NAME_KEY, 2, 0), branchName0); state.setProp(ForkOperatorUtils.getPropertyNameForBranch(ConfigurationKeys.FORK_BRANCH_NAME_KEY, 2, 1), branchName1); state.setProp(ForkOperatorUtils.getPropertyNameForBranch(ConfigurationKeys.WRITER_FILE_SYSTEM_URI, 2, 0), ConfigurationKeys.LOCAL_FS_URI); state.setProp(ForkOperatorUtils.getPropertyNameForBranch(ConfigurationKeys.WRITER_FILE_SYSTEM_URI, 2, 1), ConfigurationKeys.LOCAL_FS_URI); state.setProp(ForkOperatorUtils.getPropertyNameForBranch(ConfigurationKeys.WRITER_STAGING_DIR, 2, 0), writerStagingDir0.toString()); state.setProp(ForkOperatorUtils.getPropertyNameForBranch(ConfigurationKeys.WRITER_STAGING_DIR, 2, 1), writerStagingDir1.toString()); state.setProp(ForkOperatorUtils.getPropertyNameForBranch(ConfigurationKeys.WRITER_OUTPUT_DIR, 2, 0), writerOutputDir0.toString()); state.setProp(ForkOperatorUtils.getPropertyNameForBranch(ConfigurationKeys.WRITER_OUTPUT_DIR, 2, 1), writerOutputDir1.toString()); ForkOperatorUtils.getPathForBranch(state, state.getExtract().getOutputFilePath(), 2, 0)); fs.mkdirs(writerStagingPath0); ForkOperatorUtils.getPathForBranch(state, state.getExtract().getOutputFilePath(), 2, 1)); fs.mkdirs(writerStagingPath1); ForkOperatorUtils.getPathForBranch(state, state.getExtract().getOutputFilePath(), 2, 0)); fs.mkdirs(writerOutputPath0); ForkOperatorUtils.getPathForBranch(state, state.getExtract().getOutputFilePath(), 2, 1)); fs.mkdirs(writerOutputPath1);
/**
 * Rolls back this task's contribution to the job-level metrics when the task is retried.
 *
 * <p>For every branch index in {@code [0, branches)}, the task-level record and byte counters of
 * that fork are read and the job-level record and byte counters are decremented by those amounts.
 *
 * @param branches number of forked branches
 */
public void adjustJobMetricsOnRetry(int branches) {
  final TaskMetrics taskMetrics = TaskMetrics.get(this);

  int branch = 0;
  while (branch < branches) {
    final String forkId = ForkOperatorUtils.getForkId(this.taskId, branch);

    // Snapshot what this fork has reported so far at task level ...
    final long taskRecords = taskMetrics.getCounter(MetricGroup.TASK.name(), forkId, RECORDS).getCount();
    final long taskBytes = taskMetrics.getCounter(MetricGroup.TASK.name(), forkId, BYTES).getCount();

    // ... and subtract exactly that much from the job-level totals.
    taskMetrics.getCounter(MetricGroup.JOB.name(), this.jobId, RECORDS).dec(taskRecords);
    taskMetrics.getCounter(MetricGroup.JOB.name(), this.jobId, BYTES).dec(taskBytes);

    branch++;
  }
}
/**
 * Rolls back this task's contribution to the job-level metrics when the task is retried.
 *
 * <p>For every branch index in {@code [0, branches)}, the task-level record and byte counters of
 * that fork are read and the job-level record and byte counters are decremented by those amounts.
 *
 * @param branches number of forked branches
 */
public void adjustJobMetricsOnRetry(int branches) {
  final TaskMetrics taskMetrics = TaskMetrics.get(this);

  int branch = 0;
  while (branch < branches) {
    final String forkId = ForkOperatorUtils.getForkId(this.taskId, branch);

    // Snapshot what this fork has reported so far at task level ...
    final long taskRecords = taskMetrics.getCounter(MetricGroup.TASK.name(), forkId, RECORDS).getCount();
    final long taskBytes = taskMetrics.getCounter(MetricGroup.TASK.name(), forkId, BYTES).getCount();

    // ... and subtract exactly that much from the job-level totals.
    taskMetrics.getCounter(MetricGroup.JOB.name(), this.jobId, RECORDS).dec(taskRecords);
    taskMetrics.getCounter(MetricGroup.JOB.name(), this.jobId, BYTES).dec(taskBytes);

    branch++;
  }
}
/**
 * Stores the given {@link FsPermission} in the given {@link State} under the branch-specific
 * {@link ConfigurationKeys#WRITER_FILE_PERMISSIONS} key, for the writer to use when creating files.
 * Intended to be used together with {@link #deserializeWriterFilePermissions(State, int, int)}.
 *
 * @param state the state to write the permission entry into
 * @param numBranches the total number of branches
 * @param branchId the id of the branch the permissions apply to
 * @param fsPermissions the file permissions to store
 */
public static void serializeWriterFilePermissions(State state, int numBranches, int branchId,
    FsPermission fsPermissions) {
  final String permissionsKey =
      ForkOperatorUtils.getPropertyNameForBranch(ConfigurationKeys.WRITER_FILE_PERMISSIONS, numBranches, branchId);
  serializeFsPermissions(state, permissionsKey, fsPermissions);
}
/**
 * Checks that, with two branches, {@code ForkOperatorUtils.getPathForBranch} returns the base
 * path followed by {@code "/"} and the name configured for the requested branch.
 */
@Test
public void testGetPathForBranch() {
  final State forkState = new State();
  forkState.setProp(ConfigurationKeys.FORK_BRANCH_NAME_KEY + ".0", FORK_BRANCH_NAME_0);
  forkState.setProp(ConfigurationKeys.FORK_BRANCH_NAME_KEY + ".1", FORK_BRANCH_NAME_1);

  // Branch 0
  final String actualBranch0 = ForkOperatorUtils.getPathForBranch(forkState, PATH_FOO, 2, 0);
  final String expectedBranch0 = PATH_FOO + "/" + FORK_BRANCH_NAME_0;
  Assert.assertEquals(actualBranch0, expectedBranch0);

  // Branch 1
  final String actualBranch1 = ForkOperatorUtils.getPathForBranch(forkState, PATH_FOO, 2, 1);
  final String expectedBranch1 = PATH_FOO + "/" + FORK_BRANCH_NAME_1;
  Assert.assertEquals(actualBranch1, expectedBranch1);
}
/**
 * Stores the given octal-notation permission {@link String} in the given {@link State} under the
 * branch-specific {@link ConfigurationKeys#WRITER_FILE_PERMISSIONS} key, for the writer to use
 * when creating files. Intended to be used together with
 * {@link #deserializeWriterFilePermissions(State, int, int)}.
 *
 * @param state the state to write the permission entry into
 * @param numBranches the total number of branches
 * @param branchId the id of the branch the permissions apply to
 * @param octalPermissions the permissions in octal notation; stored as given
 */
public static void setWriterFileOctalPermissions(State state, int numBranches, int branchId,
    String octalPermissions) {
  final String permissionsKey =
      ForkOperatorUtils.getPropertyNameForBranch(ConfigurationKeys.WRITER_FILE_PERMISSIONS, numBranches, branchId);
  state.setProp(permissionsKey, octalPermissions);
}
/**
 * Determines the default {@link Mode} from the destination's properties.
 *
 * <p>The lookup key is derived from {@code FULL_MODE_KEY} via
 * {@code ForkOperatorUtils.getPathForBranch}; when the key is absent,
 * {@link Mode#POST_RECORDS} is used. The configured value is upper-cased before being matched
 * against the {@link Mode} constants, so matching is case-insensitive.
 *
 * @return the configured mode, or {@link Mode#POST_RECORDS} if the value is missing, does not
 *         name a {@link Mode} constant, or the lookup fails for any other reason
 */
public Mode getDefaultMode() {
  try {
    State destinationCfg = getDestination().getProperties();
    // NOTE(review): the key is built with getPathForBranch rather than getPropertyNameForBranch;
    // left unchanged to preserve existing behavior -- confirm this is intended.
    String modeKey = ForkOperatorUtils.getPathForBranch(destinationCfg, FULL_MODE_KEY, getBranches(), getBranch());
    String configuredMode = destinationCfg.getProp(modeKey, Mode.POST_RECORDS.name());
    // Locale.ROOT keeps the upper-casing independent of the JVM's default locale (e.g. under a
    // Turkish locale "i" would otherwise become a dotted capital I and never match a constant).
    return Mode.valueOf(configuredMode.toUpperCase(java.util.Locale.ROOT));
  } catch (Throwable ignored) {
    // Deliberately best-effort: any failure to resolve the mode falls back to the default.
    return Mode.POST_RECORDS;
  }
}
/**
 * Initializes this converter from the given work unit state.
 *
 * <p>The filter pattern is read from the property named by
 * {@code ForkOperatorUtils.getPropertyNameForBranch} for
 * {@link ConfigurationKeys#CONVERTER_STRING_FILTER_PATTERN}; a missing value is compiled as the
 * empty pattern. The matcher is reset to absent.
 *
 * @param workUnit the work unit state providing the converter configuration
 * @return this converter
 */
@Override
public Converter<Class<String>, Class<String>, String, String> init(WorkUnitState workUnit) {
  final String patternKey =
      ForkOperatorUtils.getPropertyNameForBranch(workUnit, ConfigurationKeys.CONVERTER_STRING_FILTER_PATTERN);
  final String configuredRegex = workUnit.getProp(patternKey);

  // nullToEmpty guards Pattern.compile against an unset property.
  this.pattern = Pattern.compile(Strings.nullToEmpty(configuredRegex));
  this.matcher = Optional.absent();
  return this;
}
/**
 * Determines the default event bus id from the destination's properties.
 *
 * <p>The lookup key is derived from {@code FULL_EVENTBUSID_KEY} via
 * {@code ForkOperatorUtils.getPathForBranch}. If the destination properties contain that key,
 * its value is returned; otherwise the string form of
 * {@code WriterUtils.getWriterOutputDir} for the current branch is used.
 *
 * @return the configured event bus id, or the writer output directory as a string
 */
public String getDefaultEventBusId() {
  final State props = getDestination().getProperties();
  final String idKey =
      ForkOperatorUtils.getPathForBranch(props, FULL_EVENTBUSID_KEY, getBranches(), getBranch());

  // No explicit id configured: fall back to the writer output directory.
  if (!props.contains(idKey)) {
    return WriterUtils.getWriterOutputDir(props, getBranches(), getBranch()).toString();
  }
  return props.getProp(idKey);
}
/**
 * Stores the given {@link FsPermission} in the given {@link State} under the branch-specific
 * {@link ConfigurationKeys#WRITER_DIR_PERMISSIONS} key, for the writer to use when creating
 * directories. Intended to be used together with
 * {@link #deserializeWriterDirPermissions(State, int, int)}.
 *
 * @param state the state to write the permission entry into
 * @param numBranches the total number of branches
 * @param branchId the id of the branch the permissions apply to
 * @param fsPermissions the directory permissions to store
 */
public static void serializeWriterDirPermissions(State state, int numBranches, int branchId,
    FsPermission fsPermissions) {
  final String permissionsKey =
      ForkOperatorUtils.getPropertyNameForBranch(ConfigurationKeys.WRITER_DIR_PERMISSIONS, numBranches, branchId);
  serializeFsPermissions(state, permissionsKey, fsPermissions);
}
/**
 * Builds the default {@link Path} used for the {@link ConfigurationKeys#WRITER_FILE_PATH} key.
 *
 * @param state the state to derive the path from; must be a {@link WorkUnitState} or a {@link WorkUnit}
 * @param numBranches the total number of branches for the given {@link State}
 * @param branchId the id of the branch that the {@link org.apache.gobblin.writer.DataWriter} will write to
 * @return a {@link Path} for the directory the {@link org.apache.gobblin.writer.DataWriter} will write to
 * @throws RuntimeException if {@code state} is neither a {@link WorkUnitState} nor a {@link WorkUnit}
 */
public static Path getDefaultWriterFilePath(State state, int numBranches, int branchId) {
  if (state instanceof WorkUnitState) {
    final WorkUnitState taskState = (WorkUnitState) state;
    final String branchPath =
        ForkOperatorUtils.getPathForBranch(taskState, taskState.getOutputFilePath(), numBranches, branchId);
    return new Path(branchPath);
  }

  if (state instanceof WorkUnit) {
    final WorkUnit unit = (WorkUnit) state;
    final String branchPath =
        ForkOperatorUtils.getPathForBranch(unit, unit.getOutputFilePath(), numBranches, branchId);
    return new Path(branchPath);
  }

  // Any other State subtype carries no output file path to fall back on.
  throw new RuntimeException("In order to get the default value for " + ConfigurationKeys.WRITER_FILE_PATH
      + " the given state must be of type " + WorkUnitState.class.getName() + " or " + WorkUnit.class.getName());
}
/**
 * Stores the given octal-notation permission {@link String} in the given {@link State} under the
 * branch-specific {@link ConfigurationKeys#WRITER_DIR_PERMISSIONS} key, for the writer to use
 * when creating directories. Intended to be used together with
 * {@link #deserializeWriterDirPermissions(State, int, int)}.
 *
 * @param state the state to write the permission entry into
 * @param numBranches the total number of branches
 * @param branchId the id of the branch the permissions apply to
 * @param octalPermissions the permissions in octal notation; stored as given
 */
public static void setWriterDirOctalPermissions(State state, int numBranches, int branchId,
    String octalPermissions) {
  final String permissionsKey =
      ForkOperatorUtils.getPropertyNameForBranch(ConfigurationKeys.WRITER_DIR_PERMISSIONS, numBranches, branchId);
  state.setProp(permissionsKey, octalPermissions);
}
/**
 * Determines the default {@link Mode} from the destination's properties.
 *
 * <p>The lookup key is derived from {@code FULL_MODE_KEY} via
 * {@code ForkOperatorUtils.getPathForBranch}; when the key is absent,
 * {@link Mode#POST_RECORDS} is used. The configured value is upper-cased before being matched
 * against the {@link Mode} constants, so matching is case-insensitive.
 *
 * @return the configured mode, or {@link Mode#POST_RECORDS} if the value is missing, does not
 *         name a {@link Mode} constant, or the lookup fails for any other reason
 */
public Mode getDefaultMode() {
  try {
    State destinationCfg = getDestination().getProperties();
    // NOTE(review): the key is built with getPathForBranch rather than getPropertyNameForBranch;
    // left unchanged to preserve existing behavior -- confirm this is intended.
    String modeKey = ForkOperatorUtils.getPathForBranch(destinationCfg, FULL_MODE_KEY, getBranches(), getBranch());
    String configuredMode = destinationCfg.getProp(modeKey, Mode.POST_RECORDS.name());
    // Locale.ROOT keeps the upper-casing independent of the JVM's default locale (e.g. under a
    // Turkish locale "i" would otherwise become a dotted capital I and never match a constant).
    return Mode.valueOf(configuredMode.toUpperCase(java.util.Locale.ROOT));
  } catch (Throwable ignored) {
    // Deliberately best-effort: any failure to resolve the mode falls back to the default.
    return Mode.POST_RECORDS;
  }
}
/**
 * Reads the writer partition suffix configured for the given branch.
 *
 * @param state the state to read the property from
 * @param numBranches the total number of branches
 * @param branchId the id of the branch
 * @return the configured suffix, or the empty string when the property is not set
 */
private static String getWriterPartitionSuffix(State state, int numBranches, int branchId) {
  return state.getProp(
      ForkOperatorUtils.getPropertyNameForBranch(WRITER_PARTITION_SUFFIX, numBranches, branchId),
      StringUtils.EMPTY);
}
/**
 * Determines the default event bus id from the destination's properties.
 *
 * <p>The lookup key is derived from {@code FULL_EVENTBUSID_KEY} via
 * {@code ForkOperatorUtils.getPathForBranch}. If the destination properties contain that key,
 * its value is returned; otherwise the string form of
 * {@code WriterUtils.getWriterOutputDir} for the current branch is used.
 *
 * @return the configured event bus id, or the writer output directory as a string
 */
public String getDefaultEventBusId() {
  final State props = getDestination().getProperties();
  final String idKey =
      ForkOperatorUtils.getPathForBranch(props, FULL_EVENTBUSID_KEY, getBranches(), getBranch());

  // No explicit id configured: fall back to the writer output directory.
  if (!props.contains(idKey)) {
    return WriterUtils.getWriterOutputDir(props, getBranches(), getBranch()).toString();
  }
  return props.getProp(idKey);
}
/**
 * Reads the writer partition prefix configured for the given branch.
 *
 * @param state the state to read the property from
 * @param numBranches the total number of branches
 * @param branchId the id of the branch
 * @return the configured prefix, or the empty string when the property is not set
 */
private static String getWriterPartitionPrefix(State state, int numBranches, int branchId) {
  return state.getProp(
      ForkOperatorUtils.getPropertyNameForBranch(WRITER_PARTITION_PREFIX, numBranches, branchId),
      StringUtils.EMPTY);
}
/**
 * Resolves the writer partition time zone configured for the given branch.
 *
 * @param state the state to read the property from
 * @param numBranches the total number of branches
 * @param branchId the id of the branch
 * @return the {@link DateTimeZone} for the configured zone id, using
 *         {@code DEFAULT_WRITER_PARTITION_TIMEZONE} when the property is not set
 */
private static DateTimeZone getTimeZone(State state, int numBranches, int branchId) {
  final String zoneKey =
      ForkOperatorUtils.getPropertyNameForBranch(WRITER_PARTITION_TIMEZONE, numBranches, branchId);
  final String zoneId = state.getProp(zoneKey, DEFAULT_WRITER_PARTITION_TIMEZONE);
  return DateTimeZone.forID(zoneId);
}
/**
 * Returns the writer output directory configured in the given state.
 *
 * <p>The property name is resolved for branch 0 of a single-branch setup
 * ({@code numBranches = 1}, {@code branchId = 0}).
 *
 * @param state the state to read {@link ConfigurationKeys#WRITER_OUTPUT_DIR} from
 * @return a {@link Path} built from the configured value
 */
public static Path getOutputDir(State state) {
  final String outputDirKey =
      ForkOperatorUtils.getPropertyNameForBranch(ConfigurationKeys.WRITER_OUTPUT_DIR, 1, 0);
  final String outputDir = state.getProp(outputDirKey);
  return new Path(outputDir);
}