/**
 * Create an ID for a new multi-task (corresponding to a {@link gobblin.source.workunit.MultiWorkUnit})
 * for the job with the given job ID.
 *
 * @param jobId job ID
 * @param sequence multi-task sequence number
 * @return new multi-task ID
 */
public static String newMultiTaskId(String jobId, int sequence) {
  return Id.MultiTask.create(Id.parse(jobId).get(Id.Parts.INSTANCE_NAME), sequence).toString();
}
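A hypothetical usage sketch (the "job_<name>_<timestamp>" ID format is an assumption about Gobblin's ID convention, not shown in this snippet):

// The multi-task ID reuses the job's instance name and appends the sequence number.
String multiTaskId = newMultiTaskId("job_MyJob_1428000000000", 0);
// e.g. something like "multitask_MyJob_1428000000000_0" (the exact prefix is an assumption)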
/**
 * Given a {@link FsPermission} object, set a key-value pair in the given {@link State} for the writer to
 * use when creating files. This method should be used in conjunction with
 * {@link #deserializeWriterDirPermissions(State, int, int)}.
 */
public static void serializeWriterDirPermissions(State state, int numBranches, int branchId,
    FsPermission fsPermissions) {
  serializeFsPermissions(state,
      ForkOperatorUtils.getPropertyNameForBranch(ConfigurationKeys.WRITER_DIR_PERMISSIONS, numBranches, branchId),
      fsPermissions);
}
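A hedged round-trip sketch for a single-branch job (the 0750 permission value is illustrative):

State state = new State();
serializeWriterDirPermissions(state, 1, 0, new FsPermission((short) 0750));
// The writer later recovers the same permissions via the paired method:
// FsPermission dirPerms = deserializeWriterDirPermissions(state, 1, 0);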
/**
 * Returns the writer {@link FileSystem} for the first branch (numBranches = 1, branchId = 0), optionally
 * wrapped with throttling if so configured in the given {@link State}.
 */
private static FileSystem getTargetFileSystem(State state) throws IOException {
  return HadoopUtils.getOptionallyThrottledFileSystem(WriterUtils.getWriterFS(state, 1, 0), state);
}
/**
 * Builds a Kafka 0.8 consumer client, reading the buffer size, client name, fetch correlation ID, and
 * topic/offset fetch retry counts from the given {@link Config}, falling back to defaults where unset.
 */
private Kafka08ConsumerClient(Config config) {
  super(config);
  bufferSize = ConfigUtils.getInt(config, CONFIG_KAFKA_BUFFER_SIZE_BYTES, CONFIG_KAFKA_BUFFER_SIZE_BYTES_DEFAULT);
  clientName = ConfigUtils.getString(config, CONFIG_KAFKA_CLIENT_NAME, CONFIG_KAFKA_CLIENT_NAME_DEFAULT);
  fetchCorrelationId = ConfigUtils.getInt(config, CONFIG_KAFKA_FETCH_REQUEST_CORRELATION_ID,
      CONFIG_KAFKA_FETCH_REQUEST_CORRELATION_ID_DEFAULT);
  fetchTopicRetries = ConfigUtils.getInt(config, CONFIG_KAFKA_FETCH_TOPIC_NUM_TRIES,
      CONFIG_KAFKA_FETCH_TOPIC_NUM_TRIES_DEFAULT);
  fetchOffsetRetries = ConfigUtils.getInt(config, CONFIG_KAFKA_FETCH_OFFSET_NUM_TRIES,
      CONFIG_KAFKA_FETCH_OFFSET_NUM_TRIES_DEFAULT);
}
/**
 * Return the string value at <code>path</code> if <code>config</code> contains the path. If not, return an
 * empty string.
 *
 * @param config config in which the path may be present
 * @param path key to look for in the config object
 * @return string value at <code>path</code> if <code>config</code> contains the path, otherwise an empty string
 */
public static String emptyIfNotPresent(Config config, String path) {
  return getString(config, path, StringUtils.EMPTY);
}
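A short usage sketch (assuming this method lives alongside getString in Gobblin's ConfigUtils; the keys are illustrative):

Config config = ConfigFactory.parseMap(ImmutableMap.of("kafka.brokers", "localhost:9092"));
ConfigUtils.emptyIfNotPresent(config, "kafka.brokers"); // "localhost:9092"
ConfigUtils.emptyIfNotPresent(config, "kafka.topic");   // "" instead of a ConfigException.Missing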
/**
 * Create a new task ID for the job with the given job ID.
 *
 * @param jobId job ID
 * @param sequence task sequence number
 * @return new task ID
 */
public static String newTaskId(String jobId, int sequence) {
  return Id.Task.create(Id.parse(jobId).get(Id.Parts.INSTANCE_NAME), sequence).toString();
}
/**
 * Delete the temporary {@link #schemaDir}.
 */
public void cleanupTempSchemas() throws IOException {
  HadoopUtils.deleteIfExists(this.fs, this.schemaDir, true);
}
/**
 * Constructs a dataset over the given subset of files, keyed by {@code identifier} and rooted at
 * {@code rootPath} (stripped of scheme and authority).
 */
public SubsetFilesCopyableDataset(final FileSystem fs, Path rootPath, Properties properties, String identifier,
    List<FileStatus> subFiles) {
  this.rootPath = PathUtils.getPathWithoutSchemeAndAuthority(rootPath);
  this.fs = fs;
  this.files = subFiles;
  this.identifier = identifier;
  this.props = properties;
}
/**
 * Lists all files under the specified path recursively; if a (sub)directory contains no files, the directory
 * itself is included in the result.
 */
public static List<FileStatus> listMostNestedPathRecursively(FileSystem fs, Path path) throws IOException {
  return listMostNestedPathRecursively(fs, path, NO_OP_PATH_FILTER);
}
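A hedged illustration of the contract (the directory layout is hypothetical):

// Given:
//   /data/tracking/part-00000.avro
//   /data/empty/                    (a directory containing no files)
// the result holds the FileStatus of the file plus the FileStatus of the empty directory itself.
List<FileStatus> statuses = listMostNestedPathRecursively(fs, new Path("/data"));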
/**
 * Returns the name of the cluster on which the application is running. Uses the default Hadoop
 * {@link Configuration} to get the URL of the resource manager or job tracker, and translates that URL into a
 * human-readable cluster name using {@link #getClusterName(String)}.
 *
 * @see #getClusterName(Configuration)
 */
public String getClusterName() {
  return getClusterName(HADOOP_CONFIGURATION);
}
/**
 * Flatten the <code>inputSchema</code>.
 * {@inheritDoc}
 * @see gobblin.data.management.conversion.hive.converter.AbstractAvroToOrcConverter#convertSchema(org.apache.avro.Schema, gobblin.configuration.WorkUnitState)
 */
@Override
public Schema convertSchema(Schema inputSchema, WorkUnitState workUnit) {
  return AVRO_FLATTENER.flatten(inputSchema, false);
}
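A hedged before/after illustration of the flattening (the double-underscore name separator is an assumption; consult AvroFlattener for the actual naming scheme):

// Input schema (nested):  record Event { string id; record Meta { long ts; } meta; }
// Output schema (flat):   record Event { string id; long meta__ts; }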
/**
 * Returns the priority of the given dataset: {@code HIGH_PRIORITY} if its name matches a high-priority
 * pattern, {@code NORMAL_PRIORITY} if it matches a normal-priority pattern, and {@code LOW_PRIORITY} otherwise.
 */
protected double getDatasetPriority(String datasetName) {
  double priority = LOW_PRIORITY;
  if (DatasetFilterUtils.stringInPatterns(datasetName, this.highPriority)) {
    priority = HIGH_PRIORITY;
  } else if (DatasetFilterUtils.stringInPatterns(datasetName, this.normalPriority)) {
    priority = NORMAL_PRIORITY;
  }
  return priority;
}
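A hedged sketch of the pattern-driven prioritization (the pattern values are illustrative):

// With this.highPriority = [".*PageView.*"] and this.normalPriority = [".*Click.*"] compiled as Patterns:
// getDatasetPriority("PageViewEvent")  -> HIGH_PRIORITY
// getDatasetPriority("ClickEvent")     -> NORMAL_PRIORITY
// getDatasetPriority("HeartbeatEvent") -> LOW_PRIORITY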
/**
 * Resolves the truly underlying object in a possible chain of {@link gobblin.util.Decorator}s.
 *
 * @param obj object to resolve
 * @return the true non-decorator underlying object
 */
public static Object resolveUnderlyingObject(Object obj) {
  while (obj instanceof Decorator) {
    obj = ((Decorator) obj).getDecoratedObject();
  }
  return obj;
}
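A hedged usage sketch (the decorator chain is hypothetical; only the Decorator contract comes from the source):

// If a writer is wrapped as RetryWriter -> ThrottleWriter -> AvroHdfsDataWriter, with each wrapper
// implementing Decorator, the whole chain is unwrapped in one call:
Object underlying = resolveUnderlyingObject(writer); // yields the innermost AvroHdfsDataWriter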
/**
 * Returns true if superUserName can proxy as userNameToProxyAs using the specified superUserKeytabLocation,
 * false otherwise.
 */
public static boolean canProxyAs(String userNameToProxyAs, String superUserName, Path superUserKeytabLocation) {
  try {
    loginAndProxyAsUser(userNameToProxyAs, superUserName, superUserKeytabLocation);
  } catch (IOException e) {
    return false;
  }
  return true;
}
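A hedged usage sketch (the user names and keytab path are illustrative):

if (ProxiedFileSystemUtils.canProxyAs("alice", "gobblin-super", new Path("/security/keytabs/gobblin.keytab"))) {
  // safe to go ahead and create a FileSystem proxied as "alice"
}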
public static ClustersNames getInstance() {
  synchronized (ClustersNames.class) {
    if (null == THE_INSTANCE) {
      THE_INSTANCE = new ClustersNames();
    }
    return THE_INSTANCE;
  }
}
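A possible alternative (not in the source): the initialization-on-demand holder idiom gives the same lazy, thread-safe singleton without taking a lock on every call:

private static class Holder {
  // The JVM guarantees this is initialized exactly once, on first access to Holder.
  static final ClustersNames INSTANCE = new ClustersNames();
}

public static ClustersNames getInstance() {
  return Holder.INSTANCE;
}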
/**
 * Creates a {@link FileSystem} proxied as {@code userNameToProxyAs} via a keytab login, optionally decorated
 * to match the reference {@link FileSystem}.
 */
@Override
public FileSystem call() throws Exception {
  FileSystem fs = ProxiedFileSystemUtils.createProxiedFileSystemUsingKeytab(this.userNameToProxyAs,
      this.superUser, this.keytabLocation, this.uri, this.configuration);
  if (this.referenceFS != null) {
    return decorateFilesystemFromReferenceFS(fs, this.referenceFS);
  }
  return fs;
}
/**
 * Note: if you must acquire multiple locks, please use {@link #safeGetClients} instead, as this call may
 * deadlock.
 *
 * @return an auto-returnable wrapper around a {@link IMetaStoreClient}
 * @throws IOException if a pooled client cannot be obtained
 */
public AutoReturnableObject<IMetaStoreClient> getClient() throws IOException {
  return new AutoReturnableObject<>(this.pool);
}
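A hedged usage sketch (assuming AutoReturnableObject implements AutoCloseable and exposes the pooled client via get(), as its name and pool-based constructor suggest):

try (AutoReturnableObject<IMetaStoreClient> client = pool.getClient()) {
  Table table = client.get().getTable("my_db", "my_table");
  // the client is handed back to the pool when the try block exits
}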
/**
 * Creates a {@link FileSystem} proxied as {@code userNameToProxyAs} via a delegation token, optionally
 * decorated to match the reference {@link FileSystem}.
 */
@Override
public FileSystem call() throws Exception {
  FileSystem fs = ProxiedFileSystemUtils.createProxiedFileSystemUsingToken(this.userNameToProxyAs,
      this.userNameToken, this.uri, this.configuration);
  if (this.referenceFS != null) {
    return decorateFilesystemFromReferenceFS(fs, this.referenceFS);
  }
  return fs;
}
/**
 * Deserialize a String obtained via {@link #serialize(Serializable)} into an object, using
 * {@link BaseEncoding#base64()}.
 *
 * @param serialized the serialized String
 * @param clazz the class the deserialized object should be cast to
 * @return the deserialized object
 * @throws IOException if it fails to deserialize the object
 */
public static <T extends Serializable> T deserialize(String serialized, Class<T> clazz) throws IOException {
  return deserialize(serialized, clazz, DEFAULT_ENCODING);
}
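A hedged round-trip sketch (assuming the paired serialize(Serializable) produces the Base64 String this method consumes):

String encoded = serialize("hello");                      // Base64-encoded Java serialization
String roundTripped = deserialize(encoded, String.class); // "hello"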
/**
 * Given a {@link FsPermission} object, set a key-value pair in the given {@link State} for the writer to
 * use when creating files. This method should be used in conjunction with
 * {@link #deserializeWriterFilePermissions(State, int, int)}.
 */
public static void serializeWriterFilePermissions(State state, int numBranches, int branchId,
    FsPermission fsPermissions) {
  serializeFsPermissions(state,
      ForkOperatorUtils.getPropertyNameForBranch(ConfigurationKeys.WRITER_FILE_PERMISSIONS, numBranches, branchId),
      fsPermissions);
}