/**
 * Reads a single news article and writes its contents to a new Fiji row,
 * indexed by the article's name (a string consisting of the parent folder and
 * this article's hash) and the a priori categorization of this article.
 *
 * @param key The fully qualified path to the current file we're reading.
 * @param value The raw article data to insert.
 * @param context The context to write to.
 * @throws IOException if there is an error.
 */
@Override
public void produce(Text key, Text value, FijiTableContext context) throws IOException {
  Path qualifiedPath = new Path(key.toString());

  // Category is specified on the containing folder.
  String category = qualifiedPath.getParent().getName();
  // Name is the concatenation of category and file name.
  String name = category + "." + qualifiedPath.getName();

  // Write name, category, and raw article.
  EntityId entity = context.getEntityId(name);
  context.put(entity, FAMILY, ARTICLE_NAME_QUALIFIER, name);
  context.put(entity, FAMILY, CATEGORY_QUALIFIER, category);
  context.put(entity, FAMILY, RAW_ARTICLE_QUALIFIER, value.toString());
}
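The column constants referenced above are declared elsewhere in the class; a minimal sketch of what they might look like, with hypothetical family and qualifier names:

// Hypothetical declarations for the constants used by produce() above; the
// actual family and qualifier names come from the table layout and are
// assumptions here, not taken from the source.
private static final String FAMILY = "info";
private static final String ARTICLE_NAME_QUALIFIER = "name";
private static final String CATEGORY_QUALIFIER = "category";
private static final String RAW_ARTICLE_QUALIFIER = "raw_article";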
/**
 * Post-processes rejected lines (logging, keeping count, etc.).
 *
 * @param line the line that was rejected by the producer.
 * @param context the context in which the rejection occurred.
 * @param reason the reason why this line was rejected.
 */
public void reject(Text line, FijiTableContext context, String reason) {
  if (mRejectedLineCounter % mLogRate == 0L) {
    LOG.error("Rejecting line: {} with reason: {}", line.toString(), reason);
  }
  mRejectedLineCounter++;

  // TODO(FIJIMRLIB-9) Abort this bulk importer job early if rejected records exceed a threshold.
  context.incrementCounter(JobHistoryCounters.BULKIMPORTER_RECORDS_REJECTED);
  // TODO(FIJIMRLIB-4) Allow this to emit to a rejected output so that import can be reattempted.
}
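The counter state used by reject() is not shown; a minimal sketch under the assumption that the log rate is a fixed default (in practice it would likely come from job configuration):

// Hypothetical state backing reject(); the names match the usage above, the
// initial values are assumptions.
private long mRejectedLineCounter = 0L;
private long mLogRate = 1000L;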
/**
 * {@inheritDoc}
 * Cleans up job resources. User-overridden cleanup methods must call
 * super.cleanup().
 */
@Override
protected void cleanup(Context hadoopContext) throws IOException, InterruptedException {
  Preconditions.checkState(mTableContext != null);
  mTableContext.close();
  mTableContext = null;
  super.cleanup(hadoopContext);
}
/** {@inheritDoc} */
@Override
public void produce(final FijiRowData row, final FijiTableContext context) throws IOException {
  final Iterable<FijiCell<Object>> cells;
  if (mColumn.isFullyQualified()) {
    cells = row.asIterable(mColumn.getFamily(), mColumn.getQualifier());
  } else {
    cells = row.asIterable(mColumn.getFamily());
  }

  for (FijiCell<Object> cell : cells) {
    context.incrementCounter(Counters.CELLS_PROCESSED);

    final DecodedCell<Object> original =
        new DecodedCell<Object>(cell.getWriterSchema(), cell.getData());
    final DecodedCell<Object> rewritten = rewriteCell(original);

    // Only write back cells that rewriteCell() actually changed.
    if (rewritten != original) {
      context.put(
          row.getEntityId(),
          mColumn.getFamily(),
          mColumn.getQualifier(),
          cell.getTimestamp(),
          rewritten.getData());
      context.incrementCounter(Counters.CELLS_REWRITTEN);
    }
  }
}
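The pivoter above writes back only cells for which rewriteCell() returns a different instance. A minimal sketch of that hook, assuming an identity default that subclasses override:

// A sketch only: returning the argument unchanged signals "no rewrite", so
// produce() above skips the put. A real implementation would return a new
// DecodedCell, e.g. re-encoded with an updated writer schema.
protected DecodedCell<Object> rewriteCell(DecodedCell<Object> cell) {
  return cell;
}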
/** {@inheritDoc} */
@Override
public <T> void put(long timestamp, T value) throws IOException {
  Preconditions.checkNotNull(mEntityId);
  Preconditions.checkNotNull(mQualifier,
      "Producer output configured for a map-type family, use put(qualifier, timestamp, value)");
  mTableContext.put(mEntityId, mFamily, mQualifier, timestamp, value);
}
/**
 * Generates the entity id for this imported line using the source from the import descriptor.
 * Called within the produce() method.
 *
 * @param fields One line of input text split on the column delimiter.
 * @param context The context used by the produce() method.
 * @return The EntityId for the data that gets imported by this line.
 */
protected EntityId getEntityId(List<String> fields, FijiTableContext context) {
  // TODO(FIJIMRLIB-3) Extend this to support composite row key ids.
  String rowkey = fields.get(mFieldMap.get(getEntityIdSource()));
  return context.getEntityId(rowkey);
}
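mFieldMap maps each field name declared in the import descriptor to its position in the delimiter-split line. Its construction is not shown in the source; a hypothetical sketch (requires java.util.Map and java.util.HashMap):

// Assumed structure: field name -> index within the split line. The helper
// below is hypothetical; the real mapping comes from the import descriptor.
private final Map<String, Integer> mFieldMap = new HashMap<String, Integer>();

private void buildFieldMap(List<String> declaredFields) {
  for (int i = 0; i < declaredFields.size(); i++) {
    mFieldMap.put(declaredFields.get(i), i);
  }
}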
/** {@inheritDoc} */
@Override
public void flush() throws IOException {
  mTableContext.flush();
  super.flush();
}
/** {@inheritDoc} */
@Override
public <T> void put(String qualifier, long timestamp, T value) throws IOException {
  Preconditions.checkNotNull(mEntityId);
  Preconditions.checkState(null == mQualifier,
      "Qualifier already specified by producer configuration.");
  mTableContext.put(mEntityId, mFamily, qualifier, timestamp, value);
}
final EntityId user = context.getEntityId(firstName + "," + lastName);
context.put(user, Fields.INFO_FAMILY, Fields.FIRST_NAME, firstName);
context.put(user, Fields.INFO_FAMILY, Fields.LAST_NAME, lastName);
context.put(user, Fields.INFO_FAMILY, Fields.EMAIL, email);
context.put(user, Fields.INFO_FAMILY, Fields.TELEPHONE, telephone);
context.put(user, Fields.INFO_FAMILY, Fields.ADDRESS, streetAddr);
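The Fields constants used above are not part of the snippet; a hypothetical holder class consistent with that usage (the family and qualifier names are assumptions):

public static final class Fields {
  public static final String INFO_FAMILY = "info";
  public static final String FIRST_NAME = "first_name";
  public static final String LAST_NAME = "last_name";
  public static final String EMAIL = "email";
  public static final String TELEPHONE = "telephone";
  public static final String ADDRESS = "address";

  private Fields() { }
}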
/**
 * Post-processes incomplete lines (logging, keeping count, etc.).
 *
 * @param line the line that was marked incomplete by the producer.
 * @param context the context in which the incompletion occurred.
 * @param reason the reason why this line was incomplete.
 */
public void incomplete(Text line, FijiTableContext context, String reason) {
  if (mIncompleteLineCounter % mLogRate == 0L) {
    LOG.error("Incomplete line: {} with reason: {}", line.toString(), reason);
  }
  mIncompleteLineCounter++;

  // TODO(FIJIMRLIB-9) Abort this bulk importer job early if incomplete records exceed a threshold.
  context.incrementCounter(JobHistoryCounters.BULKIMPORTER_RECORDS_INCOMPLETE);
  // TODO(FIJIMRLIB-4) Add a strict mode where we reject incomplete lines.
}
/** {@inheritDoc} */
@Override
public void close() throws IOException {
  mTableContext.close();
  super.close();
}
// Fragment: the surrounding guard was elided in the original; the if-condition
// below (write with an explicit timestamp when one is available) is inferred
// from the two put() overloads being used.
final EntityId eid = context.getEntityId(entityIdSource);
String source = getSource(fijiColumnName);
String fieldValue = getFromPath(gson, source);
if (timestamp != null) {
  context.put(eid, family, qualifier, timestamp, convert(fijiColumnName, fieldValue));
} else {
  context.put(eid, family, qualifier, convert(fijiColumnName, fieldValue));
}
/** {@inheritDoc} */
@Override
protected void map(FijiRowData input, Context context) throws IOException {
  Preconditions.checkNotNull(mContext);
  mPivoter.produce(input, mContext);
  mContext.incrementCounter(JobHistoryCounters.PIVOTER_ROWS_PROCESSED);
}
/** {@inheritDoc} */
@Override
protected void cleanup(Context context) throws IOException {
  Preconditions.checkNotNull(mTableContext);
  mBulkImporter.cleanup(mTableContext);
  mTableContext.close();
  mTableContext = null;
  try {
    super.cleanup(context);
  } catch (InterruptedException ie) {
    throw new IOException(ie);
  }
}
/** {@inheritDoc} */
@Override
public void produce(Text value, FijiTableContext context) throws IOException {
  Map<Field, String> fieldMap;
  try {
    fieldMap = CommonLogParser.get().parseCommonLog(value.toString());
  } catch (ParseException pe) {
    reject(value, context, "Unable to parse row: " + value.toString());
    return;
  }

  Field entityIdSource = Field.valueOf(getEntityIdSource());
  EntityId eid = context.getEntityId(fieldMap.get(entityIdSource));

  for (FijiColumnName fijiColumnName : getDestinationColumns()) {
    Field source = Field.valueOf(getSource(fijiColumnName));
    String fieldValue = fieldMap.get(source);
    if (fieldValue != null) {
      // TODO(FIJIMRLIB-12) Add some ability to use timestamps derived from the log file.
      context.put(eid, fijiColumnName.getFamily(), fijiColumnName.getQualifier(), fieldValue);
    } else {
      reject(value, context, "Log file missing field: " + source);
    }
  }
}
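For context, the producer above consumes NCSA Common Log Format input. The canonical example line from the Apache documentation, with its fields (the exact names of the parser's Field enum values are not shown in the source):

// 127.0.0.1 - frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326
// Fields in order: remote host, identd identity, authenticated user,
// timestamp, request line, status code, and response size in bytes.
Text line = new Text(
    "127.0.0.1 - frank [10/Oct/2000:13:55:36 -0700] \"GET /apache_pb.gif HTTP/1.0\" 200 2326");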
/** {@inheritDoc} */
@Override
protected void cleanup(Context context) throws IOException {
  Preconditions.checkNotNull(mContext);
  mPivoter.cleanup(mContext);
  mContext.close();
  mContext = null;
  super.cleanup(context);
}
final EntityId eid = context.getEntityId(entityIdStr);
String family = fijiColumnName.getFamily();
String qualifier = fijiColumnName.getQualifier();
// Guard inferred from the else-branch; the original fragment elided it.
if (fieldValue != null) {
  context.put(eid, family, qualifier, timestamp, convert(fijiColumnName, fieldValue));
} else {
  incomplete(xmlText, context, "Detected missing field: " + source);
}
/** {@inheritDoc} */
@Override
public void produce(ImmutableBytesWritable hbaseRowKey, Result hbaseRow, FijiTableContext context)
    throws IOException {
  EntityId entity = context.getEntityId(Bytes.toString(hbaseRowKey.get()));

  for (ColumnDescriptor columnDescriptor : mColumnDescriptors) {
    KeyValue keyValue = hbaseRow.getColumnLatest(
        columnDescriptor.getHBaseFamilyBytes(),
        columnDescriptor.getHBaseQualifierBytes());
    if (null == keyValue) {
      // No data in this HTable column, skip it.
      continue;
    }

    // Convert the HBase cell to a Fiji cell.
    DecodedCell<?> fijiCell = decodeHBaseCell(columnDescriptor, keyValue.getValue());

    // Write it at the same timestamp as the HBase cell.
    final String family = columnDescriptor.getFijiColumnName().getFamily();
    final String qualifier = columnDescriptor.getFijiColumnName().getQualifier();
    context.put(entity, family, qualifier, keyValue.getTimestamp(), fijiCell.getData());
  }
}
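A hypothetical shape for ColumnDescriptor, inferred only from the accessors the producer above calls; the field names and constructor are assumptions:

public static final class ColumnDescriptor {
  private final byte[] mHBaseFamily;
  private final byte[] mHBaseQualifier;
  private final FijiColumnName mFijiColumnName;

  public ColumnDescriptor(String hbaseFamily, String hbaseQualifier, FijiColumnName target) {
    mHBaseFamily = Bytes.toBytes(hbaseFamily);
    mHBaseQualifier = Bytes.toBytes(hbaseQualifier);
    mFijiColumnName = target;
  }

  public byte[] getHBaseFamilyBytes() { return mHBaseFamily; }
  public byte[] getHBaseQualifierBytes() { return mHBaseQualifier; }
  public FijiColumnName getFijiColumnName() { return mFijiColumnName; }
}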