/** {@inheritDoc} */ @Override public void gather(FijiRowData input, GathererContext context) throws IOException { if (!input.containsColumn("info", "email")) { // No email data. return; } String email = input.getMostRecentValue("info", "email").toString(); int atSymbol = email.indexOf('@'); if (atSymbol < 0) { // Invalid email. return; } String domain = email.substring(atSymbol + 1); mDomain.set(domain); context.write(mDomain, ONE); }
/** {@inheritDoc} */ @Override public void produce(FijiRowData input, ProducerContext context) throws IOException { if (!input.containsColumn("info", "email")) { // This user doesn't have an email address. return; } String email = input.getMostRecentValue("info", "email").toString(); int atSymbol = email.indexOf('@'); if (atSymbol < 0) { // Couldn't find the '@' in the email address. Give up. return; } String domain = email.substring(atSymbol + 1); context.put(domain); } }
/** * This method will be called once for each row of the phonebook table. * * @param entityId The entity id for the row. * @param row The data from the row (in this case, it would only * include the address column because that is all we requested * when configuring the input format). * @param hadoopContext The MapReduce job context used to emit output. * @throws IOException If there is an IO error. */ @Override public void map(EntityId entityId, FijiRowData row, Context hadoopContext) throws IOException { // Check that the row has the info:address column. // The column names are specified as constants in the Fields.java class. if (!row.containsColumn(Fields.INFO_FAMILY, Fields.ADDRESS)) { LOG.info("Missing address field in row: " + entityId); hadoopContext.getCounter(Counter.MISSING_ADDRESS).increment(1L); return; } final String victimState = hadoopContext.getConfiguration().get(CONF_STATE, ""); final Address address = row.getMostRecentValue(Fields.INFO_FAMILY, Fields.ADDRESS); if (victimState.equals(address.getState().toString())) { // Delete the entry. mWriter.deleteRow(entityId); } }
/** {@inheritDoc} */ @Override public void produce(FijiRowData input, ProducerContext context) throws IOException { if (!input.containsColumn(getInputColumnName().getFamily(), getInputColumnName().getQualifier())) { LOG.debug("No " + getInputColumnName().getName() + " for entity: " + input.getEntityId()); } String string = input.getMostRecentValue(getInputColumnName().getFamily(), getInputColumnName().getQualifier()).toString(); // Run the regex on the input string. Matcher matcher = mPattern.matcher(string); if (matcher.matches()) { if (matcher.groupCount() == 1) { context.put(matcher.group(1)); } } else { LOG.debug(input.getEntityId().toString() + "'s data '" + string + "' does not match " + mPattern.pattern()); } } }
final FijiRowData rowData = reader.get(entityId, dataRequest); if (!rowData.containsColumn(Fields.INFO_FAMILY, Fields.FIRST_NAME)) {
if (!row.containsColumn(Fields.INFO_FAMILY, Fields.ADDRESS)) { LOG.info("Missing address field in row: " + entityId); hadoopContext.getCounter(Counter.MISSING_ADDRESS).increment(1L);