/**
 * Parses rules from the given reader and builds a new extractor bound to the supplied environment.
 *
 * @param env Environment used for binding variables while the rules are parsed
 * @param r Reader supplying the rule text
 * @return a new CoreMapExpressionExtractor backed by the parsed rule list
 * @throws ParseException if the rule grammar is malformed
 * @throws TokenSequenceParseException if the rule text cannot be tokenized
 */
public CoreMapExpressionExtractor getExpressionExtractor(Env env, Reader r) throws ParseException, TokenSequenceParseException {
  try {
    TokenSequenceParser parser = new TokenSequenceParser(r);
    List<SequenceMatchRules.Rule> parsedRules = parser.RuleList(env);
    return new CoreMapExpressionExtractor(env, parsedRules);
  } catch (TokenMgrError error) {
    // TokenMgrError is an Error from the generated lexer; surface it as a checked parse exception
    throw new TokenSequenceParseException("Parsing failed. Error: " + error);
  }
}
// NOTE(review): fragment of a larger stage-application method — the enclosing signature and the
// statements before/after are not visible in this chunk, so the code is left byte-identical.
// Visible flow: log the basic-rule matches, annotate them onto the annotation, prune null-valued
// and nested matches, then — if the stage defines a composite extract rule — apply it (bounded by
// stage.limitIters), keep the updated merged list and expressions, filter invalid expressions,
// and clear temporary tags from the annotation.
log.info("extractExpressions() extracting with " + basicExtractRule + " from " + annotation + " gives " + matchedExpressions); annotateExpressions(annotation, matchedExpressions); matchedExpressions = MatchedExpression.removeNullValues(matchedExpressions); matchedExpressions = MatchedExpression.removeNested(matchedExpressions); SequenceMatchRules.ExtractRule<List<? extends CoreMap>, T> compositeExtractRule = stage.compositeExtractRule; if (compositeExtractRule != null) { Pair<List<? extends CoreMap>, List<T>> p = applyCompositeRule( compositeExtractRule, merged, matchedExpressions, stage.limitIters); merged = p.first(); matchedExpressions = p.second(); matchedExpressions = filterInvalidExpressions(stage.filterRule, matchedExpressions); cleanupTags(annotation);
/**
 * Parses additional rules from the reader and appends them to an existing extractor,
 * reusing that extractor's own environment for variable bindings.
 *
 * @param extractor extractor to extend with the newly parsed rules
 * @param r Reader supplying the rule text
 * @throws ParseException if the rule grammar is malformed
 * @throws TokenSequenceParseException if the rule text cannot be tokenized
 */
public void updateExpressionExtractor(CoreMapExpressionExtractor extractor, Reader r) throws ParseException, TokenSequenceParseException {
  try {
    TokenSequenceParser parser = new TokenSequenceParser(r);
    extractor.appendRules(parser.RuleList(extractor.getEnv()));
  } catch (TokenMgrError error) {
    // Translate the generated lexer's Error into the checked exception callers expect
    throw new TokenSequenceParseException("Parsing failed. Error: " + error);
  }
}
/** Runs the extractor over the annotation, honoring the extractWithTokens and flatten options. */
private List<CoreMap> extract(CoreMap annotation) {
  List<CoreMap> extracted = options.extractWithTokens
      ? extractor.extractCoreMapsMergedWithTokens(annotation)
      : extractor.extractCoreMaps(annotation);
  return options.flatten ? extractor.flatten(extracted) : extracted;
}
public KBPTokensregexExtractor(String tokensregexDir, boolean verbose) { if (verbose) logger.log("Creating TokensRegexExtractor"); // Create extractors for (RelationType rel : RelationType.values()) { String relFileNameComponent = rel.canonicalName.replaceAll(":", "_"); String path = tokensregexDir + File.separator + relFileNameComponent.replaceAll("/", "SLASH") + ".rules"; if (IOUtils.existsInClasspathOrFileSystem(path)) { List<String> listFiles = new ArrayList<>(); listFiles.add(tokensregexDir + File.separator + "defs.rules"); listFiles.add(path); if (verbose) logger.log("Rule files for relation " + rel + " is " + path); Env env = TokenSequencePattern.getNewEnv(); env.bind("collapseExtractionRules", true); env.bind("verbose", verbose); CoreMapExpressionExtractor extr = CoreMapExpressionExtractor.createExtractorFromFiles(env, listFiles).keepTemporaryTags(); rules.put(rel, extr); } } }
/**
 * Creates an extractor using the specified environment, reading the rules from a single file.
 * Convenience wrapper around the list-based {@code createExtractorFromFiles}.
 *
 * @param env Environment to use for binding variables and applying rules
 * @param filename Name of the rule file to read
 * @throws RuntimeException if the rules cannot be read or parsed
 */
public static CoreMapExpressionExtractor createExtractorFromFile(Env env, String filename) throws RuntimeException {
  List<String> singleFile = Collections.singletonList(filename);
  return createExtractorFromFiles(env, singleFile);
}
/**
 * Clears temporary tags from the coremap. A fresh identity-keyed visited map is passed
 * to the recursive overload so shared or cyclic structures are handled safely.
 */
private void cleanupTags(CoreMap cm) {
  IdentityHashMap<Object, Boolean> visited = new IdentityHashMap<>();
  cleanupTags(cm, visited);
}
/**
 * Extracts matched expressions from the annotation and appends each expression's
 * backing coremap to the supplied result list.
 *
 * @param res list to append results to (also returned for chaining)
 * @param annotation annotation to extract from
 * @return the {@code res} list with one coremap per matched expression appended
 */
private List<CoreMap> extractCoreMapsToList(List<CoreMap> res, CoreMap annotation) {
  for (T expr : extractExpressions(annotation)) {
    res.add(expr.getAnnotation());
  }
  return res;
}
// NOTE(review): disjoint fragments from a usage/demo context (rule-file loading with a fresh
// environment, token POS/NER debug printing, and iteration over extracted matches). The
// enclosing method(s) are not visible in this chunk, so the code is left byte-identical.
.createExtractorFromFiles(TokenSequencePattern.getNewEnv(), rules); token.get(CoreAnnotations.PartOfSpeechAnnotation.class) + ", ne=" + token.get(CoreAnnotations.NamedEntityTagAnnotation.class)); List<MatchedExpression> matchedExpressions = extractor.extractExpressions(sentence); for (MatchedExpression matched : matchedExpressions) {
/**
 * Creates an instance with the specified environment and list of rules.
 *
 * @param env Environment to use for binding variables and applying rules
 * @param rules List of rules for this extractor
 */
public CoreMapExpressionExtractor(Env env, List<SequenceMatchRules.Rule> rules) {
  // Delegate to the env-only constructor, then attach the rules.
  this(env);
  appendRules(rules);
}
/**
 * Returns the list of coremaps matching this extractor's rules.
 *
 * @param annotation annotation to run the rules over
 * @return freshly allocated list of matching coremaps
 */
public List<CoreMap> extractCoreMaps(CoreMap annotation) {
  return extractCoreMapsToList(new ArrayList<>(), annotation);
}
/**
 * Returns a list interleaving the annotation's original tokens with the matched coremaps.
 * Token offsets in the matches are shifted by the annotation's own token-begin offset
 * (defaulting to 0 when absent) so they index into the annotation's token list.
 *
 * @param annotation annotation to extract from
 */
public List<CoreMap> extractCoreMapsMergedWithTokens(CoreMap annotation) {
  List<CoreMap> matches = extractCoreMaps(annotation);
  Integer tokenBegin = annotation.get(CoreAnnotations.TokenBeginAnnotation.class);
  final int offset = (tokenBegin == null) ? 0 : tokenBegin;
  return CollectionUtils.mergeListWithSortedMatchedPreAggregated(
      annotation.get(tokensAnnotationKey), matches,
      (CoreMap in) -> Interval.toInterval(
          in.get(CoreAnnotations.TokenBeginAnnotation.class) - offset,
          in.get(CoreAnnotations.TokenEndAnnotation.class) - offset));
}
// NOTE(review): fragment from around the composite-rule application loop — the enclosing
// signature and the rest of the loop are not visible here, so the code is left byte-identical.
// Visible flow: when the composite rule extracted anything, log (if verbose), annotate the new
// expressions onto the merged coremaps, prune null-valued results, and (in the truncated branch)
// presumably continue iterating while new non-empty expressions appear — TODO confirm against
// the full method.
if (verbose && extracted) log.info("applyCompositeRule() extracting with " + compositeExtractRule + " from " + merged + " gives " + newExprs); if (extracted) { annotateExpressions(merged, newExprs); newExprs = MatchedExpression.removeNullValues(newExprs); if ( ! newExprs.isEmpty()) {
/** Extracts coremaps from the annotation per the configured options (token merging, flattening). */
private List<CoreMap> extract(CoreMap annotation) {
  final List<CoreMap> cms = options.extractWithTokens
      ? extractor.extractCoreMapsMergedWithTokens(annotation)
      : extractor.extractCoreMaps(annotation);
  if (!options.flatten) {
    return cms;
  }
  return extractor.flatten(cms);
}
/**
 * Creates an extractor using the specified environment, reading the rules from the given
 * filenames. Varargs convenience wrapper around the list-based overload.
 *
 * @param env Environment to use for binding variables and applying rules
 * @param filenames Rule files to read, in order
 * @throws RuntimeException if the rules cannot be read or parsed
 */
public static <M extends MatchedExpression> CoreMapExpressionExtractor<M> createExtractorFromFiles(Env env, String... filenames) throws RuntimeException {
  List<String> files = Arrays.asList(filenames);
  return createExtractorFromFiles(env, files);
}
/**
 * Recursively clears temporary tags from every coremap reachable through the collection.
 * The identity-keyed {@code cleaned} map records objects already seen, so shared or cyclic
 * structures are processed exactly once.
 */
private void cleanupTags(Collection objs, Map<Object, Boolean> cleaned) {
  for (Object obj : objs) {
    if (cleaned.containsKey(obj)) {
      continue;  // already visited (or in progress) — skip to avoid rework and cycles
    }
    cleaned.put(obj, false);  // mark in-progress before recursing
    if (obj instanceof CoreMap) {
      cleanupTags((CoreMap) obj, cleaned);
    } else if (obj instanceof Collection) {
      cleanupTags((Collection) obj, cleaned);
    }
    cleaned.put(obj, true);  // fully cleaned
  }
}
/**
 * Extracts matched expressions from the annotation, first computing and caching the
 * numerized-token view on the annotation if that key is not already present.
 *
 * @param annotation annotation to extract from
 * @return matched expressions found by the underlying extractor
 */
public List<MatchedExpression> extract(CoreMap annotation) {
  // containsKey (not a null check) preserves behavior when the key is present with a null value
  boolean hasNumerizedTokens = annotation.containsKey(CoreAnnotations.NumerizedTokensAnnotation.class);
  if (!hasNumerizedTokens) {
    annotation.set(CoreAnnotations.NumerizedTokensAnnotation.class,
        NumberNormalizer.findAndMergeNumbers(annotation));
  }
  return extractor.extractExpressions(annotation);
}
/**
 * Creates an instance with the specified environment and list of rules.
 *
 * @param env Environment to use for binding variables and applying rules
 * @param rules List of rules for this extractor
 */
public CoreMapExpressionExtractor(Env env, List<SequenceMatchRules.Rule> rules) {
  this(env);          // set up the environment first
  appendRules(rules); // then register the supplied rules
}
/**
 * Returns the list of coremaps matching this extractor's rules.
 *
 * @param annotation annotation to run the rules over
 * @return freshly allocated list of matching coremaps
 */
public List<CoreMap> extractCoreMaps(CoreMap annotation) {
  List<CoreMap> results = new ArrayList<>();
  extractCoreMapsToList(results, annotation);
  return results;
}
/**
 * Returns a list of the annotation's original tokens merged with the matched coremaps.
 * Token offsets in the matches are shifted by the annotation's token-begin offset
 * (defaulting to 0 when absent) so they index into the annotation's own token list.
 *
 * @param annotation annotation to extract from
 */
public List<CoreMap> extractCoreMapsMergedWithTokens(CoreMap annotation) {
  List<CoreMap> res = extractCoreMaps(annotation);
  Integer startTokenOffset = annotation.get(CoreAnnotations.TokenBeginAnnotation.class);
  if (startTokenOffset == null) {
    startTokenOffset = 0;
  }
  final Integer startTokenOffsetFinal = startTokenOffset;
  // Lambda replaces the legacy anonymous Function class, matching the lambda style already
  // used elsewhere in this file; behavior is unchanged.
  List<CoreMap> merged = CollectionUtils.mergeListWithSortedMatchedPreAggregated(
      (List<CoreMap>) annotation.get(tokensAnnotationKey), res,
      (CoreMap in) -> Interval.toInterval(
          in.get(CoreAnnotations.TokenBeginAnnotation.class) - startTokenOffsetFinal,
          in.get(CoreAnnotations.TokenEndAnnotation.class) - startTokenOffsetFinal));
  return merged;
}