private DFSA<String, Integer> getViterbiSearchGraph(List<IN> doc, Class<? extends CoreAnnotation<String>> answerField) { if (doc.isEmpty()) { return new DFSA<>(null); } // TODO get rid of ObjectBankWrapper ObjectBankWrapper<IN> obw = new ObjectBankWrapper<>(flags, null, knownLCWords); doc = obw.processDocument(doc); SequenceModel model = getSequenceModel(doc); return ViterbiSearchGraphBuilder.getGraph(model, classIndex); }
/**
 * Runs the configured preprocessing passes over a document, in place.
 *
 * @param doc The document to preprocess (modified in place)
 * @return The same document, after preprocessing
 */
public List<IN> processDocument(List<IN> doc) {
  // Optional tag-normalization passes, each gated by a flag.
  if (flags.mergeTags) {
    mergeTags(doc);
  }
  if (flags.iobTags) {
    iobTags(doc);
  }
  // Position numbering, word shapes, and string interning for every token.
  doBasicStuff(doc);
  return doc;
}
// NOTE(review): fragment of a larger method (enclosing if/else is cut off at
// both edges here).  Computes an interned word-shape string, then normalizes
// ("fix") and interns the CharAnnotation -- presumably the Chinese-segmentation
// branch -- while the visible else-branch does the same for TextAnnotation.
String s = intern(WordShapeClassifier.wordShape(word, flags.wordShape, knownLCWords)); fl.set(CoreAnnotations.ShapeAnnotation.class, s); fl.set(CoreAnnotations.CharAnnotation.class,intern(fix(fl.get(CoreAnnotations.CharAnnotation.class)))); } else { fl.set(CoreAnnotations.TextAnnotation.class, intern(fix(fl.get(CoreAnnotations.TextAnnotation.class))));
public ObjectBank<List<IN>> makeObjectBankFromFiles(Collection<File> files, DocumentReaderAndWriter<IN> readerAndWriter) { if (files.isEmpty()) { throw new RuntimeException("Attempt to make ObjectBank with empty file list"); } // return new ObjectBank<List<IN>>(new // ResettableReaderIteratorFactory(files, flags.inputEncoding), // readerAndWriter); // TODO get rid of ObjectBankWrapper return new ObjectBankWrapper<>(flags, new ObjectBank<>(new ResettableReaderIteratorFactory(files, flags.inputEncoding), readerAndWriter), knownLCWords); }
public ObjectBank<List<IN>> makeObjectBankFromFiles(String baseDir, String filePattern, DocumentReaderAndWriter<IN> readerAndWriter) { File path = new File(baseDir); FileFilter filter = new RegExFileFilter(Pattern.compile(filePattern)); File[] origFiles = path.listFiles(filter); Collection<File> files = new ArrayList<>(); for (File file : origFiles) { if (file.isFile()) { if (flags.announceObjectBankEntries) { log.info("Getting data from " + file + " (" + flags.inputEncoding + " encoding)"); } files.add(file); } } if (files.isEmpty()) { throw new RuntimeException("No matching files: " + baseDir + '\t' + filePattern); } // return new ObjectBank<List<IN>>(new // ResettableReaderIteratorFactory(files, flags.inputEncoding), // readerAndWriter); // TODO get rid of ObjectBankWrapper return new ObjectBankWrapper<>(flags, new ObjectBank<>(new ResettableReaderIteratorFactory(files, flags.inputEncoding), readerAndWriter), knownLCWords); }
private List<IN> preprocessTokens(List<? extends HasWord> tokenSequence) { // log.info("knownLCWords.size is " + knownLCWords.size() + "; knownLCWords.maxSize is " + knownLCWords.getMaxSize() + // ", prior to NER for " + getClass().toString()); List<IN> document = new ArrayList<>(); int i = 0; for (HasWord word : tokenSequence) { IN wi; // initialized below if (word instanceof CoreMap) { // copy all annotations! some are required later in // AbstractSequenceClassifier.classifyWithInlineXML // wi = (IN) new ArrayCoreMap((ArrayCoreMap) word); wi = tokenFactory.makeToken((IN) word); } else { wi = tokenFactory.makeToken(); wi.set(CoreAnnotations.TextAnnotation.class, word.word()); // wi.setWord(word.word()); } wi.set(CoreAnnotations.PositionAnnotation.class, Integer.toString(i)); wi.set(CoreAnnotations.AnswerAnnotation.class, backgroundSymbol()); document.add(wi); i++; } // TODO get rid of ObjectBankWrapper ObjectBankWrapper<IN> wrapper = new ObjectBankWrapper<>(flags, null, knownLCWords); wrapper.processDocument(document); // log.info("Size of knownLCWords is " + knownLCWords.size() + ", after NER for " + getClass().toString()); return document; }
/**
 * Applies the flag-selected normalization passes followed by the basic
 * per-token preprocessing.  The document is modified in place.
 *
 * @param doc The document to preprocess
 * @return The preprocessed document (same object as the argument)
 */
public List<IN> processDocument(List<IN> doc) {
  if (flags.mergeTags) {
    mergeTags(doc);
  }
  if (flags.iobTags) {
    iobTags(doc);
  }
  doBasicStuff(doc);
  return doc;
}
public ObjectBank<List<IN>> makeObjectBankFromFiles(String[] trainFileList, DocumentReaderAndWriter<IN> readerAndWriter) { // try{ Collection<File> files = new ArrayList<>(); for (String trainFile : trainFileList) { File f = new File(trainFile); files.add(f); } // System.err.printf("trainFileList contains %d file%s in encoding %s.%n", files.size(), files.size() == 1 ? "": "s", flags.inputEncoding); // TODO get rid of ObjectBankWrapper // return new ObjectBank<List<IN>>(new // ResettableReaderIteratorFactory(files), readerAndWriter); return new ObjectBankWrapper<>(flags, new ObjectBank<>(new ResettableReaderIteratorFactory(files, flags.inputEncoding), readerAndWriter), knownLCWords); // } catch (IOException e) { // throw new RuntimeException(e); // } }
private void doBasicStuff(List<IN> doc) { int position = 0; for (IN fl : doc) { // position in document fl.set(PositionAnnotation.class, Integer.toString((position++))); // word shape if ((flags.wordShape > WordShapeClassifier.NOWORDSHAPE) && (!flags.useShapeStrings)) { String s = intern(WordShapeClassifier.wordShape(fl.get(TextAnnotation.class), flags.wordShape, knownLCWords)); fl.set(ShapeAnnotation.class, s); } // normalizing and interning // was the following; should presumably now be // if ("CTBSegDocumentReader".equalsIgnoreCase(flags.documentReader)) { if ("edu.stanford.nlp.wordseg.Sighan2005DocumentReaderAndWriter".equalsIgnoreCase(flags.readerAndWriter)) { // for Chinese segmentation, "word" is no use and ignore goldAnswer for memory efficiency. fl.set(CharAnnotation.class,intern(fix(fl.get(CharAnnotation.class)))); } else { fl.set(TextAnnotation.class, intern(fix(fl.get(TextAnnotation.class)))); fl.set(GoldAnswerAnnotation.class, fl.get(AnswerAnnotation.class)); } } }
// NOTE(review): fragment of a larger method.  Wraps the document through an
// ObjectBankWrapper (with no underlying ObjectBank) purely for its
// processDocument() preprocessing side effects.  TODO get rid of ObjectBankWrapper.
ObjectBankWrapper<IN> obw = new ObjectBankWrapper<>(flags, null, knownLCWords); doc = obw.processDocument(doc);
/**
 * Preprocesses a document in place: optional tag merging and IOB
 * normalization, then the basic per-token pass.
 *
 * @param doc The document to process
 * @return doc, after in-place preprocessing
 */
public List<IN> processDocument(List<IN> doc) {
  if (flags.mergeTags) {
    mergeTags(doc);
  }
  if (flags.iobTags) {
    iobTags(doc);
  }
  doBasicStuff(doc);
  return doc;
}
/** * Set up an ObjectBank that will allow one to iterate over a collection of * documents obtained from the passed in Reader. Each document will be * represented as a list of IN. If the ObjectBank iterator() is called until * hasNext() returns false, then the Reader will be read till end of file, but * no reading is done at the time of this call. Reading is done using the * reading method specified in {@code flags.documentReader}, and for some * reader choices, the column mapping given in {@code flags.map}. * * @param in * Input data addNEWLCWords do we add new lowercase words from this * data to the word shape classifier * @return The list of documents */ public ObjectBank<List<IN>> makeObjectBankFromReader(BufferedReader in, DocumentReaderAndWriter<IN> readerAndWriter) { if (flags.announceObjectBankEntries) { log.info("Reading data using " + readerAndWriter.getClass()); } // TODO get rid of ObjectBankWrapper // return new ObjectBank<List<IN>>(new ResettableReaderIteratorFactory(in), // readerAndWriter); return new ObjectBankWrapper<>(flags, new ObjectBank<>(new ResettableReaderIteratorFactory(in), readerAndWriter), knownLCWords); }
// NOTE(review): fragment of a larger method (enclosing if/else is cut off at
// both edges here).  Computes an interned word-shape string, then normalizes
// ("fix") and interns the CharAnnotation -- presumably the Chinese-segmentation
// branch -- while the visible else-branch does the same for TextAnnotation.
String s = intern(WordShapeClassifier.wordShape(word, flags.wordShape, knownLCWords)); fl.set(CoreAnnotations.ShapeAnnotation.class, s); fl.set(CoreAnnotations.CharAnnotation.class,intern(fix(fl.get(CoreAnnotations.CharAnnotation.class)))); } else { fl.set(CoreAnnotations.TextAnnotation.class, intern(fix(fl.get(CoreAnnotations.TextAnnotation.class))));
private DFSA<String, Integer> getViterbiSearchGraph(List<IN> doc, Class<? extends CoreAnnotation<String>> answerField) { if (doc.isEmpty()) { return new DFSA<>(null); } // TODO get rid of ObjectBankWrapper ObjectBankWrapper<IN> obw = new ObjectBankWrapper<>(flags, null, knownLCWords); doc = obw.processDocument(doc); SequenceModel model = getSequenceModel(doc); return ViterbiSearchGraphBuilder.getGraph(model, classIndex); }
/**
 * Runs document preprocessing: tag merging and IOB conversion when the
 * corresponding flags are set, followed by the basic per-token pass.
 *
 * @param doc The document, modified in place
 * @return The same document
 */
public List<IN> processDocument(List<IN> doc) {
  if (flags.mergeTags) {
    mergeTags(doc);
  }
  if (flags.iobTags) {
    iobTags(doc);
  }
  doBasicStuff(doc);
  return doc;
}
/** * Reads a String into an ObjectBank object. NOTE: that the current * implementation of ReaderIteratorFactory will first try to interpret each * string as a filename, so this method will yield unwanted results if it * applies to a string that is at the same time a filename. It prints out a * warning, at least. * * @param string The String which will be the content of the ObjectBank * @return The ObjectBank */ public ObjectBank<List<IN>> makeObjectBankFromString(String string, DocumentReaderAndWriter<IN> readerAndWriter) { if (flags.announceObjectBankEntries) { log.info("Reading data using " + readerAndWriter.getClass()); if (flags.inputEncoding == null) { log.info("Getting data from " + string + " (default encoding)"); } else { log.info("Getting data from " + string + " (" + flags.inputEncoding + " encoding)"); } } // return new ObjectBank<List<IN>>(new // ResettableReaderIteratorFactory(string), readerAndWriter); // TODO return new ObjectBankWrapper<>(flags, new ObjectBank<>(new ResettableReaderIteratorFactory(string), readerAndWriter), knownLCWords); }
// NOTE(review): fragment of a larger method (enclosing if/else is cut off at
// both edges).  Computes an interned word-shape string, then normalizes and
// interns the CharAnnotation -- presumably the Chinese-segmentation branch --
// while the else-branch normalizes TextAnnotation and copies the current
// AnswerAnnotation into GoldAnswerAnnotation.
String s = intern(WordShapeClassifier.wordShape(word, flags.wordShape, knownLCWords)); fl.set(CoreAnnotations.ShapeAnnotation.class, s); fl.set(CoreAnnotations.CharAnnotation.class,intern(fix(fl.get(CoreAnnotations.CharAnnotation.class)))); } else { fl.set(CoreAnnotations.TextAnnotation.class, intern(fix(fl.get(CoreAnnotations.TextAnnotation.class)))); fl.set(CoreAnnotations.GoldAnswerAnnotation.class, fl.get(CoreAnnotations.AnswerAnnotation.class));
public DFSA<String, Integer> getViterbiSearchGraph(List<IN> doc, Class<? extends CoreAnnotation<String>> answerField) { if (doc.isEmpty()) { return new DFSA<String, Integer>(null); } // TODO get rid of objectbankwrapper ObjectBankWrapper<IN> obw = new ObjectBankWrapper<IN>(flags, null, knownLCWords); doc = obw.processDocument(doc); SequenceModel model = getSequenceModel(doc); return ViterbiSearchGraphBuilder.getGraph(model, classIndex); }
public ObjectBank<List<IN>> makeObjectBankFromFiles(Collection<File> files, DocumentReaderAndWriter readerAndWriter) { if (files.isEmpty()) { throw new RuntimeException("Attempt to make ObjectBank with empty file list"); } // return new ObjectBank<List<IN>>(new // ResettableReaderIteratorFactory(files, flags.inputEncoding), // readerAndWriter); // TODO get rid of objectbankwrapper return new ObjectBankWrapper<IN>(flags, new ObjectBank<List<IN>>(new ResettableReaderIteratorFactory(files, flags.inputEncoding), readerAndWriter), knownLCWords); }
public DFSA<String, Integer> getViterbiSearchGraph(List<IN> doc, Class<? extends CoreAnnotation<String>> answerField) { if (doc.isEmpty()) { return new DFSA<String, Integer>(null); } // TODO get rid of objectbankwrapper ObjectBankWrapper<IN> obw = new ObjectBankWrapper<IN>(flags, null, knownLCWords); doc = obw.processDocument(doc); SequenceModel model = getSequenceModel(doc); return ViterbiSearchGraphBuilder.getGraph(model, classIndex); }