edu.stanford.nlp.sequences.PlainTextDocumentReaderAndWriter java code examples

PlainTextDocumentReaderAndWriter<CoreLabel> readerAndWriter = new PlainTextDocumentReaderAndWriter<>();
ObjectBank<List<CoreLabel>> ob = new ObjectBank<>(new ReaderIteratorFactory(reader), readerAndWriter);
PrintWriter pw = new PrintWriter(writer);
  cl.set(CoreAnnotations.AnswerAnnotation.class, tw.tag());
 readerAndWriter.printAnswers(sentence, pw, outputStyle, true);
 ++numSentences;

/**
 * Initializes this reader/writer from the given flags.
 *
 * Builds the tokenizer option string (the fixed base options, plus
 * {@code flags.tokenizerOptions} when set), obtains a tokenizer factory, and
 * delegates to {@code init(flags, factory)}. When {@code flags.tokenizerFactory}
 * names a class, its static {@code newCoreLabelTokenizerFactory(String)} method is
 * invoked reflectively; otherwise the PTB tokenizer factory is used.
 *
 * @param flags configuration read for tokenizer options and factory class name
 * @throws RuntimeException wrapping any failure of the reflective factory lookup
 */
@Override
public void init(SeqClassifierFlags flags) {
 StringBuilder optionBuilder = new StringBuilder("tokenizeNLs=false,invertible=true");
 if (flags.tokenizerOptions != null) {
  // User-supplied options are appended after the defaults.
  optionBuilder.append(',').append(flags.tokenizerOptions);
 }
 String tokenizerOptions = optionBuilder.toString();

 TokenizerFactory<IN> tokenizerFactory;
 if (flags.tokenizerFactory == null) {
  tokenizerFactory = ErasureUtils.uncheckedCast(PTBTokenizer.PTBTokenizerFactory.newCoreLabelTokenizerFactory(tokenizerOptions));
 } else {
  try {
   Class<TokenizerFactory<? extends HasWord>> factoryClass = ErasureUtils.uncheckedCast(Class.forName(flags.tokenizerFactory));
   Method creator = factoryClass.getMethod("newCoreLabelTokenizerFactory", String.class);
   // Static factory method, hence the null receiver.
   tokenizerFactory = ErasureUtils.uncheckedCast(creator.invoke(null, tokenizerOptions));
  } catch (Exception e) {
   throw new RuntimeException(e);
  }
 }
 init(flags, tokenizerFactory);
}

case SLASH_TAGS:
 if (preserveSpacing) {
  printAnswersAsIsText(l, out);
 } else {
  printAnswersTokenizedText(l, out);
  printAnswersXML(l, out);
 } else {
  printAnswersTokenizedXML(l, out);
  printAnswersInlineXML(l, out);
 } else {
  printAnswersTokenizedInlineXML(l, out);
   printAnswersAsIsTextTsv(l, out);
  } else {
   printAnswersTokenizedTextTsv(l, out);
   printAnswersAsIsTextTabbed(l, out);
  } else {
   printAnswersTokenizedTextTabbed(l, out);

/**
 * Prints the answers for a document in the requested output style.
 *
 * Each supported style has two printers; {@code preserveSpacing} selects the
 * spacing-preserving one, otherwise the tokenized one is used.
 *
 * @param l tokens to print
 * @param out destination writer
 * @param outputStyle which output format to produce
 * @param preserveSpacing whether to use the spacing-preserving printer
 * @throws IllegalArgumentException if {@code outputStyle} is not handled here
 */
public void printAnswers(List<IN> l, PrintWriter out,
             OutputStyle outputStyle, boolean preserveSpacing) {
 switch (outputStyle) {
 case SLASH_TAGS:
  if (!preserveSpacing) {
   printAnswersTokenizedText(l, out);
  } else {
   printAnswersAsIsText(l, out);
  }
  return;
 case XML:
  if (!preserveSpacing) {
   printAnswersTokenizedXML(l, out);
  } else {
   printAnswersXML(l, out);
  }
  return;
 case INLINE_XML:
  if (!preserveSpacing) {
   printAnswersTokenizedInlineXML(l, out);
  } else {
   printAnswersInlineXML(l, out);
  }
  return;
 default:
  throw new IllegalArgumentException(outputStyle +
                    " is an unsupported OutputStyle");
 }
}

 new PlainTextDocumentReaderAndWriter<IN>();
readerAndWriter.init(flags);
for (List<IN> doc : documents) {
 List<IN> docOutput = classify(doc);
 sb.append(readerAndWriter.getAnswers(docOutput, outFormat,
                    preserveSpacing));

/**
 * Command-line entry point: builds a NumberSequenceClassifier from the
 * arguments, then optionally loads or trains, serializes, and classifies
 * according to the file-related flags.
 *
 * @param args command-line arguments converted to properties
 * @throws Exception propagated from any of the classifier operations
 */
public static void main(String[] args) throws Exception {
 Properties properties = StringUtils.argsToProperties(args);
 NumberSequenceClassifier classifier =
  new NumberSequenceClassifier(properties, true, properties);

 // Snapshot the flag values up front, before any load/train step runs.
 String trainPath = classifier.flags.trainFile;
 String testPath = classifier.flags.testFile;
 String textPath = classifier.flags.textFile;
 String modelPath = classifier.flags.loadClassifier;
 String outputModelPath = classifier.flags.serializeTo;

 if (modelPath == null) {
  if (trainPath != null) {
   classifier.train(trainPath);
  }
 } else {
  classifier.loadClassifierNoExceptions(modelPath);
  // Re-apply the command-line properties after loading.
  classifier.flags.setProperties(properties);
 }

 if (outputModelPath != null) {
  classifier.serializeClassifier(outputModelPath);
 }

 if (testPath != null) {
  classifier.classifyAndWriteAnswers(testPath, classifier.makeReaderAndWriter(), true);
 }

 if (textPath != null) {
  DocumentReaderAndWriter<CoreLabel> plainTextReaderAndWriter =
      new PlainTextDocumentReaderAndWriter<>();
  classifier.classifyAndWriteAnswers(textPath, plainTextReaderAndWriter, false);
 }
} // end main

/**
 * Renders the answers for a document into a String by delegating to
 * {@code printAnswers} with an in-memory writer.
 *
 * @param l tokens to print
 * @param outputStyle which output format to produce
 * @param preserveSpacing passed through to {@code printAnswers}
 * @return everything {@code printAnswers} wrote
 */
public String getAnswers(List<IN> l,
             OutputStyle outputStyle, boolean preserveSpacing) {
 StringWriter buffer = new StringWriter();
 PrintWriter printer = new PrintWriter(buffer);
 printAnswers(l, printer, outputStyle, preserveSpacing);
 // Flush so buffered output reaches the StringWriter before it is read.
 printer.flush();
 return buffer.toString();
}

if (textDocumentReaderAndWriter instanceof PlainTextDocumentReaderAndWriter) {
 sb.append(((PlainTextDocumentReaderAndWriter<IN>) textDocumentReaderAndWriter).getAnswers(docOutput, outFormat, preserveSpacing));
} else {
 StringWriter sw = new StringWriter();

/**
 * Dispatches to the printer matching the output style and spacing mode.
 *
 * @param l tokens to print
 * @param out destination writer
 * @param outputStyle which output format to produce
 * @param preserveSpacing true selects the spacing-preserving printer for the
 *          style, false selects the tokenized printer
 * @throws IllegalArgumentException if {@code outputStyle} is not one of the
 *           styles handled by this switch
 */
public void printAnswers(List<IN> l, PrintWriter out,
             OutputStyle outputStyle, boolean preserveSpacing) {
 final boolean tokenized = !preserveSpacing;
 switch (outputStyle) {
 case SLASH_TAGS:
  if (tokenized) {
   printAnswersTokenizedText(l, out);
  } else {
   printAnswersAsIsText(l, out);
  }
  break;
 case XML:
  if (tokenized) {
   printAnswersTokenizedXML(l, out);
  } else {
   printAnswersXML(l, out);
  }
  break;
 case INLINE_XML:
  if (tokenized) {
   printAnswersTokenizedInlineXML(l, out);
  } else {
   printAnswersInlineXML(l, out);
  }
  break;
 default:
  throw new IllegalArgumentException(outputStyle +
                    " is an unsupported OutputStyle");
 }
}

    new PlainTextDocumentReaderAndWriter<>();
cmm.classifyAndWriteAnswers(textFile, readerAndWriter, false);

/**
 * Prints the classifications for the document to the given writer, using the
 * format named by the {@code outputFormat} flag.
 *
 * When no flags are set, or the flag is null or empty, the format defaults to
 * {@code slashTags}. Whether spacing is preserved is decided per format by
 * {@code OutputStyle.defaultToPreserveSpacing}; call {@code getAnswers()} to
 * control that choice explicitly.
 *
 * @param list List of tokens with classifier answers
 * @param out Where to print the output to
 */
@Override
public void printAnswers(List<IN> list, PrintWriter out) {
 String formatName = (flags == null) ? null : flags.outputFormat;
 if (formatName == null || formatName.isEmpty()) {
  formatName = "slashTags";
 }
 OutputStyle resolvedStyle = OutputStyle.fromShortName(formatName);
 boolean keepSpacing = OutputStyle.defaultToPreserveSpacing(formatName);
 printAnswers(list, out, resolvedStyle, keepSpacing);
}

      .getAnswers(docOutput, outFormat, preserveSpacing));
} else {
  StringWriter sw = new StringWriter();

case SLASH_TAGS:
 if (preserveSpacing) {
  printAnswersAsIsText(l, out);
 } else {
  printAnswersTokenizedText(l, out);
  printAnswersXML(l, out);
 } else {
  printAnswersTokenizedXML(l, out);
  printAnswersInlineXML(l, out);
 } else {
  printAnswersTokenizedInlineXML(l, out);
   printAnswersAsIsTextTsv(l, out);
  } else {
   printAnswersTokenizedTextTsv(l, out);
   printAnswersAsIsTextTabbed(l, out);
  } else {
   printAnswersTokenizedTextTabbed(l, out);

PlainTextDocumentReaderAndWriter<CoreLabel> readerAndWriter = new PlainTextDocumentReaderAndWriter<>();
ObjectBank<List<CoreLabel>> ob = new ObjectBank<>(new ReaderIteratorFactory(reader), readerAndWriter);
PrintWriter pw = new PrintWriter(writer);
  cl.set(CoreAnnotations.AnswerAnnotation.class, tw.tag());
 readerAndWriter.printAnswers(sentence, pw, outputStyle, true);
 ++numSentences;

/**
 * Classifies every document read from a file.
 *
 * The file is read with a {@code PlainTextDocumentReaderAndWriter} initialized
 * from this classifier's flags. Each document is passed to {@code classify} and
 * its tokens are then copied into a fresh list.
 *
 * @param filename
 *          Contains the sentence(s) to be classified.
 * @return {@link List} of classified List of IN, one inner list per document.
 */
public List<List<IN>> classifyFile(String filename) {
 DocumentReaderAndWriter<IN> plainTextReader =
  new PlainTextDocumentReaderAndWriter<IN>();
 plainTextReader.init(flags);
 ObjectBank<List<IN>> docs =
  makeObjectBankFromFile(filename, plainTextReader);

 List<List<IN>> classified = new ArrayList<List<IN>>();
 for (List<IN> doc : docs) {
  // The return value is ignored; the tokens of doc itself are copied below.
  classify(doc);
  classified.add(new ArrayList<IN>(doc));
 }
 return classified;
}

/**
 * Returns, as a String, the output that {@code printAnswers} produces for the
 * given tokens, style and spacing mode.
 *
 * @param l tokens to print
 * @param outputStyle which output format to produce
 * @param preserveSpacing passed through to {@code printAnswers}
 * @return the captured output
 */
public String getAnswers(List<IN> l,
             OutputStyle outputStyle, boolean preserveSpacing) {
 StringWriter captured = new StringWriter();
 PrintWriter sink = new PrintWriter(captured);
 printAnswers(l, sink, outputStyle, preserveSpacing);
 // Push any buffered characters into the StringWriter before reading it.
 sink.flush();
 return captured.toString();
}

/**
 * Initializes this reader/writer with an explicit tokenizer factory.
 *
 * Chooses the token factory and then delegates to the three-argument
 * {@code init}. When {@code flags.tokenFactory} is null a
 * {@code CoreLabelTokenFactory} is used; otherwise the named class is loaded
 * and instantiated through its no-argument constructor.
 *
 * @param flags configuration; {@code flags.tokenFactory} may name a
 *          {@code CoreTokenFactory} implementation class
 * @param tokenizerFactory tokenizer factory passed on to the delegate
 * @throws RuntimeException if the named token factory class cannot be loaded
 *           or instantiated; the underlying exception is kept as the cause
 */
public void init(SeqClassifierFlags flags, TokenizerFactory<IN> tokenizerFactory) {
 if (flags.tokenFactory == null) {
  this.tokenFactory = (CoreTokenFactory<IN>) new CoreLabelTokenFactory();
 } else {
  try {
   // Class.newInstance() is deprecated because it propagates checked
   // constructor exceptions unchecked; go through the Constructor instead.
   this.tokenFactory = (CoreTokenFactory<IN>)
     Class.forName(flags.tokenFactory).getDeclaredConstructor().newInstance();
  } catch (Exception e) {
   throw new RuntimeException(
     "Unable to instantiate token factory " + flags.tokenFactory, e);
  }
 }
 init(flags, tokenizerFactory, this.tokenFactory);
}

if (textDocumentReaderAndWriter instanceof PlainTextDocumentReaderAndWriter) {
 sb.append(((PlainTextDocumentReaderAndWriter<IN>) textDocumentReaderAndWriter).getAnswers(docOutput, outFormat, preserveSpacing));
} else {
 StringWriter sw = new StringWriter();

case SLASH_TAGS:
 if (preserveSpacing) {
  printAnswersAsIsText(l, out);
 } else {
  printAnswersTokenizedText(l, out);
  printAnswersXML(l, out);
 } else {
  printAnswersTokenizedXML(l, out);
  printAnswersInlineXML(l, out);
 } else {
  printAnswersTokenizedInlineXML(l, out);
   printAnswersAsIsTextTsv(l, out);
  } else {
   printAnswersTokenizedTextTsv(l, out);
   printAnswersAsIsTextTabbed(l, out);
  } else {
   printAnswersTokenizedTextTabbed(l, out);

PlainTextDocumentReaderAndWriter<CoreLabel> readerAndWriter = new PlainTextDocumentReaderAndWriter<>();
ObjectBank<List<CoreLabel>> ob = new ObjectBank<>(new ReaderIteratorFactory(reader), readerAndWriter);
PrintWriter pw = new PrintWriter(writer);
  cl.set(CoreAnnotations.AnswerAnnotation.class, tw.tag());
 readerAndWriter.printAnswers(sentence, pw, outputStyle, true);
 ++numSentences;

Javadoc

This class provides methods for reading plain text documents and writing out those documents once classified in several different formats. The output formats are named: slashTags, xml, inlineXML, tsv, tabbedEntities. Implementation note: see itest/src/edu/stanford/nlp/ie/crf/CRFClassifierITest.java for examples and test cases for the output options. This class works over a list of anything that extends CoreMap. The usual case is CoreLabel.

Most used methods

<init>
Construct a PlainTextDocumentReaderAndWriter. You should call init() after using the constructor.
init
printAnswers
printAnswersAsIsText
printAnswersInlineXML
printAnswersTokenizedInlineXML
printAnswersTokenizedText
printAnswersTokenizedXML
printAnswersXML
getAnswers
printAnswersAsIsTextTabbed
printAnswersAsIsTextTsv

Popular in Java

Start an intent from android
getExternalFilesDir (Context)
getSystemService (Context)
getSupportFragmentManager (FragmentActivity)
BufferedReader (java.io)
Wraps an existing Reader and buffers the input. Expensive interaction with the underlying reader is
Timer (java.util)
Timers schedule one-shot or recurring TimerTask for execution. Prefer java.util.concurrent.Scheduled
ReentrantLock (java.util.concurrent.locks)
A reentrant mutual exclusion Lock with the same basic behavior and semantics as the implicit monitor
HttpServlet (javax.servlet.http)
Provides an abstract class to be subclassed to create an HTTP servlet suitable for a Web site. A sub
Base64 (org.apache.commons.codec.binary)
Provides Base64 encoding and decoding as defined by RFC 2045. This class implements section 6.8. Base
JFileChooser (javax.swing)
Best IntelliJ plugins

How to use PlainTextDocumentReaderAndWriter in edu.stanford.nlp.sequences

Best Java code snippets using edu.stanford.nlp.sequences.PlainTextDocumentReaderAndWriter (Showing top 20 results out of 315)

How to use
PlainTextDocumentReaderAndWriter
in
edu.stanford.nlp.sequences