PlainTextDocumentReaderAndWriter<CoreLabel> readerAndWriter = new PlainTextDocumentReaderAndWriter<>(); ObjectBank<List<CoreLabel>> ob = new ObjectBank<>(new ReaderIteratorFactory(reader), readerAndWriter); PrintWriter pw = new PrintWriter(writer); cl.set(CoreAnnotations.AnswerAnnotation.class, tw.tag()); readerAndWriter.printAnswers(sentence, pw, outputStyle, true); ++numSentences;
/**
 * Initializes this reader/writer from the given flags, first choosing a tokenizer factory.
 *
 * The tokenizer option string always starts with {@code tokenizeNLs=false,invertible=true};
 * any {@code flags.tokenizerOptions} are appended after a comma. If
 * {@code flags.tokenizerFactory} names a class, its static
 * {@code newCoreLabelTokenizerFactory(String)} method is looked up and invoked reflectively;
 * otherwise the PTB tokenizer factory is used.
 *
 * @param flags Classifier flags supplying the tokenizer settings
 * @throws RuntimeException wrapping any exception raised while reflectively building the factory
 */
@Override
public void init(SeqClassifierFlags flags) {
  String optionString = "tokenizeNLs=false,invertible=true";
  if (flags.tokenizerOptions != null) {
    optionString = optionString + ',' + flags.tokenizerOptions;
  }

  TokenizerFactory<IN> chosenFactory;
  if (flags.tokenizerFactory == null) {
    // No custom factory requested: use the PTB tokenizer.
    chosenFactory = ErasureUtils.uncheckedCast(PTBTokenizer.PTBTokenizerFactory.newCoreLabelTokenizerFactory(optionString));
  } else {
    try {
      Class<TokenizerFactory<? extends HasWord>> factoryClass =
          ErasureUtils.uncheckedCast(Class.forName(flags.tokenizerFactory));
      Method creator = factoryClass.getMethod("newCoreLabelTokenizerFactory", String.class);
      // The creator is static, hence the null receiver.
      chosenFactory = ErasureUtils.uncheckedCast(creator.invoke(null, optionString));
    } catch (Exception e) {
      throw new RuntimeException(e);
    }
  }

  // Deliberately outside the try block so failures here are not re-wrapped.
  init(flags, chosenFactory);
}
case SLASH_TAGS: if (preserveSpacing) { printAnswersAsIsText(l, out); } else { printAnswersTokenizedText(l, out); printAnswersXML(l, out); } else { printAnswersTokenizedXML(l, out); printAnswersInlineXML(l, out); } else { printAnswersTokenizedInlineXML(l, out); printAnswersAsIsTextTsv(l, out); } else { printAnswersTokenizedTextTsv(l, out); printAnswersAsIsTextTabbed(l, out); } else { printAnswersTokenizedTextTabbed(l, out);
/**
 * Prints the answers for a document in the requested output style.
 *
 * @param l List of tokens with classifier answers
 * @param out Where to print the output to
 * @param outputStyle One of SLASH_TAGS, XML or INLINE_XML
 * @param preserveSpacing If true, the "as-is"/spacing-preserving printer for the style is used;
 *        otherwise the tokenized printer is used
 * @throws IllegalArgumentException if {@code outputStyle} is not one of the handled styles
 */
public void printAnswers(List<IN> l, PrintWriter out, OutputStyle outputStyle, boolean preserveSpacing) {
  if (preserveSpacing) {
    switch (outputStyle) {
      case SLASH_TAGS:
        printAnswersAsIsText(l, out);
        return;
      case XML:
        printAnswersXML(l, out);
        return;
      case INLINE_XML:
        printAnswersInlineXML(l, out);
        return;
      default:
        break;
    }
  } else {
    switch (outputStyle) {
      case SLASH_TAGS:
        printAnswersTokenizedText(l, out);
        return;
      case XML:
        printAnswersTokenizedXML(l, out);
        return;
      case INLINE_XML:
        printAnswersTokenizedInlineXML(l, out);
        return;
      default:
        break;
    }
  }
  // Only reached when no case above matched.
  throw new IllegalArgumentException(outputStyle + " is an unsupported OutputStyle");
}
new PlainTextDocumentReaderAndWriter<IN>(); readerAndWriter.init(flags); for (List<IN> doc : documents) { List<IN> docOutput = classify(doc); sb.append(readerAndWriter.getAnswers(docOutput, outFormat, preserveSpacing));
/**
 * Command-line entry point: builds a NumberSequenceClassifier from the argument
 * properties, then optionally loads or trains it, serializes it, and classifies
 * a test file and/or a plain text file, depending on which flags are set.
 *
 * @param args Command-line arguments, converted to Properties
 * @throws Exception if any of the invoked steps fails
 */
public static void main(String[] args) throws Exception {
  Properties props = StringUtils.argsToProperties(args);
  NumberSequenceClassifier nsc = new NumberSequenceClassifier(props, true, props);

  // All paths are captured here, before any loading or training takes place.
  String trainPath = nsc.flags.trainFile;
  String testPath = nsc.flags.testFile;
  String textPath = nsc.flags.textFile;
  String classifierPath = nsc.flags.loadClassifier;
  String serializePath = nsc.flags.serializeTo;

  // Loading takes precedence over training.
  if (classifierPath != null) {
    nsc.loadClassifierNoExceptions(classifierPath);
    nsc.flags.setProperties(props);
  } else if (trainPath != null) {
    nsc.train(trainPath);
  }

  if (serializePath != null) {
    nsc.serializeClassifier(serializePath);
  }

  if (testPath != null) {
    nsc.classifyAndWriteAnswers(testPath, nsc.makeReaderAndWriter(), true);
  }

  if (textPath != null) {
    DocumentReaderAndWriter<CoreLabel> plainTextReaderAndWriter = new PlainTextDocumentReaderAndWriter<>();
    nsc.classifyAndWriteAnswers(textPath, plainTextReaderAndWriter, false);
  }
} // end main
/**
 * Renders the answers for a document to a String instead of a writer.
 *
 * @param l List of tokens with classifier answers
 * @param outputStyle Output style passed through to printAnswers
 * @param preserveSpacing Spacing choice passed through to printAnswers
 * @return Everything printAnswers wrote for this document
 */
public String getAnswers(List<IN> l, OutputStyle outputStyle, boolean preserveSpacing) {
  StringWriter buffer = new StringWriter();
  PrintWriter printer = new PrintWriter(buffer);
  printAnswers(l, printer, outputStyle, preserveSpacing);
  // Flush so that anything still held by the PrintWriter reaches the buffer.
  printer.flush();
  return buffer.toString();
}
if (textDocumentReaderAndWriter instanceof PlainTextDocumentReaderAndWriter) { sb.append(((PlainTextDocumentReaderAndWriter<IN>) textDocumentReaderAndWriter).getAnswers(docOutput, outFormat, preserveSpacing)); } else { StringWriter sw = new StringWriter();
/**
 * Prints the answers for a document in the requested output style.
 *
 * @param l List of tokens with classifier answers
 * @param out Where to print the output to
 * @param outputStyle One of SLASH_TAGS, XML or INLINE_XML
 * @param preserveSpacing If true, the "as-is"/spacing-preserving printer for the style is used;
 *        otherwise the tokenized printer is used
 * @throws IllegalArgumentException if {@code outputStyle} is not one of the handled styles
 */
public void printAnswers(List<IN> l, PrintWriter out, OutputStyle outputStyle, boolean preserveSpacing) {
  if (preserveSpacing) {
    switch (outputStyle) {
      case SLASH_TAGS:
        printAnswersAsIsText(l, out);
        return;
      case XML:
        printAnswersXML(l, out);
        return;
      case INLINE_XML:
        printAnswersInlineXML(l, out);
        return;
      default:
        break;
    }
  } else {
    switch (outputStyle) {
      case SLASH_TAGS:
        printAnswersTokenizedText(l, out);
        return;
      case XML:
        printAnswersTokenizedXML(l, out);
        return;
      case INLINE_XML:
        printAnswersTokenizedInlineXML(l, out);
        return;
      default:
        break;
    }
  }
  // Only reached when no case above matched.
  throw new IllegalArgumentException(outputStyle + " is an unsupported OutputStyle");
}
new PlainTextDocumentReaderAndWriter<>(); cmm.classifyAndWriteAnswers(textFile, readerAndWriter, false);
/**
 * Print the classifications for the document to the given Writer, using the
 * style named by the {@code outputFormat} property of the flags. If there are
 * no flags, or the property is null or empty, {@code slashTags} is used.
 * The format names mentioned for this property are slashTags, inlineXML,
 * xml (stand-off XML), tsv, and a 3-column tabbed format.
 * Whether spacing is preserved is not decided here: it is whatever
 * {@code OutputStyle.defaultToPreserveSpacing} returns for the style name.
 * Call getAnswers() to choose the spacing behavior explicitly.
 *
 * @param list List of tokens with classifier answers
 * @param out Where to print the output to
 */
@Override
public void printAnswers(List<IN> list, PrintWriter out) {
  String formatName = (flags == null) ? null : flags.outputFormat;
  if (formatName == null || formatName.isEmpty()) {
    formatName = "slashTags";
  }
  OutputStyle resolvedStyle = OutputStyle.fromShortName(formatName);
  boolean keepSpacing = OutputStyle.defaultToPreserveSpacing(formatName);
  printAnswers(list, out, resolvedStyle, keepSpacing);
}
.getAnswers(docOutput, outFormat, preserveSpacing)); } else { StringWriter sw = new StringWriter();
case SLASH_TAGS: if (preserveSpacing) { printAnswersAsIsText(l, out); } else { printAnswersTokenizedText(l, out); printAnswersXML(l, out); } else { printAnswersTokenizedXML(l, out); printAnswersInlineXML(l, out); } else { printAnswersTokenizedInlineXML(l, out); printAnswersAsIsTextTsv(l, out); } else { printAnswersTokenizedTextTsv(l, out); printAnswersAsIsTextTabbed(l, out); } else { printAnswersTokenizedTextTabbed(l, out);
PlainTextDocumentReaderAndWriter<CoreLabel> readerAndWriter = new PlainTextDocumentReaderAndWriter<>(); ObjectBank<List<CoreLabel>> ob = new ObjectBank<>(new ReaderIteratorFactory(reader), readerAndWriter); PrintWriter pw = new PrintWriter(writer); cl.set(CoreAnnotations.AnswerAnnotation.class, tw.tag()); readerAndWriter.printAnswers(sentence, pw, outputStyle, true); ++numSentences;
/** * Classify the contents of a file. * * @param filename * Contains the sentence(s) to be classified. * @return {@link List} of classified List of IN. */ public List<List<IN>> classifyFile(String filename) { DocumentReaderAndWriter<IN> readerAndWriter = new PlainTextDocumentReaderAndWriter<IN>(); readerAndWriter.init(flags); ObjectBank<List<IN>> documents = makeObjectBankFromFile(filename, readerAndWriter); List<List<IN>> result = new ArrayList<List<IN>>(); for (List<IN> document : documents) { // System.err.println(document); classify(document); List<IN> sentence = new ArrayList<IN>(); for (IN wi : document) { sentence.add(wi); // System.err.println(wi); } result.add(sentence); } return result; }
/**
 * Renders the answers for a document to a String instead of a writer.
 *
 * @param l List of tokens with classifier answers
 * @param outputStyle Output style passed through to printAnswers
 * @param preserveSpacing Spacing choice passed through to printAnswers
 * @return Everything printAnswers wrote for this document
 */
public String getAnswers(List<IN> l, OutputStyle outputStyle, boolean preserveSpacing) {
  StringWriter buffer = new StringWriter();
  PrintWriter printer = new PrintWriter(buffer);
  printAnswers(l, printer, outputStyle, preserveSpacing);
  // Flush so that anything still held by the PrintWriter reaches the buffer.
  printer.flush();
  return buffer.toString();
}
/**
 * Initializes this reader/writer with the given tokenizer factory, first choosing
 * the token factory: a CoreLabelTokenFactory when {@code flags.tokenFactory} is
 * null, otherwise an instance of the class that property names, created through
 * its no-argument constructor.
 *
 * @param flags Classifier flags; {@code flags.tokenFactory} optionally names the token factory class
 * @param tokenizerFactory Tokenizer factory passed on to the three-argument init
 * @throws RuntimeException wrapping any exception raised while loading or instantiating
 *         the named class (a failure inside its constructor arrives as an
 *         InvocationTargetException cause)
 */
public void init(SeqClassifierFlags flags, TokenizerFactory<IN> tokenizerFactory) {
  if (flags.tokenFactory == null) {
    this.tokenFactory = (CoreTokenFactory<IN>) new CoreLabelTokenFactory();
  } else {
    try {
      // Class.newInstance() is deprecated and rethrows constructor exceptions unchecked;
      // going through the Constructor object is the documented replacement.
      this.tokenFactory = (CoreTokenFactory<IN>) Class.forName(flags.tokenFactory)
          .getDeclaredConstructor()
          .newInstance();
    } catch (Exception e) {
      throw new RuntimeException(e);
    }
  }
  init(flags, tokenizerFactory, tokenFactory);
}
if (textDocumentReaderAndWriter instanceof PlainTextDocumentReaderAndWriter) { sb.append(((PlainTextDocumentReaderAndWriter<IN>) textDocumentReaderAndWriter).getAnswers(docOutput, outFormat, preserveSpacing)); } else { StringWriter sw = new StringWriter();
case SLASH_TAGS: if (preserveSpacing) { printAnswersAsIsText(l, out); } else { printAnswersTokenizedText(l, out); printAnswersXML(l, out); } else { printAnswersTokenizedXML(l, out); printAnswersInlineXML(l, out); } else { printAnswersTokenizedInlineXML(l, out); printAnswersAsIsTextTsv(l, out); } else { printAnswersTokenizedTextTsv(l, out); printAnswersAsIsTextTabbed(l, out); } else { printAnswersTokenizedTextTabbed(l, out);
PlainTextDocumentReaderAndWriter<CoreLabel> readerAndWriter = new PlainTextDocumentReaderAndWriter<>(); ObjectBank<List<CoreLabel>> ob = new ObjectBank<>(new ReaderIteratorFactory(reader), readerAndWriter); PrintWriter pw = new PrintWriter(writer); cl.set(CoreAnnotations.AnswerAnnotation.class, tw.tag()); readerAndWriter.printAnswers(sentence, pw, outputStyle, true); ++numSentences;