/**
 * Recursively walks the elements below {@code node}, in the order returned by
 * {@code getElements()}: leaves are handed to {@code processLeaf}, inner nodes
 * are descended into.
 *
 * @param node
 *          the node whose elements are visited; {@code null} is ignored
 * @param sentence
 *          list passed through to {@code processLeaf}
 * @param tags
 *          list passed through to {@code processLeaf}
 */
private void process(Node node, List<String> sentence, List<String> tags) {
  if (node == null) {
    return;
  }
  for (TreeElement child : node.getElements()) {
    if (!child.isLeaf()) {
      // inner node: recurse with the same accumulators
      process((Node) child, sentence, tags);
      continue;
    }
    processLeaf((Leaf) child, sentence, tags);
  }
}
/**
 * Processes the direct children of the root node. Leaf children are passed to
 * {@code processLeaf} with the {@code false} flag and the {@code OTHER}
 * constant; node children are passed to {@code processNode} with a
 * {@code null} last argument.
 *
 * @param root
 *          the root node; {@code null} is ignored
 * @param sentence
 *          list passed through to the leaf/node handlers
 * @param tags
 *          list passed through to the leaf/node handlers
 * @param target
 *          list passed through to the leaf/node handlers
 */
protected void processRoot(Node root, List<String> sentence, List<String> tags,
    List<String> target) {
  if (root == null) {
    return;
  }
  for (TreeElement child : root.getElements()) {
    if (child.isLeaf()) {
      processLeaf((Leaf) child, false, OTHER, sentence, tags, target);
    } else {
      processNode((Node) child, sentence, tags, target, null);
    }
  }
}
/**
 * Creates a new {@code ADChunkSampleStream} on top of a line stream, i.e.
 * {@code ObjectStream<String>}, that could be a
 * {@link PlainTextByLineStream} object. The line stream is wrapped in an
 * {@link ADSentenceStream}, which is stored as the sentence source of this
 * stream.
 *
 * @param lineStream
 *          a stream of lines as {@link String}
 */
public ADChunkSampleStream(ObjectStream<String> lineStream) {
  this.adSentenceStream = new ADSentenceStream(lineStream);
}
/**
 * Builds the POS sample stream from command line arguments.
 * <p>
 * The arguments are parsed into {@code Parameters}, the configured language
 * is stored in {@code language}, and the data is opened as a line stream
 * using the configured encoding.
 *
 * @param args
 *          the command line arguments to parse
 * @return an {@link ADPOSSampleStream} over the configured data
 */
public ObjectStream<POSSample> create(String[] args) {
  Parameters parsed = ArgumentParser.parse(args, Parameters.class);
  language = parsed.getLang();

  InputStreamFactory dataFactory = CmdLineUtil.createInputStreamFactory(parsed.getData());

  ObjectStream<String> lines = null;
  try {
    lines = new PlainTextByLineStream(dataFactory, parsed.getEncoding());
  } catch (IOException ioe) {
    // NOTE(review): if this handler ever returns normally, 'lines' stays null
    // and is passed on as-is; confirm that it always throws.
    CmdLineUtil.handleCreateObjectStreamError(ioe);
  }

  return new ADPOSSampleStream(lines, parsed.getExpandME(), parsed.getIncludeFeatures());
}
/**
 * Builds the sentence sample stream from command line arguments.
 * <p>
 * The arguments are parsed into {@code Parameters}, the configured language
 * is stored in {@code language}, and the data is opened as a line stream
 * using the configured encoding.
 *
 * @param args
 *          the command line arguments to parse
 * @return an {@link ADSentenceSampleStream} over the configured data
 */
public ObjectStream<SentenceSample> create(String[] args) {
  Parameters parsed = ArgumentParser.parse(args, Parameters.class);
  language = parsed.getLang();
  // read the flag up front, before the data is opened
  boolean withTitles = parsed.getIncludeTitles();

  InputStreamFactory dataFactory = CmdLineUtil.createInputStreamFactory(parsed.getData());

  ObjectStream<String> lines = null;
  try {
    lines = new PlainTextByLineStream(dataFactory, parsed.getEncoding());
  } catch (IOException ioe) {
    // NOTE(review): if this handler ever returns normally, 'lines' stays null
    // and is passed on as-is; confirm that it always throws.
    CmdLineUtil.handleCreateObjectStreamError(ioe);
  }

  return new ADSentenceSampleStream(lines, withTitles);
}
}
/**
 * Recursively visits a node in Arvores Deitadas format: every leaf below it
 * is handed to {@code processLeaf}, every inner node is descended into.
 *
 * @param node
 *          the node to be processed; {@code null} is ignored
 * @param sentence
 *          the sentence tokens collected so far
 * @param names
 *          the name spans collected so far
 */
private void process(Node node, List<String> sentence, List<Span> names) {
  if (node == null) {
    return;
  }
  for (TreeElement child : node.getElements()) {
    if (!child.isLeaf()) {
      // inner node: recurse with the same accumulators
      process((Node) child, sentence, names);
      continue;
    }
    processLeaf((Leaf) child, sentence, names);
  }
}
/**
 * Builds the name sample stream from command line arguments.
 * <p>
 * The arguments are parsed into {@code Parameters}, the configured language
 * is stored in {@code language}, and the data is opened as a line stream
 * using the configured encoding.
 *
 * @param args
 *          the command line arguments to parse
 * @return an {@link ADNameSampleStream} over the configured data
 */
public ObjectStream<NameSample> create(String[] args) {
  Parameters parsed = ArgumentParser.parse(args, Parameters.class);
  language = parsed.getLang();

  InputStreamFactory dataFactory = CmdLineUtil.createInputStreamFactory(parsed.getData());

  ObjectStream<String> lines = null;
  try {
    lines = new PlainTextByLineStream(dataFactory, parsed.getEncoding());
  } catch (IOException ioe) {
    // NOTE(review): if this handler ever returns normally, 'lines' stays null
    // and is passed on as-is; confirm that it always throws.
    CmdLineUtil.handleCreateObjectStreamError(ioe);
  }

  return new ADNameSampleStream(lines, parsed.getSplitHyphenatedTokens());
}
}
/**
 * Reads the next sentence from the underlying sentence stream and converts
 * it into a {@link POSSample}.
 *
 * @return the sample built from the next sentence, or {@code null} when the
 *         underlying stream has no more sentences
 * @throws IOException
 *           if reading from the underlying stream fails
 */
public POSSample read() throws IOException {
  Sentence next = this.adSentenceStream.read();
  if (next == null) {
    return null;
  }

  List<String> tokens = new ArrayList<>();
  List<String> posTags = new ArrayList<>();
  // fills both lists by walking the sentence tree from its root
  process(next.getRoot(), tokens, posTags);

  return new POSSample(tokens, posTags);
}
@Test public void testSimpleReading() throws IOException { int count = 0; ADSentenceStream stream = openData(); ADSentenceStream.Sentence paragraph = stream.read(); paragraph.getRoot(); while (paragraph != null) { count++; paragraph = stream.read(); // paragraph.getRoot(); } Assert.assertEquals(ADParagraphStreamTest.NUM_SENTENCES, count); }
/**
 * Registers an {@code ADTokenSampleStreamFactory} with the
 * {@link StreamFactoryRegistry} for {@code TokenSample} under the format
 * name {@code "ad"}.
 */
public static void registerFactory() {
  ADTokenSampleStreamFactory adFactory = new ADTokenSampleStreamFactory(Parameters.class);
  StreamFactoryRegistry.registerFactory(TokenSample.class, "ad", adFactory);
}
/**
 * Maps the syntactic tag of a leaf to a chunk tag.
 *
 * @param leaf
 *          the leaf whose syntactic tag is inspected
 * @return {@code "VP"} when the syntactic tag is {@code "P"}, otherwise
 *         {@code null}
 */
protected String getChunkTag(Leaf leaf) {
  return "P".equals(leaf.getSyntacticTag()) ? "VP" : null;
}
/**
 * Creates a sentence stream on top of the given line stream. The line stream
 * is handed to the superclass constructor and a fresh {@code SentenceParser}
 * is stored in {@code parser}.
 *
 * @param lineStream
 *          a stream of lines as {@link String}
 */
public ADSentenceStream(ObjectStream<String> lineStream) {
  super(lineStream);
  parser = new SentenceParser();
}
/**
 * Registers an {@code ADSentenceSampleStreamFactory} with the
 * {@link StreamFactoryRegistry} for {@code SentenceSample} under the format
 * name {@code "ad"}.
 */
public static void registerFactory() {
  ADSentenceSampleStreamFactory adFactory = new ADSentenceSampleStreamFactory(Parameters.class);
  StreamFactoryRegistry.registerFactory(SentenceSample.class, "ad", adFactory);
}
/**
 * Registers an {@code ADNameSampleStreamFactory} with the
 * {@link StreamFactoryRegistry} for {@code NameSample} under the format
 * name {@code "ad"}.
 */
public static void registerFactory() {
  ADNameSampleStreamFactory adFactory = new ADNameSampleStreamFactory(Parameters.class);
  StreamFactoryRegistry.registerFactory(NameSample.class, "ad", adFactory);
}
/**
 * Registers an {@code ADChunkSampleStreamFactory} with the
 * {@link StreamFactoryRegistry} for {@code ChunkSample} under the format
 * name {@code "ad"}.
 */
public static void registerFactory() {
  ADChunkSampleStreamFactory adFactory = new ADChunkSampleStreamFactory(Parameters.class);
  StreamFactoryRegistry.registerFactory(ChunkSample.class, "ad", adFactory);
}
/**
 * Registers an {@code ADPOSSampleStreamFactory} with the
 * {@link StreamFactoryRegistry} for {@code POSSample} under the format
 * name {@code "ad"}.
 */
public static void registerFactory() {
  ADPOSSampleStreamFactory adFactory = new ADPOSSampleStreamFactory(Parameters.class);
  StreamFactoryRegistry.registerFactory(POSSample.class, "ad", adFactory);
}
/**
 * Reads the whole test data set and checks that the number of sentences read
 * matches {@code ADParagraphStreamTest.NUM_SENTENCES}.
 */
@Test
public void testLeadingWithContraction() throws IOException {
  ADSentenceStream stream = openData();

  int sentencesRead = 0;
  for (ADSentenceStream.Sentence current = stream.read(); current != null;
      current = stream.read()) {
    sentencesRead++;
  }

  Assert.assertEquals(ADParagraphStreamTest.NUM_SENTENCES, sentencesRead);
}
/**
 * Creates a new {@link NameSample} stream from a line stream, i.e.
 * {@code ObjectStream<String>}, that could be a
 * {@link PlainTextByLineStream} object. The line stream is wrapped in an
 * {@link ADSentenceStream}.
 *
 * @param lineStream
 *          a stream of lines as {@link String}
 * @param splitHyphenatedTokens
 *          if true hyphenated tokens will be separated:
 *          {@code "carros-monstro"} becomes
 *          {@code "carros" "-" "monstro"}
 */
public ADNameSampleStream(ObjectStream<String> lineStream, boolean splitHyphenatedTokens) {
  this.adSentenceStream = new ADSentenceStream(lineStream);
  this.splitHyphenatedTokens = splitHyphenatedTokens;
}
/**
 * Creates a new {@link POSSample} stream from a line stream, i.e.
 * {@code ObjectStream<String>}, that could be a
 * {@link PlainTextByLineStream} object. The line stream is wrapped in an
 * {@link ADSentenceStream}.
 *
 * @param lineStream
 *          a stream of lines as {@link String}
 * @param expandME
 *          if true will expand the multiword expressions, each word of the
 *          expression will have the POS Tag that was attributed to the
 *          expression plus the prefix B- or I- (CONLL convention)
 * @param includeFeatures
 *          if true will combine the POS Tag with the feature tags
 */
public ADPOSSampleStream(ObjectStream<String> lineStream, boolean expandME,
    boolean includeFeatures) {
  this.adSentenceStream = new ADSentenceStream(lineStream);
  this.expandME = expandME;
  this.isIncludeFeatures = includeFeatures;
}
/**
 * Creates a new {@link SentenceSample} stream from a line stream, i.e.
 * {@code ObjectStream<String>}, that could be a
 * {@link PlainTextByLineStream} object. The line stream is wrapped in an
 * {@link ADSentenceStream}.
 * <p>
 * The end-of-sentence characters are taken from
 * {@code Factory.ptEosCharacters} and kept in sorted order in a private copy
 * owned by this stream.
 *
 * @param lineStream
 *          a stream of lines as {@link String}
 * @param includeHeadlines
 *          if true will output the sentences marked as news headlines
 */
public ADSentenceSampleStream(ObjectStream<String> lineStream, boolean includeHeadlines) {
  this.adSentenceStream = new ADSentenceStream(lineStream);
  // Sort a private copy: Arrays.sort works in place, so sorting
  // Factory.ptEosCharacters directly would reorder the shared array for
  // every other user of that constant.
  ptEosCharacters = Factory.ptEosCharacters.clone();
  Arrays.sort(ptEosCharacters);
  this.isIncludeTitles = includeHeadlines;
}