public Span[] find(String[] textTokenized) { List<Span> namesFound = new LinkedList<>(); for (int offsetFrom = 0; offsetFrom < textTokenized.length; offsetFrom++) { Span nameFound = null; String[] tokensSearching; for (int offsetTo = offsetFrom; offsetTo < textTokenized.length; offsetTo++) { int lengthSearching = offsetTo - offsetFrom + 1; if (lengthSearching > mDictionary.getMaxTokenCount()) { break; } else { tokensSearching = new String[lengthSearching]; System.arraycopy(textTokenized, offsetFrom, tokensSearching, 0, lengthSearching); StringList entryForSearch = new StringList(tokensSearching); if (mDictionary.contains(entryForSearch)) { nameFound = new Span(offsetFrom, offsetTo + 1, type); } } } if (nameFound != null) { namesFound.add(nameFound); // skip over the found tokens for the next search offsetFrom += nameFound.length() - 1; } } return namesFound.toArray(new Span[namesFound.size()]); }
/**
 * Computes one probability per span: the arithmetic mean of the probabilities of the
 * outcomes covered by that span, taken from {@code bestSequence.getProbs()}.
 *
 * @param spans The spans of the names for which probabilities are desired.
 *
 * @return an array holding, at index {@code i}, the mean probability for {@code spans[i]}.
 */
public double[] probs(Span[] spans) {
  double[] spanProbs = new double[spans.length];
  double[] outcomeProbs = bestSequence.getProbs();

  int index = 0;
  for (Span span : spans) {
    double total = 0;
    for (int outcome = span.getStart(); outcome < span.getEnd(); outcome++) {
      total += outcomeProbs[outcome];
    }
    // mean over the outcomes that make up the span
    spanProbs[index++] = total / span.length();
  }

  return spanProbs;
}
/**
 * Checks that {@link Span#length()} reports 11 for a span created with start 10 and end 21.
 */
@Test
public void testLength() {
  Span span = new Span(10, 21);

  Assert.assertEquals(11, span.length());
}
/**
 * Runs the name finder over the sentence {@code a b michael jordan} and expects exactly
 * one span, covering two tokens.
 */
@Test
public void testCaseLongerEntry() {
  String[] sentence = {"a", "b", "michael", "jordan"};

  Span[] names = mNameFinder.find(sentence);

  // assertEquals (instead of assertTrue on ==) reports the actual value on failure
  Assert.assertEquals(1, names.length);
  Assert.assertEquals(2, names[0].length());
}
}
/**
 * Returns the size of this object as reported by {@code chunkSpan.length()}.
 *
 * @return the length of {@code chunkSpan}.
 */
public int getSize(){ return chunkSpan.length(); } /**
/**
 * Returns the size of this object as reported by {@code chunkSpan.length()}.
 *
 * @return the length of {@code chunkSpan}.
 */
public int getSize(){ return chunkSpan.length(); } /**
if (name.length() > 1) { if (name.getType() == null) { outcomes[name.getStart()] = "default" + "-" + BilouCodec.START;
offsetFrom += multiwordFound.length() - 1;
public Span[] find(String[] textTokenized) { List<Span> namesFound = new LinkedList<>(); for (int offsetFrom = 0; offsetFrom < textTokenized.length; offsetFrom++) { Span nameFound = null; String[] tokensSearching; for (int offsetTo = offsetFrom; offsetTo < textTokenized.length; offsetTo++) { int lengthSearching = offsetTo - offsetFrom + 1; if (lengthSearching > mDictionary.getMaxTokenCount()) { break; } else { tokensSearching = new String[lengthSearching]; System.arraycopy(textTokenized, offsetFrom, tokensSearching, 0, lengthSearching); StringList entryForSearch = new StringList(tokensSearching); if (mDictionary.contains(entryForSearch)) { nameFound = new Span(offsetFrom, offsetTo + 1, type); } } } if (nameFound != null) { namesFound.add(nameFound); // skip over the found tokens for the next search offsetFrom += nameFound.length() - 1; } } return namesFound.toArray(new Span[namesFound.size()]); }
public Span[] find(String[] textTokenized) { List<Span> namesFound = new LinkedList<>(); for (int offsetFrom = 0; offsetFrom < textTokenized.length; offsetFrom++) { Span nameFound = null; String[] tokensSearching; for (int offsetTo = offsetFrom; offsetTo < textTokenized.length; offsetTo++) { int lengthSearching = offsetTo - offsetFrom + 1; if (lengthSearching > mDictionary.getMaxTokenCount()) { break; } else { tokensSearching = new String[lengthSearching]; System.arraycopy(textTokenized, offsetFrom, tokensSearching, 0, lengthSearching); StringList entryForSearch = new StringList(tokensSearching); if (mDictionary.contains(entryForSearch)) { nameFound = new Span(offsetFrom, offsetTo + 1, type); } } } if (nameFound != null) { namesFound.add(nameFound); // skip over the found tokens for the next search offsetFrom += nameFound.length() - 1; } } return namesFound.toArray(new Span[namesFound.size()]); }
public static List<Token> groupTokens(String text, List<Token> toks, List<? extends Span> spans, String additionalContext) { for (int i = spans.size() - 1; i >= 0; i--) { Span span = spans.get(i); if (span.length() > 0) { int s = toks.get(span.getStart()).getStart(); int e = toks.get(span.getEnd() - 1).getEnd(); String lexeme = text.substring(s, e).replace(" ", "_"); List<Token> removeToks = new ArrayList<Token>(); for (int j = span.getEnd() - 1; j >= span.getStart(); j--) { removeToks.add(toks.remove(j)); } Token t = new TokenImpl(s, e, lexeme); t.setPOSTag(span.getType()); // if(additionalContext != null) { // t.addContext(analyzer, additionalContext); // t.setAdditionalContext(additionalContext); // } toks.add(span.getStart(), t); } } return toks; }
public static List<Token> groupTokens(String text, List<Token> toks, List<? extends Span> spans, String additionalContext) { for (int i = spans.size() - 1; i >= 0; i--) { Span span = spans.get(i); if (span.length() > 0) { int s = toks.get(span.getStart()).getStart(); int e = toks.get(span.getEnd() - 1).getEnd(); String lexeme = text.substring(s, e).replace(" ", "_"); List<Token> removeToks = new ArrayList<Token>(); for (int j = span.getEnd() - 1; j >= span.getStart(); j--) { removeToks.add(toks.remove(j)); } Token t = new TokenImpl(s, e, lexeme); t.setPOSTag(span.getType()); // if(additionalContext != null) { // t.addContext(analyzer, additionalContext); // t.setAdditionalContext(additionalContext); // } toks.add(span.getStart(), t); } } return toks; }
/**
 * Computes one probability per span: the arithmetic mean of the probabilities of the
 * outcomes covered by that span, taken from {@code bestSequence.getProbs()}.
 *
 * @param spans The spans of the names for which probabilities are desired.
 *
 * @return an array holding, at index {@code i}, the mean probability for {@code spans[i]}.
 */
public double[] probs(Span[] spans) {
  double[] spanProbs = new double[spans.length];
  double[] outcomeProbs = bestSequence.getProbs();

  int index = 0;
  for (Span span : spans) {
    double total = 0;
    for (int outcome = span.getStart(); outcome < span.getEnd(); outcome++) {
      total += outcomeProbs[outcome];
    }
    // mean over the outcomes that make up the span
    spanProbs[index++] = total / span.length();
  }

  return spanProbs;
}
/**
 * Computes one probability per span: the arithmetic mean of the probabilities of the
 * outcomes covered by that span, taken from {@code bestSequence.getProbs()}.
 *
 * @param spans The spans of the names for which probabilities are desired.
 *
 * @return an array holding, at index {@code i}, the mean probability for {@code spans[i]}.
 */
public double[] probs(Span[] spans) {
  double[] spanProbs = new double[spans.length];
  double[] outcomeProbs = bestSequence.getProbs();

  int index = 0;
  for (Span span : spans) {
    double total = 0;
    for (int outcome = span.getStart(); outcome < span.getEnd(); outcome++) {
      total += outcomeProbs[outcome];
    }
    // mean over the outcomes that make up the span
    spanProbs[index++] = total / span.length();
  }

  return spanProbs;
}
if (name.length() > 1) { if (name.getType() == null) { outcomes[name.getStart()] = "default" + "-" + BilouCodec.START;