/**
 * Emits a constant baseline feature; neither view is inspected.
 *
 * @param view1 first view of the pair (unused)
 * @param view2 second view of the pair (unused)
 * @return the result of {@code asSet()} on a numeric feature named
 *         "BaselineFeature" whose value is the integer 0
 * @throws TextClassificationException declared by the signature; nothing in this body throws it
 */
@Override
public Set<Feature> extract(JCas view1, JCas view2)
    throws TextClassificationException
{
    final Feature baseline = new Feature("BaselineFeature", 0, FeatureType.NUMERIC);
    return baseline.asSet();
}
/**
 * Computes the signed difference in the number of {@code Token} annotations
 * between two views (view1 minus view2).
 *
 * @param view1 view whose token count is the minuend
 * @param view2 view whose token count is the subtrahend
 * @return the result of {@code asSet()} on a numeric feature named "DiffNrOfTokens"
 * @throws TextClassificationException declared by the signature; nothing in this body throws it
 */
@Override
public Set<Feature> extract(JCas view1, JCas view2)
    throws TextClassificationException
{
    final int tokensInFirst = JCasUtil.select(view1, Token.class).size();
    final int tokensInSecond = JCasUtil.select(view2, Token.class).size();
    final int difference = tokensInFirst - tokensInSecond;

    final Feature feature = new Feature("DiffNrOfTokens", difference, FeatureType.NUMERIC);
    return feature.asSet();
}
} // closes an enclosing scope whose opening lies outside this view
/**
 * Computes the signed difference in the number of {@code Sentence} annotations
 * between two views (view1 minus view2).
 *
 * @param view1 view whose sentence count is the minuend
 * @param view2 view whose sentence count is the subtrahend
 * @return the result of {@code asSet()} on a numeric feature named "DiffNrOfSentences"
 * @throws TextClassificationException declared by the signature; nothing in this body throws it
 */
@Override
public Set<Feature> extract(JCas view1, JCas view2)
    throws TextClassificationException
{
    final int sentencesInFirst = JCasUtil.select(view1, Sentence.class).size();
    final int sentencesInSecond = JCasUtil.select(view2, Sentence.class).size();
    final int difference = sentencesInFirst - sentencesInSecond;

    final Feature feature = new Feature("DiffNrOfSentences", difference, FeatureType.NUMERIC);
    return feature.asSet();
}
} // closes an enclosing scope whose opening lies outside this view
/**
 * Reports whether the text covered by the target starts with an upper-case character.
 *
 * <p>The feature value is 1.0 when the first character is upper-case and 0.0 otherwise.
 * A target that covers no text (null or empty string) is treated as "not upper-case"
 * instead of failing with a {@link StringIndexOutOfBoundsException} on {@code charAt(0)}.
 *
 * @param jcas    the CAS containing the target (not inspected directly)
 * @param aTarget the target whose covered text is examined
 * @return the result of {@code asSet()} on a BOOLEAN feature named {@code FEATURE_NAME}
 * @throws TextClassificationException declared by the signature; nothing in this body throws it
 */
@Override
public Set<Feature> extract(JCas jcas, TextClassificationTarget aTarget)
    throws TextClassificationException
{
    String token = aTarget.getCoveredText();

    // Guard first: charAt(0) is only valid on a non-empty string.
    boolean startsUpperCase = token != null
            && !token.isEmpty()
            && Character.isUpperCase(token.charAt(0));

    double value = startsUpperCase ? 1.0 : 0.0;

    // The third constructor argument is true exactly when the value is 0.0
    // (presumably the "is default value" flag — confirm against Feature).
    return new Feature(FEATURE_NAME, value, !startsUpperCase, FeatureType.BOOLEAN).asSet();
}
} // closes an enclosing scope whose opening lies outside this view
/**
 * Reports whether the text covered by the target starts with an upper-case character.
 *
 * <p>The feature value is 1.0 when the first character is upper-case and 0.0 otherwise.
 * A target that covers no text (null or empty string) is treated as "not upper-case"
 * instead of failing with a {@link StringIndexOutOfBoundsException} on {@code charAt(0)}.
 *
 * @param jcas    the CAS containing the target (not inspected directly)
 * @param aTarget the target whose covered text is examined
 * @return the result of {@code asSet()} on a BOOLEAN feature named {@code FEATURE_NAME}
 * @throws TextClassificationException declared by the signature; nothing in this body throws it
 */
@Override
public Set<Feature> extract(JCas jcas, TextClassificationTarget aTarget)
    throws TextClassificationException
{
    String token = aTarget.getCoveredText();

    // Guard first: charAt(0) is only valid on a non-empty string.
    boolean startsUpperCase = token != null
            && !token.isEmpty()
            && Character.isUpperCase(token.charAt(0));

    double value = startsUpperCase ? 1.0 : 0.0;

    // The third constructor argument is true exactly when the value is 0.0
    // (presumably the "is default value" flag — confirm against Feature).
    return new Feature(FEATURE_NAME, value, !startsUpperCase, FeatureType.BOOLEAN).asSet();
}
} // closes an enclosing scope whose opening lies outside this view
/**
 * Computes the share of tokens inside the target that are tagged as emoticons.
 *
 * <p>The ratio is the number of {@code POS_EMO} annotations covered by the target
 * divided by the number of {@code Token} annotations covered by the target. When the
 * target covers no tokens the ratio is reported as 0.0; an unguarded floating-point
 * division would otherwise yield {@code NaN} (0/0) or {@code Infinity} (n/0).
 *
 * @param jCas    the CAS holding the annotations
 * @param aTarget the target whose covered annotations are counted
 * @return the result of {@code asSet()} on a numeric feature named after
 *         {@code EmoticonRatio.class.getSimpleName()}
 * @throws TextClassificationException declared by the signature; nothing in this body throws it
 */
@Override
public Set<Feature> extract(JCas jCas, TextClassificationTarget aTarget)
    throws TextClassificationException
{
    int nrOfEmoticons = JCasUtil.selectCovered(jCas, POS_EMO.class, aTarget).size();
    int nrOfTokens = JCasUtil.selectCovered(jCas, Token.class, aTarget).size();

    // Avoid emitting NaN/Infinity for a target without tokens.
    double ratio = nrOfTokens == 0 ? 0.0 : (double) nrOfEmoticons / nrOfTokens;

    return new Feature(EmoticonRatio.class.getSimpleName(), ratio, FeatureType.NUMERIC).asSet();
}
} // closes an enclosing scope whose opening lies outside this view
/**
 * Computes the share of tokens inside the target that are tagged as emoticons.
 *
 * <p>The ratio is the number of {@code POS_EMO} annotations covered by the target
 * divided by the number of {@code Token} annotations covered by the target. When the
 * target covers no tokens the ratio is reported as 0.0; an unguarded floating-point
 * division would otherwise yield {@code NaN} (0/0) or {@code Infinity} (n/0).
 *
 * @param jCas    the CAS holding the annotations
 * @param aTarget the target whose covered annotations are counted
 * @return the result of {@code asSet()} on a numeric feature named after
 *         {@code EmoticonRatio.class.getSimpleName()}
 * @throws TextClassificationException declared by the signature; nothing in this body throws it
 */
@Override
public Set<Feature> extract(JCas jCas, TextClassificationTarget aTarget)
    throws TextClassificationException
{
    int nrOfEmoticons = JCasUtil.selectCovered(jCas, POS_EMO.class, aTarget).size();
    int nrOfTokens = JCasUtil.selectCovered(jCas, Token.class, aTarget).size();

    // Avoid emitting NaN/Infinity for a target without tokens.
    double ratio = nrOfTokens == 0 ? 0.0 : (double) nrOfEmoticons / nrOfTokens;

    return new Feature(EmoticonRatio.class.getSimpleName(), ratio, FeatureType.NUMERIC).asSet();
}
} // closes an enclosing scope whose opening lies outside this view
/**
 * Computes the signed difference in document-text length, measured with
 * {@code String.length()}, between two views (view1 minus view2).
 *
 * @param view1 view whose document text length is the minuend
 * @param view2 view whose document text length is the subtrahend
 * @return the result of {@code asSet()} on a numeric feature named "DiffNrOfCharacters"
 * @throws TextClassificationException declared by the signature; nothing in this body throws it
 */
@Override
public Set<Feature> extract(JCas view1, JCas view2)
    throws TextClassificationException
{
    final int lengthOfFirst = view1.getDocumentText().length();
    final int lengthOfSecond = view2.getDocumentText().length();
    final int difference = lengthOfFirst - lengthOfSecond;

    final Feature feature = new Feature("DiffNrOfCharacters", difference, FeatureType.NUMERIC);
    return feature.asSet();
}
} // closes an enclosing scope whose opening lies outside this view
/**
 * Emits the shared-noun-chunk value for a pair of views.
 *
 * <p>The {@code normalizeWithFirst} field selects both the feature name and the
 * argument order passed to {@code getSharedNounChunksCount}: (view1, view2) under the
 * name "SharedNounChunkView1", otherwise (view2, view1) under "SharedNounChunkView2".
 *
 * @param view1 first view of the pair
 * @param view2 second view of the pair
 * @return the result of {@code asSet()} on the numeric feature described above
 * @throws TextClassificationException declared by the signature
 */
@Override
public Set<Feature> extract(JCas view1, JCas view2)
    throws TextClassificationException
{
    // Handle the "second view first" variant up front, then fall through to the other one.
    if (!normalizeWithFirst) {
        return new Feature(
                "SharedNounChunkView2",
                getSharedNounChunksCount(view2, view1),
                FeatureType.NUMERIC).asSet();
    }

    return new Feature(
            "SharedNounChunkView1",
            getSharedNounChunksCount(view1, view2),
            FeatureType.NUMERIC).asSet();
}
/**
 * Counts the matches of {@code HASHTAG_PATTERN} inside the target's span of the
 * document text.
 *
 * @param jCas    the CAS whose document text is searched
 * @param aTarget the target whose begin/end offsets delimit the searched substring
 * @return the result of {@code asSet()} on a numeric feature named after
 *         {@code NumberOfHashTags.class.getSimpleName()} holding the match count
 * @throws TextClassificationException declared by the signature; nothing in this body throws it
 */
@Override
public Set<Feature> extract(JCas jCas, TextClassificationTarget aTarget)
    throws TextClassificationException
{
    final String span = jCas.getDocumentText().substring(aTarget.getBegin(), aTarget.getEnd());
    final Matcher tags = HASHTAG_PATTERN.matcher(span);

    // Each successful find() advances the matcher to the next occurrence.
    int matches = 0;
    for (boolean hit = tags.find(); hit; hit = tags.find()) {
        matches++;
    }

    final Feature feature =
            new Feature(NumberOfHashTags.class.getSimpleName(), matches, FeatureType.NUMERIC);
    return feature.asSet();
}
/**
 * Counts the matches of {@code HASHTAG_PATTERN} inside the target's span of the
 * document text.
 *
 * @param jCas    the CAS whose document text is searched
 * @param aTarget the target whose begin/end offsets delimit the searched substring
 * @return the result of {@code asSet()} on a numeric feature named after
 *         {@code NumberOfHashTags.class.getSimpleName()} holding the match count
 * @throws TextClassificationException declared by the signature; nothing in this body throws it
 */
@Override
public Set<Feature> extract(JCas jCas, TextClassificationTarget aTarget)
    throws TextClassificationException
{
    final String span = jCas.getDocumentText().substring(aTarget.getBegin(), aTarget.getEnd());
    final Matcher tags = HASHTAG_PATTERN.matcher(span);

    // Each successful find() advances the matcher to the next occurrence.
    int matches = 0;
    for (boolean hit = tags.find(); hit; hit = tags.find()) {
        matches++;
    }

    final Feature feature =
            new Feature(NumberOfHashTags.class.getSimpleName(), matches, FeatureType.NUMERIC);
    return feature.asSet();
}
/**
 * Reports whether the two views have at least one element of {@code getNEs(...)}
 * in common.
 *
 * <p>The value is a {@code boolean} ({@code true} when the two collections are not
 * disjoint), so the feature is declared as {@code FeatureType.BOOLEAN}; it was
 * previously tagged {@code NUMERIC}, which did not match the value's type.
 *
 * @param view1 first view of the pair
 * @param view2 second view of the pair
 * @return the result of {@code asSet()} on a BOOLEAN feature named "SharedNEs"
 * @throws TextClassificationException declared by the signature
 */
@Override
public Set<Feature> extract(JCas view1, JCas view2)
    throws TextClassificationException
{
    boolean sharesEntity = !Collections.disjoint(getNEs(view1), getNEs(view2));
    return new Feature("SharedNEs", sharesEntity, FeatureType.BOOLEAN).asSet();
}
/**
 * Relates the number of {@code Sentence} annotations covered by the target to the
 * value returned by {@code getMax()}, using {@code getRatio}.
 *
 * @param jcas    the CAS holding the annotations
 * @param aTarget the target whose covered sentences are counted
 * @return the result of {@code asSet()} on a numeric feature named {@code FEATURE_NAME}
 * @throws TextClassificationException declared by the signature
 */
@Override
public Set<Feature> extract(JCas jcas, TextClassificationTarget aTarget)
    throws TextClassificationException
{
    final long upperBound = getMax();
    final int sentenceCount = JCasUtil.selectCovered(jcas, Sentence.class, aTarget).size();

    final Feature feature =
            new Feature(FEATURE_NAME, getRatio(sentenceCount, upperBound), FeatureType.NUMERIC);
    return feature.asSet();
}
/**
 * Computes the difference between the values of
 * {@code getAverageNounPhraseTokenLength} for the two views (view1 minus view2).
 *
 * @param view1 view supplying the minuend
 * @param view2 view supplying the subtrahend
 * @return the result of {@code asSet()} on a numeric feature named
 *         "DiffNounPhraseTokenLength"
 * @throws TextClassificationException declared by the signature
 */
@Override
public Set<Feature> extract(JCas view1, JCas view2)
    throws TextClassificationException
{
    final Feature lengthDifference = new Feature(
            "DiffNounPhraseTokenLength",
            getAverageNounPhraseTokenLength(view1) - getAverageNounPhraseTokenLength(view2),
            FeatureType.NUMERIC);
    return lengthDifference.asSet();
}
/**
 * Computes the difference between the values of
 * {@code getAverageNounPhraseCharacterLength} for the two views (view1 minus view2).
 *
 * @param view1 view supplying the minuend
 * @param view2 view supplying the subtrahend
 * @return the result of {@code asSet()} on a numeric feature named
 *         "DiffNounPhraseCharacterLength"
 * @throws TextClassificationException declared by the signature
 */
@Override
public Set<Feature> extract(JCas view1, JCas view2)
    throws TextClassificationException
{
    final Feature lengthDifference = new Feature(
            "DiffNounPhraseCharacterLength",
            getAverageNounPhraseCharacterLength(view1) - getAverageNounPhraseCharacterLength(view2),
            FeatureType.NUMERIC);
    return lengthDifference.asSet();
}
/**
 * Relates the number of {@code Token} annotations covered by the target to the
 * value returned by {@code getMax()}, using {@code getRatio}.
 *
 * @param jcas    the CAS holding the annotations
 * @param aTarget the target whose covered tokens are counted
 * @return the result of {@code asSet()} on a numeric feature named {@code FEATURE_NAME}
 * @throws TextClassificationException declared by the signature
 */
@Override
public Set<Feature> extract(JCas jcas, TextClassificationTarget aTarget)
    throws TextClassificationException
{
    final long upperBound = getMax();
    final int tokenCount = JCasUtil.selectCovered(jcas, Token.class, aTarget).size();

    final Feature feature =
            new Feature(FEATURE_NAME, getRatio(tokenCount, upperBound), FeatureType.NUMERIC);
    return feature.asSet();
}
/**
 * Relates the values of {@code getTypeTokenRatio} for the two views.
 *
 * <p>Although the feature is named "DiffTypeTokenRatio", the value is the
 * <em>quotient</em> of the two ratios (view1 divided by view2), not their difference.
 * NOTE(review): the result when {@code getTypeTokenRatio(view2)} is zero depends on
 * that helper's return type, which is not visible here — confirm.
 *
 * @param view1 view supplying the dividend
 * @param view2 view supplying the divisor
 * @return the result of {@code asSet()} on a numeric feature named "DiffTypeTokenRatio"
 * @throws TextClassificationException declared by the signature
 */
@Override
public Set<Feature> extract(JCas view1, JCas view2)
    throws TextClassificationException
{
    final Feature ratioOfRatios = new Feature(
            "DiffTypeTokenRatio",
            getTypeTokenRatio(view1) / getTypeTokenRatio(view2),
            FeatureType.NUMERIC);
    return ratioOfRatios.asSet();
}
/**
 * Relates the length of the target's covered text, measured with
 * {@code String.length()}, to the value returned by {@code getMax()}, using
 * {@code getRatio}.
 *
 * @param jcas    the CAS containing the target (not inspected directly)
 * @param aTarget the target whose covered text is measured
 * @return the result of {@code asSet()} on a numeric feature named {@code FEATURE_NAME}
 * @throws TextClassificationException declared by the signature
 */
@Override
public Set<Feature> extract(JCas jcas, TextClassificationTarget aTarget)
    throws TextClassificationException
{
    final long upperBound = getMax();
    final int charCount = aTarget.getCoveredText().length();

    final Feature feature =
            new Feature(FEATURE_NAME, getRatio(charCount, upperBound), FeatureType.NUMERIC);
    return feature.asSet();
}
/**
 * Emits the text of the unit located {@code shiftIdx} positions away from the
 * given unit.
 *
 * <p>The superclass {@code extract} is invoked first and its result is discarded
 * (presumably it prepares {@code unitBegin2Idx} — confirm against the superclass).
 * The current unit's index is looked up by its begin offset, shifted by
 * {@code shiftIdx}, and resolved to a string through {@code getTargetText}.
 *
 * <p>The value is a {@code String}, so the feature is declared as
 * {@code FeatureType.STRING}; it was previously tagged {@code NUMERIC}, which did
 * not match the value's type.
 *
 * @param aView the CAS view being processed
 * @param unit  the unit whose neighbour is looked up
 * @return the result of {@code asSet()} on a STRING feature named
 *         {@code FEATURE_NAME + toHumanReadable(shiftIdx)}
 * @throws TextClassificationException declared by the signature
 */
public Set<Feature> extract(JCas aView, TextClassificationTarget unit)
    throws TextClassificationException
{
    super.extract(aView, unit);

    Integer currentTargetIdx = super.unitBegin2Idx.get(unit.getBegin());
    Integer targetIdx = currentTargetIdx + shiftIdx;
    String featureVal = getTargetText(targetIdx);

    return new Feature(FEATURE_NAME + toHumanReadable(shiftIdx), featureVal, FeatureType.STRING)
            .asSet();
}
/**
 * Emits the text of the target located {@code shiftIdx} positions away from the
 * given target.
 *
 * <p>The superclass {@code extract} is invoked first and its result is discarded
 * (presumably it prepares {@code targetBegin2Idx} — confirm against the superclass).
 * The current target's index is then looked up by its begin offset, shifted by
 * {@code shiftIdx}, and resolved to a string through {@code getTargetText}.
 *
 * @param aView  the CAS view being processed
 * @param target the target whose neighbour is looked up
 * @return the result of {@code asSet()} on a STRING feature named
 *         {@code FEATURE_NAME + toHumanReadable(shiftIdx)}
 * @throws TextClassificationException declared by the signature
 */
public Set<Feature> extract(JCas aView, TextClassificationTarget target)
    throws TextClassificationException
{
    super.extract(aView, target);

    final Integer ownIdx = super.targetBegin2Idx.get(target.getBegin());
    final Integer shiftedIdx = ownIdx + shiftIdx;
    final String neighbourText = getTargetText(shiftedIdx);

    final String featureName = FEATURE_NAME + toHumanReadable(shiftIdx);
    final Feature feature = new Feature(featureName, neighbourText, FeatureType.STRING);
    return feature.asSet();
}