Refine search
@TypeCapability(inputs = { "de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS" }) public class LucenePOSNGramFeatureExtractorBase extends LuceneFeatureExtractorBase @ConfigurationParameter(name = PARAM_POS_NGRAM_MIN_N, mandatory = true, defaultValue = "1") protected int posNgramMinN; @ConfigurationParameter(name = PARAM_POS_NGRAM_MAX_N, mandatory = true, defaultValue = "3") protected int posNgramMaxN; @ConfigurationParameter(name = PARAM_POS_NGRAM_USE_TOP_K, mandatory = true, defaultValue = "500") protected int posNgramUseTopK; @ConfigurationParameter(name = PARAM_USE_CANONICAL_POS, mandatory = true, defaultValue = "true") protected boolean useCanonicalTags;
@TypeCapability(inputs = { "de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token" }) public class LuceneCharacterSkipNgramFeatureExtractorBase extends LuceneFeatureExtractorBase @ConfigurationParameter(name = PARAM_CHAR_SKIP_NGRAM_MIN_N, mandatory = true, defaultValue = "2") protected int charSkipMinN; @ConfigurationParameter(name = PARAM_CHAR_SKIP_NGRAM_MAX_N, mandatory = true, defaultValue = "3") protected int charSkipMaxN; @ConfigurationParameter(name = PARAM_CHAR_SKIP_SIZE, mandatory = true, defaultValue = "2") protected int charSkipSize; @ConfigurationParameter(name = PARAM_CHAR_SKIP_NGRAM_USE_TOP_K, mandatory = true, defaultValue = "500") protected int charSkipNgramUseTopK; @ConfigurationParameter(name = PARAM_CHAR_SKIP_NGRAM_LOWER_CASE, mandatory = true, defaultValue = "true") protected boolean charSkipToLowerCase;
@TypeCapability(inputs = { "de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence", "de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token" }) public class LuceneSkipNgramFeatureExtractorBase @ConfigurationParameter(name = PARAM_SKIP_NGRAM_MIN_N, mandatory = true, defaultValue = "2") protected int skipMinN; @ConfigurationParameter(name = PARAM_SKIP_NGRAM_MAX_N, mandatory = true, defaultValue = "3") protected int skipMaxN; @ConfigurationParameter(name = PARAM_SKIP_SIZE, mandatory = true, defaultValue = "2") protected int skipSize; @ConfigurationParameter(name = PARAM_SKIP_NGRAM_USE_TOP_K, mandatory = true, defaultValue = "500") protected int skipNgramUseTopK; @ConfigurationParameter(name = PARAM_SKIP_NGRAM_LOWER_CASE, mandatory = true, defaultValue = "true") protected boolean skipToLowerCase;
@TypeCapability(inputs = { "de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.Token" }) public class LucenePhoneticNGramFeatureExtractorBase extends LuceneFeatureExtractorBase @ConfigurationParameter(name = PARAM_PHONETIC_NGRAM_MIN_N, mandatory = true, defaultValue = "1") protected int phoneticNgramMinN; @ConfigurationParameter(name = PARAM_PHONETIC_NGRAM_MAX_N, mandatory = true, defaultValue = "3") protected int phoneticNgramMaxN; @ConfigurationParameter(name = PARAM_PHONETIC_NGRAM_USE_TOP_K, mandatory = true, defaultValue = "500") protected int phoneticNgramUseTopK;
@TypeCapability(inputs = { "de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence", "de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token" }) public class LuceneSkipNgramFeatureExtractor @ConfigurationParameter(name = PARAM_SKIP_NGRAM_MIN_N, mandatory = true, defaultValue = "2") private int skipMinN; @ConfigurationParameter(name = PARAM_SKIP_NGRAM_MAX_N, mandatory = true, defaultValue = "3") private int skipMaxN; @ConfigurationParameter(name = PARAM_SKIP_N, mandatory = true, defaultValue = "2") private int skipN; @ConfigurationParameter(name = PARAM_SKIP_NGRAM_LOWER_CASE, mandatory = true, defaultValue = "true") private boolean skipToLowerCase;
@TypeCapability(inputs = { "de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS" }) public class LucenePOSNGramFeatureExtractor extends LuceneFeatureExtractorBase @ConfigurationParameter(name = PARAM_POS_NGRAM_MIN_N, mandatory = true, defaultValue = "1") private int posNgramMinN; @ConfigurationParameter(name = PARAM_POS_NGRAM_MAX_N, mandatory = true, defaultValue = "3") private int posNgramMaxN; @ConfigurationParameter(name = PARAM_POS_NGRAM_USE_TOP_K, mandatory = true, defaultValue = "500") private int posNgramUseTopK; @ConfigurationParameter(name = PARAM_USE_CANONICAL_POS, mandatory = true, defaultValue = "true") private boolean useCanonicalTags;
@TypeCapability(inputs = { "de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence", "de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token" }) public abstract class NGramFeatureExtractorBase @ConfigurationParameter(name = PARAM_NGRAM_MIN_N, mandatory = true, defaultValue = "1") protected int ngramMinN; @ConfigurationParameter(name = PARAM_NGRAM_MAX_N, mandatory = true, defaultValue = "3") protected int ngramMaxN; @ConfigurationParameter(name = PARAM_NGRAM_USE_TOP_K, mandatory = true, defaultValue = "500") protected int ngramUseTopK; @ConfigurationParameter(name = PARAM_NGRAM_STOPWORDS_FILE, mandatory = false) protected String ngramStopwordsFile; @ConfigurationParameter(name = PARAM_FILTER_PARTIAL_STOPWORD_MATCHES, mandatory = true, defaultValue = "false") protected boolean filterPartialStopwordMatches; @ConfigurationParameter(name = PARAM_NGRAM_LOWER_CASE, mandatory = true, defaultValue = "true") protected boolean ngramLowerCase;
@TypeCapability(inputs = { "de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Compound", "de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Split" }, outputs = { "de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token" }) public class CompoundPartTokenizer @ConfigurationParameter(name = PARAM_COMPOUND_SPLIT_LEVEL, mandatory = true, defaultValue = { "ALL" }) private CompoundSplitLevel compoundSplitLevel;
@TypeCapability(inputs = { "de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence", "de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token" }) public abstract class NGramFeatureExtractorBase @ConfigurationParameter(name = PARAM_NGRAM_MIN_N, mandatory = true, defaultValue = "1") protected int ngramMinN; @ConfigurationParameter(name = PARAM_NGRAM_MAX_N, mandatory = true, defaultValue = "3") protected int ngramMaxN; @ConfigurationParameter(name = PARAM_NGRAM_USE_TOP_K, mandatory = true, defaultValue = "500") protected int ngramUseTopK; @ConfigurationParameter(name = PARAM_NGRAM_STOPWORDS_FILE, mandatory = false) protected String ngramStopwordsFile; @ConfigurationParameter(name = PARAM_FILTER_PARTIAL_STOPWORD_MATCHES, mandatory = true, defaultValue = "false") protected boolean filterPartialStopwordMatches; @ConfigurationParameter(name = PARAM_NGRAM_FREQ_THRESHOLD, mandatory = true, defaultValue = "0.01") protected float ngramFreqThreshold; @ConfigurationParameter(name = PARAM_NGRAM_LOWER_CASE, mandatory = true, defaultValue = "true") protected boolean ngramLowerCase;
@TypeCapability(outputs = { "de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData" }) public class XmiReader extends ResourceCollectionReaderBase @ConfigurationParameter(name = PARAM_LENIENT, mandatory = true, defaultValue = "true") private boolean lenient;
@TypeCapability(inputs = { KEEP }, outputs = { KEEP }) public class BioLemmatizerNormalizerAnnotator extends JCasAnnotator_ImplBase { private static Logger LOG = LoggerFactory .getLogger(BioLemmatizerNormalizerAnnotator.class); @ConfigurationParameter(name = PARAM_CASE_SENSITIVE, defaultValue = "false",// description = "If true, tokens are not normalized to lowercase before string comparisions") private boolean caseSensitive; @ConfigurationParameter(name = "onlyTokens", defaultValue = "false",// description = "Only lemmatize the Keeps that are Tokens, rest are left unchanged.") private boolean onlyTokens;
@TypeCapability(outputs = { "de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData" }) public class XmiReader extends ResourceCollectionReaderBase @ConfigurationParameter(name = PARAM_LENIENT, mandatory = true, defaultValue = "true") private boolean lenient;
@TypeCapability( outputs={ "de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData"}) @ConfigurationParameter(name=PARAM_LENIENT, mandatory=true, defaultValue="false") private boolean lenient;
@TypeCapability(inputs = { "de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData" }) public class XmiWriter extends JCasFileWriter_ImplBase @ConfigurationParameter(name = PARAM_PRETTY_PRINT, mandatory = true, defaultValue = "true") private boolean prettyPrint; @ConfigurationParameter(name = PARAM_TYPE_SYSTEM_FILE, mandatory = false) private File typeSystemFile;
@TypeCapability(inputs = { "de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData" }) public class XmiWriter extends JCasFileWriter_ImplBase @ConfigurationParameter(name = PARAM_PRETTY_PRINT, mandatory = true, defaultValue = "true") private boolean prettyPrint; @ConfigurationParameter(name = PARAM_TYPE_SYSTEM_FILE, mandatory = false) private File typeSystemFile;
@TypeCapability( inputs={ "de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData"}) @ConfigurationParameter(name = PARAM_PRETTY_PRINT, mandatory = true, defaultValue = "true") private boolean prettyPrint; @ConfigurationParameter(name=PARAM_TYPE_SYSTEM_FILE, mandatory=false) private File typeSystemFile;
@TypeCapability(inputs = { "de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence", "de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token", "de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS" }) @ConfigurationParameter(name = PARAM_USE_CANONICAL_POS, mandatory = true, defaultValue = "true") protected boolean useCanonicalTags;
@TypeCapability() public class LanguageDetectionAnnotator extends JCasAnnotator_ImplBase { private static Logger LOG = LoggerFactory @ConfigurationParameter(name = MIN_TEXT_LENGTH, defaultValue = "150") private int minTextLenght;
@TypeCapability( outputs = { "de.tudarmstadt.ukp.dkpro.core.io.jwpl.type.DBConfig"}) @ConfigurationParameter(name = PARAM_ONLY_FIRST_PARAGRAPH, mandatory = true, defaultValue = "false") private boolean onlyFirstParagraph;
@TypeCapability(outputs = { LINNAEUS_SPECIES }) public class LinnaeusAnnotator extends JCasAnnotator_ImplBase { @ConfigurationParameter(name = CONFIG_FILE, mandatory = true) private String configFile;