/**
 * Classifies the capitalization pattern of {@code tokenText} and records it on {@code wta}.
 *
 * <ul>
 *   <li>{@code TOKEN_CAP_NONE} - no character is upper case (this includes empty text)</li>
 *   <li>{@code TOKEN_CAP_ALL} - every character is upper case</li>
 *   <li>{@code TOKEN_CAP_FIRST_ONLY} - exactly one upper case character, and it is the first</li>
 *   <li>{@code TOKEN_CAP_MIXED} - any other combination</li>
 * </ul>
 *
 * @param wta       token whose capitalization attribute is set
 * @param tokenText text of the token to inspect
 * @see org.apache.ctakes.core.nlp.tokenizer.Tokenizer#applyCapitalizationRules
 */
private void setCapitalization(WordToken wta, String tokenText) {
    final int length = tokenText.length();

    // Tally the characters that isUpperCase accepts.
    int upperCount = 0;
    for (int idx = 0; idx < length; idx++) {
        if (isUpperCase(tokenText.charAt(idx))) {
            upperCount++;
        }
    }

    if (upperCount == 0) {
        wta.setCapitalization(TokenizerAnnotator.TOKEN_CAP_NONE);
        return;
    }

    // Every character was counted, so nothing other than upper case was seen.
    if (upperCount == length) {
        wta.setCapitalization(TokenizerAnnotator.TOKEN_CAP_ALL);
        return;
    }

    // upperCount >= 1 here, so the text is non-empty and charAt(0) is safe.
    if (upperCount == 1 && isUpperCase(tokenText.charAt(0))) {
        wta.setCapitalization(TokenizerAnnotator.TOKEN_CAP_FIRST_ONLY);
        return;
    }

    wta.setCapitalization(TokenizerAnnotator.TOKEN_CAP_MIXED);
}
/**
 * Determines how the given token text is capitalized and stores the outcome on the token.
 *
 * <p>The text is scanned once; each character is checked with {@code isUpperCase}. The token
 * is then tagged as having no capitals, all capitals, only a leading capital, or a mix.
 *
 * @param wta       the word token that receives the capitalization value
 * @param tokenText the characters covered by the token
 * @see org.apache.ctakes.core.nlp.tokenizer.Tokenizer#applyCapitalizationRules
 */
private void setCapitalization(WordToken wta, String tokenText) {
    int capitals = 0;
    boolean sawOther = false;

    for (char current : tokenText.toCharArray()) {
        if (isUpperCase(current)) {
            capitals++;
        } else {
            sawOther = true;
        }
    }

    if (capitals == 0) {
        // Nothing upper case at all (also the result for empty text).
        wta.setCapitalization(TokenizerAnnotator.TOKEN_CAP_NONE);
    } else if (!sawOther) {
        // Only upper case characters were encountered.
        wta.setCapitalization(TokenizerAnnotator.TOKEN_CAP_ALL);
    } else {
        // A single capital that sits at the start of the token is "first only";
        // everything else is treated as mixed case.
        boolean leadingCapitalOnly = capitals == 1 && isUpperCase(tokenText.charAt(0));
        if (leadingCapitalOnly) {
            wta.setCapitalization(TokenizerAnnotator.TOKEN_CAP_FIRST_ONLY);
        } else {
            wta.setCapitalization(TokenizerAnnotator.TOKEN_CAP_MIXED);
        }
    }
}
break; wta.setCapitalization(cap); wta.setNumPosition(numPos); bta = wta;
break; wta.setCapitalization(cap); wta.setNumPosition(numPos); bta = wta;