public DetectedLanguage detectLanguage(String text, List<String> noopLangs) { String shortText = text.length() > maxLength ? text.substring(0, maxLength) : text; shortText = textObjectFactory.forText(shortText).toString(); Map.Entry<String,Double> result = null; if (fasttextEnabled) {
/**
 * Splits {@code inputSample} into a list of text objects.
 *
 * <p>When {@code breakWords} is false, chunks are cut on whitespace boundaries with a
 * maximum chunk length of {@code inputSample.length() / (k - 1)}. When it is true, the
 * sample's string form is cut into fixed-length pieces via {@code Splitter.fixedLength(k)}.
 *
 * @return the chunks in input order; empty when nothing matches
 */
private List<TextObject> partition() {
    List<TextObject> result = new ArrayList<>(this.k);
    if (!breakWords) {
        // k == 1 used to divide by zero; clamp the divisor to at least 1.
        int divisor = Math.max(1, this.k - 1);
        // A zero upper bound would produce ".{1,0}", which Pattern rejects; clamp to 1.
        int maxLength = Math.max(1, this.inputSample.length() / divisor);
        // \G anchors each match at the end of the previous one; the lookahead forces a
        // chunk to end right before whitespace or at the end of input.
        // NOTE(review): a single token longer than maxLength stops matching at that
        // point, so the remainder of the sample is dropped — confirm this is intended.
        Pattern p = Pattern.compile("\\G\\s*(.{1," + maxLength + "})(?=\\s|$)", Pattern.DOTALL);
        Matcher m = p.matcher(this.inputSample);
        while (m.find()) {
            result.add(textObjectFactory.create().append(m.group(1)));
        }
    } else {
        // NOTE(review): fixedLength(k) makes pieces of k characters, not k pieces — confirm.
        Splitter splitter = Splitter.fixedLength(this.k);
        for (String token : splitter.split(this.inputSample.toString())) {
            result.add(textObjectFactory.create().append(token));
        }
    }
    return result;
}
}
/**
 * Appends the sub-range {@code [start, end)} of {@code csq}.
 *
 * @param csq   the source sequence; {@code null} is treated as the four characters
 *              {@code "null"}, as the {@link Appendable} contract specifies
 * @param start index of the first character to append
 * @param end   index after the last character to append
 * @return the result of the delegated {@code append} call
 * @throws IOException declared by the {@link Appendable} contract
 */
@Override
public Appendable append(CharSequence csq, int start, int end) throws IOException {
    // Previously a null csq failed with NullPointerException on subSequence.
    CharSequence source = (csq == null) ? "null" : csq;
    return append(source.subSequence(start, end));
}
/**
 * Builds a new, empty text object configured with this factory's
 * {@code textFilter} and {@code maxTextLength}.
 *
 * @return a freshly constructed {@code TextObject}
 */
public TextObject create() {
    TextObject fresh = new TextObject(textFilter, maxTextLength);
    return fresh;
}
/**
 * Cuts {@code inputSample} into chunks and wraps each one in a new text object.
 *
 * <p>With {@code breakWords} false, chunks end at whitespace (or end of input) and are
 * at most {@code inputSample.length() / (k - 1)} characters long. With {@code breakWords}
 * true, the sample's string form is split with {@code Splitter.fixedLength(k)}.
 *
 * @return the chunks in input order; empty when nothing matches
 */
private List<TextObject> partition() {
    List<TextObject> result = new ArrayList<>(this.k);
    if (breakWords) {
        // NOTE(review): fixedLength(k) makes pieces of k characters, not k pieces — confirm.
        for (String token : Splitter.fixedLength(this.k).split(this.inputSample.toString())) {
            result.add(textObjectFactory.create().append(token));
        }
        return result;
    }

    // Clamp the divisor so k == 1 no longer divides by zero, and clamp the bound so a
    // short or empty sample no longer yields the invalid quantifier ".{1,0}".
    int maxLength = Math.max(1, this.inputSample.length() / Math.max(1, this.k - 1));
    // \G chains matches back to back; the lookahead ends each chunk before whitespace
    // or at the end of input.
    // NOTE(review): a token longer than maxLength ends the scan early — confirm intended.
    Pattern p = Pattern.compile("\\G\\s*(.{1," + maxLength + "})(?=\\s|$)", Pattern.DOTALL);
    Matcher m = p.matcher(this.inputSample);
    while (m.find()) {
        result.add(textObjectFactory.create().append(m.group(1)));
    }
    return result;
}
}
/**
 * Appends a single character by converting it to a one-character string and
 * delegating to the text-accepting {@code append} overload.
 *
 * @param c the character to append
 * @return the result of the delegated {@code append} call
 * @throws IOException declared by the {@link Appendable} contract
 */
@Override
public Appendable append(char c) throws IOException {
    String single = String.valueOf(c);
    return append(single);
}
/**
 * Creates an empty text object that uses this factory's {@code textFilter}
 * and {@code maxTextLength}.
 *
 * @return a new {@code TextObject}
 */
public TextObject create() {
    final TextObject textObject = new TextObject(textFilter, maxTextLength);
    return textObject;
}
/**
 * Appends one character, passing it on as a one-character string to the
 * text-accepting {@code append} overload.
 *
 * @param c the character to append
 * @return the result of the delegated {@code append} call
 * @throws IOException declared by the {@link Appendable} contract
 */
@Override
public Appendable append(char c) throws IOException {
    final String asText = String.valueOf(c);
    return append(asText);
}
public DetectedLanguage detectLanguage(String text, List<String> noopLangs) { String shortText = text.length() > maxLength ? text.substring(0, maxLength) : text; shortText = textObjectFactory.forText(shortText).toString(); Map.Entry<String,Double> result = null; if (fasttextEnabled) {
/**
 * Appends the characters of {@code csq} between {@code start} (inclusive) and
 * {@code end} (exclusive).
 *
 * @param csq   the source sequence; per the {@link Appendable} contract a {@code null}
 *              value is handled as if it were the four characters {@code "null"}
 * @param start index of the first character to append
 * @param end   index after the last character to append
 * @return the result of the delegated {@code append} call
 * @throws IOException declared by the {@link Appendable} contract
 */
@Override
public Appendable append(CharSequence csq, int start, int end) throws IOException {
    if (csq == null) {
        // Avoid the NullPointerException the unguarded subSequence call used to raise.
        return append("null".subSequence(start, end));
    }
    return append(csq.subSequence(start, end));
}
/**
 * Appends the target text for language detection, reading it from the given reader.
 *
 * <p>Reading continues until the reader reports end of stream or, when
 * {@code maxTextLength} is non-zero, until the accumulated text reaches that length;
 * anything left in the reader after that is ignored.
 *
 * @param reader the input reader (typically a {@code BufferedReader})
 * @return this object, to allow call chaining
 * @throws java.io.IOException if reading from the reader fails
 */
public TextObject append(Reader reader) throws IOException {
    char[] buf = new char[1024];
    // Loop on read()'s return value, not ready(): ready() only says whether a read
    // would block. It can be true at end of stream (e.g. StringReader), in which case
    // the old code passed -1 to String.valueOf and threw; it can also be false while
    // data is still pending, which silently truncated the input.
    while (maxTextLength == 0 || stringBuilder.length() < maxTextLength) {
        int length = reader.read(buf);
        if (length < 0) {
            break; // end of stream
        }
        append(String.valueOf(buf, 0, length));
    }
    return this;
}
/**
 * Reads text from the given reader and appends it as language-detection input.
 *
 * <p>The reader is consumed until end of stream. When {@code maxTextLength} is
 * non-zero, reading stops once the accumulated text reaches that length and the
 * remainder of the reader is ignored.
 *
 * @param reader the input reader (typically a {@code BufferedReader})
 * @return this object, to allow call chaining
 * @throws java.io.IOException if reading from the reader fails
 */
public TextObject append(Reader reader) throws IOException {
    char[] buf = new char[1024];
    int length;
    // read() returning -1 is the only reliable end-of-stream signal. The previous
    // ready() check could end the loop early, and could let a -1 length reach
    // String.valueOf, which throws StringIndexOutOfBoundsException.
    while ((maxTextLength == 0 || stringBuilder.length() < maxTextLength)
            && (length = reader.read(buf)) >= 0) {
        append(String.valueOf(buf, 0, length));
    }
    return this;
}
/**
 * Creates a new text object via {@link #create()} and appends the given text to it.
 *
 * @param text the text to append to the new object
 * @return the value returned by the {@code append} call on the new object
 */
public TextObject forText(CharSequence text) {
    TextObject fresh = create();
    return fresh.append(text);
}
/**
 * Convenience shortcut: obtains an empty text object from {@link #create()} and
 * appends {@code text} to it.
 *
 * @param text the text to append to the new object
 * @return the value returned by the {@code append} call on the new object
 */
public TextObject forText(CharSequence text) {
    final TextObject textObject = create();
    return textObject.append(text);
}
/**
 * Runs language detection on every file named in {@code arglist}
 * (the {@code --detectlang} option) and prints one line per file.
 *
 * <pre>
 * usage: --detectlang -d [profile directory] -a [alpha] -s [seed] [test file(s)]
 * </pre>
 *
 * @throws IOException if a file cannot be opened or read
 */
public void detectLang() throws IOException {
    LanguageDetector detector = makeDetector();
    TextObjectFactory factory = CommonTextObjectFactories.forDetectingOnLargeText();
    for (String filename : arglist) {
        // Each file is decoded as UTF-8 and closed as soon as it has been processed.
        try (BufferedReader reader =
                new BufferedReader(new InputStreamReader(new FileInputStream(filename), "utf-8"))) {
            TextObject content = factory.create().append(reader);
            System.out.println(filename + ":" + detector.getProbabilities(content));
        }
    }
}
/**
 * Implements the {@code --detectlang} option: for each file in {@code arglist},
 * reads its text, asks the detector for language probabilities, and prints
 * {@code filename:probabilities} to standard output.
 *
 * <pre>
 * usage: --detectlang -d [profile directory] -a [alpha] -s [seed] [test file(s)]
 * </pre>
 *
 * @throws IOException if a file cannot be opened or read
 */
public void detectLang() throws IOException {
    final LanguageDetector languageDetector = makeDetector();
    final TextObjectFactory largeTextFactory = CommonTextObjectFactories.forDetectingOnLargeText();
    for (String filename : arglist) {
        // The reader decodes the file as UTF-8; try-with-resources closes it per file.
        try (BufferedReader input =
                new BufferedReader(new InputStreamReader(new FileInputStream(filename), "utf-8"))) {
            TextObject fileText = largeTextFactory.create().append(input);
            List<DetectedLanguage> detected = languageDetector.getProbabilities(fileText);
            String report = filename + ":" + detected;
            System.out.println(report);
        }
    }
}