/**
 * Builds a Java String from the Unicode character data that corresponds to
 * the original byte data handed to the Charset detect operation.
 * <p>
 * This is a convenience overload: it forwards to {@code getString(int)} with
 * an argument of {@code -1}.
 * NOTE(review): -1 presumably means "no length limit" - confirm against the
 * documentation of {@code getString(int)}.
 *
 * @return a String created from the converted input data.
 * @throws java.io.IOException if the delegated {@code getString(int)} call
 *         raises it.
 *
 * @stable ICU 3.4
 */
public String getString() throws java.io.IOException {
    final int lengthArgument = -1;
    return getString(lengthArgument);
}
return match.getString(-1); } catch (IOException e) { return null;
content = match.getString(); } catch (final UnsupportedEncodingException e) { log.debug("Unsupported charset [" + match.getName() + "]. Using UTF-8.");
@Override public void parseContent(final MultivaluedMap<String, String> parameters, final InputStream inputStream, String extension, final String mimeType, final ParserResultBuilder resultBuilder) throws IOException { resultBuilder.metas().set(MIME_TYPE, findMimeType(extension, mimeType, this::findMimeTypeUsingDefault)); // Trying to detect the CHARSET of the stream final CharsetDetector detector = new CharsetDetector(); try (BufferedInputStream bis = new BufferedInputStream(inputStream)) { detector.setText(bis); final CharsetMatch match = detector.detect(); final ParserFieldsBuilder result = resultBuilder.newDocument(); final String content; if (match != null) { content = match.getString(); result.add(CHARSET_DETECTION, match.getName()); } else { bis.reset(); content = IOUtils.toString(bis, Charset.defaultCharset()); } result.add(CONTENT, content); result.add(LANG_DETECTION, languageDetection(result, CONTENT, 10000)); } }