/**
 * Gives a short account of how the {@linkplain #getEncoding() encoding} of the source document was determined.
 * <p>
 * The text exists for informational purposes only: it has no guaranteed format and cannot be reliably parsed.
 *
 * @return a short account of how the {@linkplain #getEncoding() encoding} of the source document was determined.
 * @see #getEncoding()
 */
public String getEncodingSpecificationInfo() {
	// While the encoding field still holds the UNINITIALISED sentinel, run
	// getDocumentSpecifiedEncoding() before reading the info field
	// (presumably that call fills in encodingSpecificationInfo - confirm against its body).
	if (encoding == UNINITIALISED) {
		getDocumentSpecifiedEncoding();
	}
	return encodingSpecificationInfo;
}
if (encoding==UNINITIALISED) getDocumentSpecifiedEncoding(); return encoding;
logger.warn("Alternative encoding "+safePreliminaryEncoding+" substituted for unsupported preliminary encoding "+preliminaryEncoding+": "+preliminaryEncodingSpecificationInfo); String documentSpecifiedEncodingInfoSuffix; final String documentSpecifiedEncoding=previewSource.getDocumentSpecifiedEncoding(this); if (documentSpecifiedEncoding==null) { if (previewSource.isXML()) {
protected String detectEncoding(RawDocument input) { BOMNewlineEncodingDetector detector = new BOMNewlineEncodingDetector(input.getStream(), input.getEncoding()); // string input has a default BOM defined by java // do not remove it if (input.getInputCharSequence() != null) { detector.detectBom(); } else { detector.detectAndRemoveBom(); } setEncoding(detector.getEncoding()); hasUtf8Bom = detector.hasUtf8Bom(); hasUtf8Encoding = detector.hasUtf8Encoding(); hasBOM = detector.hasBom(); setNewlineType(detector.getNewlineType().toString()); Source parsedHeader = getParsedHeader(input.getStream()); String detectedEncoding = parsedHeader.getDocumentSpecifiedEncoding(); documentEncoding = detectedEncoding == null ? false : true; if (detectedEncoding == null && getEncoding() != null) { detectedEncoding = getEncoding(); LOGGER.debug("Cannot auto-detect encoding. Using the default encoding ({})", getEncoding()); } else if (getEncoding() == null) { detectedEncoding = parsedHeader.getEncoding(); // get best guess LOGGER.debug("Default encoding and detected encoding not found. Using best guess encoding ({})", detectedEncoding); } return detectedEncoding; }