/**
 * Opens the given file for reading through a <code>CharsetToolkit</code>.
 * A toolkit is built for the file and the reader it hands out is returned as is.
 *
 * @param file a File
 * @return the <code>BufferedReader</code> obtained from the toolkit
 * @throws IOException if an IOException occurs.
 * @since 1.0
 */
public static BufferedReader newReader(File file) throws IOException {
    return new CharsetToolkit(file).getReader();
}
/** * Gets a <code>BufferedReader</code> (indeed a <code>LineNumberReader</code>) from the <code>File</code> * specified in the constructor of <code>CharsetToolkit</code> using the charset discovered or the default * charset if an 8-bit <code>Charset</code> is encountered. * * @return a <code>BufferedReader</code> * @throws FileNotFoundException if the file is not found. */ public BufferedReader getReader() throws FileNotFoundException { LineNumberReader reader = new LineNumberReader(new InputStreamReader(new FileInputStream(file), getCharset())); if (hasUTF8Bom() || hasUTF16LEBom() || hasUTF16BEBom()) { try { reader.read(); } catch (IOException e) { // should never happen, as a file with no content // but with a BOM has at least one char } } return reader; }
/**
 * Sets the <code>Charset</code> stored as this toolkit's default.
 * A <code>null</code> argument selects the value of <code>getDefaultSystemCharset()</code> instead.
 *
 * @param defaultCharset the default <code>Charset</code> to store, or <code>null</code>
 *                       to fall back to the default system charset.
 */
public void setDefaultCharset(Charset defaultCharset) {
    this.defaultCharset = (defaultCharset == null) ? getDefaultSystemCharset() : defaultCharset;
}
if (hasUTF8Bom()) return Charset.forName("UTF-8"); if (hasUTF16LEBom()) return Charset.forName("UTF-16LE"); if (hasUTF16BEBom()) return Charset.forName("UTF-16BE"); if (isTwoBytesSequence(b0)) { if (!isContinuationChar(b1)) validU8Char = false; else else if (isThreeBytesSequence(b0)) { if (!(isContinuationChar(b1) && isContinuationChar(b2))) validU8Char = false; else else if (isFourBytesSequence(b0)) { if (!(isContinuationChar(b1) && isContinuationChar(b2) && isContinuationChar(b3))) validU8Char = false; else else if (isFiveBytesSequence(b0)) { if (!(isContinuationChar(b1) && isContinuationChar(b2)
/**
 * Runs the <code>testFileEncoding</code> target and checks that the generated
 * <code>DocumentedClass.html</code> is detected as UTF-16LE by <code>CharsetToolkit</code>.
 *
 * @throws Exception if running the target or inspecting the output fails.
 */
public void testFileEncoding() throws Exception {
    executeTarget("testFileEncoding");

    final File testfilesPackageDir = new File(tmpDir, "org/codehaus/groovy/tools/groovydoc/testfiles");
    System.err.println("testfilesPackageDir = " + testfilesPackageDir);

    final String[] list = testfilesPackageDir.list(new FilenameFilter() {
        public boolean accept(File file, String name) {
            return name.equals("DocumentedClass.html");
        }
    });

    // File.list returns null when the directory is missing or unreadable, and an empty array
    // when nothing matches. Fail with a clear message instead of an NPE or index error on list[0].
    assertEquals("Expected exactly one DocumentedClass.html in " + testfilesPackageDir,
            1, list == null ? 0 : list.length);

    File documentedClassHtmlDoc = new File(testfilesPackageDir, list[0]);
    CharsetToolkit charsetToolkit = new CharsetToolkit(documentedClassHtmlDoc);

    assertEquals("The generated groovydoc must be in 'UTF-16LE' file encoding.'", Charset.forName("UTF-16LE"), charsetToolkit.getCharset());
}
}
/**
 * Returns the charset held by this toolkit, computing it with
 * <code>guessEncoding()</code> on the first call and storing the result for later calls.
 *
 * @return the stored <code>Charset</code>
 */
public Charset getCharset() {
    if (this.charset != null) {
        return this.charset;
    }
    this.charset = guessEncoding();
    return this.charset;
}
// Guess the charset of `data` with a fresh CharsetToolkit and pass its display name,
// together with encodingsScores, to updateEncodingsScores (presumably a per-encoding tally; verify).
updateEncodingsScores(encodingsScores, new CharsetToolkit(data).guessEncoding().displayName());
if (hasUTF8Bom()) return Charset.forName("UTF-8"); if (hasUTF16LEBom()) return Charset.forName("UTF-16LE"); if (hasUTF16BEBom()) return Charset.forName("UTF-16BE"); if (isTwoBytesSequence(b0)) { if (!isContinuationChar(b1)) validU8Char = false; else else if (isThreeBytesSequence(b0)) { if (!(isContinuationChar(b1) && isContinuationChar(b2))) validU8Char = false; else else if (isFourBytesSequence(b0)) { if (!(isContinuationChar(b1) && isContinuationChar(b2) && isContinuationChar(b3))) validU8Char = false; else else if (isFiveBytesSequence(b0)) { if (!(isContinuationChar(b1) && isContinuationChar(b2)
/**
 * Returns the charset held by this toolkit, computing it with
 * <code>guessEncoding()</code> on the first call and storing the result for later calls.
 *
 * @return the stored <code>Charset</code>
 */
public Charset getCharset() {
    if (this.charset != null) {
        return this.charset;
    }
    this.charset = guessEncoding();
    return this.charset;
}
if (hasUTF8Bom()) return Charset.forName("UTF-8"); if (hasUTF16LEBom()) return Charset.forName("UTF-16LE"); if (hasUTF16BEBom()) return Charset.forName("UTF-16BE"); if (isTwoBytesSequence(b0)) { if (!isContinuationChar(b1)) validU8Char = false; else else if (isThreeBytesSequence(b0)) { if (!(isContinuationChar(b1) && isContinuationChar(b2))) validU8Char = false; else else if (isFourBytesSequence(b0)) { if (!(isContinuationChar(b1) && isContinuationChar(b2) && isContinuationChar(b3))) validU8Char = false; else else if (isFiveBytesSequence(b0)) { if (!(isContinuationChar(b1) && isContinuationChar(b2)
/** * Gets a <code>BufferedReader</code> (indeed a <code>LineNumberReader</code>) from the <code>File</code> * specified in the constructor of <code>CharsetToolkit</code> using the charset discovered by the * method <code>guessEncoding()</code>. * * @return a <code>BufferedReader</code> * @throws FileNotFoundException if the file is not found. */ public BufferedReader getReader() throws FileNotFoundException { LineNumberReader reader = new LineNumberReader(new InputStreamReader(new FileInputStream(file), getCharset())); if (hasUTF8Bom() || hasUTF16LEBom() || hasUTF16BEBom()) { try { reader.read(); } catch (IOException e) { // should never happen, as a file with no content // but with a BOM has at least one char } } return reader; }
/**
 * Opens the given file for reading through a <code>CharsetToolkit</code>.
 * A toolkit is built for the file and the reader it hands out is returned as is.
 *
 * @param file a File
 * @return the <code>BufferedReader</code> obtained from the toolkit
 * @throws IOException if an IOException occurs.
 * @since 1.0
 */
public static BufferedReader newReader(File file) throws IOException {
    return new CharsetToolkit(file).getReader();
}
/**
 * Creates a code source for the given file by delegating to the two-argument
 * constructor, passing the name of <code>CharsetToolkit.getDefaultSystemCharset()</code>
 * as the second argument.
 *
 * @param infile the file to create a GroovyCodeSource for.
 * @throws IOException if an issue arises opening and reading the file.
 */
public GroovyCodeSource(final File infile) throws IOException { this(infile, CharsetToolkit.getDefaultSystemCharset().name()); }
/**
 * Returns the charset held by this toolkit, computing it with
 * <code>guessEncoding()</code> on the first call and storing the result for later calls.
 *
 * @return the stored <code>Charset</code>
 */
public Charset getCharset() {
    if (this.charset != null) {
        return this.charset;
    }
    this.charset = guessEncoding();
    return this.charset;
}
if (hasUTF8Bom()) return Charset.forName("UTF-8"); if (hasUTF16LEBom()) return Charset.forName("UTF-16LE"); if (hasUTF16BEBom()) return Charset.forName("UTF-16BE"); if (isTwoBytesSequence(b0)) { if (!isContinuationChar(b1)) validU8Char = false; else else if (isThreeBytesSequence(b0)) { if (!(isContinuationChar(b1) && isContinuationChar(b2))) validU8Char = false; else else if (isFourBytesSequence(b0)) { if (!(isContinuationChar(b1) && isContinuationChar(b2) && isContinuationChar(b3))) validU8Char = false; else else if (isFiveBytesSequence(b0)) { if (!(isContinuationChar(b1) && isContinuationChar(b2)
/** * Gets a <code>BufferedReader</code> (indeed a <code>LineNumberReader</code>) from the <code>File</code> * specified in the constructor of <code>CharsetToolkit</code> using the charset discovered by the * method <code>guessEncoding()</code>. * * @return a <code>BufferedReader</code> * @throws FileNotFoundException if the file is not found. */ public BufferedReader getReader() throws FileNotFoundException { LineNumberReader reader = new LineNumberReader(new InputStreamReader(new FileInputStream(file), getCharset())); if (hasUTF8Bom() || hasUTF16LEBom() || hasUTF16BEBom()) { try { reader.read(); } catch (IOException e) { // should never happen, as a file with no content // but with a BOM has at least one char } } return reader; }
/**
 * Opens the given file for reading through a <code>CharsetToolkit</code>.
 * A toolkit is built for the file and the reader it hands out is returned as is.
 *
 * @param file a File
 * @return the <code>BufferedReader</code> obtained from the toolkit
 * @throws IOException if an IOException occurs.
 */
public static BufferedReader newReader(File file) throws IOException {
    return new CharsetToolkit(file).getReader();
}
/**
 * Reads the content of the given URL as a String by delegating to the
 * two-argument <code>getText</code>, passing the name of
 * <code>CharsetToolkit.getDefaultSystemCharset()</code> as the second argument.
 *
 * @param url URL to read content from
 * @return the text from that URL
 * @throws IOException if an IOException occurs.
 * @since 1.0
 */
public static String getText(URL url) throws IOException {
    final String systemCharsetName = CharsetToolkit.getDefaultSystemCharset().name();
    return getText(url, systemCharsetName);
}
/**
 * Returns the charset held by this toolkit, computing it with
 * <code>guessEncoding()</code> on the first call and storing the result for later calls.
 *
 * @return the stored <code>Charset</code>
 */
public Charset getCharset() {
    if (this.charset != null) {
        return this.charset;
    }
    this.charset = guessEncoding();
    return this.charset;
}
if (hasUTF8Bom()) return Charset.forName("UTF-8"); if (hasUTF16LEBom()) return Charset.forName("UTF-16LE"); if (hasUTF16BEBom()) return Charset.forName("UTF-16BE"); if (isTwoBytesSequence(b0)) { if (!isContinuationChar(b1)) validU8Char = false; else else if (isThreeBytesSequence(b0)) { if (!(isContinuationChar(b1) && isContinuationChar(b2))) validU8Char = false; else else if (isFourBytesSequence(b0)) { if (!(isContinuationChar(b1) && isContinuationChar(b2) && isContinuationChar(b3))) validU8Char = false; else else if (isFiveBytesSequence(b0)) { if (!(isContinuationChar(b1) && isContinuationChar(b2)