/**
 * Parses a fixed sample media type string and prints its type, its subtype
 * and each of its parameters to standard output.
 */
public static void describeMediaType() {
    MediaType mediaType = MediaType.parse("text/plain; charset=UTF-8");

    System.out.println("type: " + mediaType.getType());
    System.out.println("subtype: " + mediaType.getSubtype());

    System.out.println("parameters:");
    // Walk the entries directly so that each value is read without a second lookup.
    for (Map.Entry<String, String> parameter : mediaType.getParameters().entrySet()) {
        System.out.println(" " + parameter.getKey() + "=" + parameter.getValue());
    }
}
public UniversalEncodingListener(Metadata metadata) { MediaType type = MediaType.parse(metadata.get(Metadata.CONTENT_TYPE)); if (type != null) { hint = type.getParameters().get("charset"); } if (hint == null) { hint = metadata.get(Metadata.CONTENT_ENCODING); } }
/**
 * Maps the given media type to the entry registered for its base type.
 *
 * @param type media type to normalize, possibly {@code null}
 * @return {@code null} for a {@code null} argument; the argument itself when
 *         its base type has no entry in the registry; otherwise the
 *         registered type, combined with the argument's parameters when it
 *         has any
 */
public MediaType normalize(MediaType type) {
    if (type == null) {
        return null;
    }

    MediaType registered = registry.get(type.getBaseType());
    if (registered == null) {
        // Nothing registered for the base type: hand the argument back untouched.
        return type;
    }

    if (!type.hasParameters()) {
        return registered;
    }
    return new MediaType(registered, type.getParameters());
}
/**
 * Resolves the charset named by the {@code charset} parameter of the
 * Content-Type metadata entry.
 *
 * @param metadata parser metadata
 * @return {@code null} when the Content-Type entry cannot be parsed as a
 *         media type; otherwise whatever {@code getCharsetByLabel} returns
 *         for the value of the {@code charset} parameter
 */
private static Charset charsetFromContentType(Metadata metadata) {
    MediaType parsed = MediaType.parse(metadata.get(Metadata.CONTENT_TYPE));
    if (parsed == null) {
        return null;
    }
    return getCharsetByLabel(parsed.getParameters().get("charset"));
}
private DataURIScheme build(String mediaTypeString, String isBase64, String dataString) { byte[] data = null; //strip out back slashes as you might have in css dataString = (dataString != null) ? dataString.replaceAll("\\\\", " ") : dataString; if (dataString == null || dataString.length() == 0) { data = new byte[0]; } else if (isBase64 != null) { data = base64.decode(dataString); } else { //TODO: handle encodings MediaType mediaType = MediaType.parse(mediaTypeString); Charset charset = StandardCharsets.UTF_8; if (mediaType.hasParameters()) { String charsetName = mediaType.getParameters().get("charset"); if (charsetName != null && Charset.isSupported(charsetName)) { try { charset = Charset.forName(charsetName); } catch (IllegalCharsetNameException e) { //swallow and default to UTF-8 } } } data = dataString.getBytes(charset); } return new DataURIScheme(mediaTypeString, (isBase64 != null), data); }
/**
 * Checks that a media type string without parameters parses to a type whose
 * parameter map is non-null and empty.
 *
 * @since TIKA-121
 */
@Test
public void testParseNoParams() {
    MediaType parsed = MediaType.parse("text/html");

    assertNotNull(parsed);
    assertNotNull(parsed.getParameters());
    assertNotNull(parsed.getParameters().keySet());
    assertEquals(0, parsed.getParameters().keySet().size());
}
/**
 * Checks that a media type string ending in a bare semicolon parses to a
 * type whose parameter map is non-null and empty.
 *
 * @since TIKA-121
 */
@Test
public void testParseNoParamsWithSemi() {
    MediaType parsed = MediaType.parse("text/html;");

    assertNotNull(parsed);
    assertNotNull(parsed.getParameters());
    assertNotNull(parsed.getParameters().keySet());
    assertEquals(0, parsed.getParameters().keySet().size());
}
String charset = type.getParameters().get("charset"); if (charset != null) { try {
/** * Per http://tools.ietf.org/html/rfc2045#section-5.1, charset can be in quotes */ @Test public void testParseWithParamsAndQuotedCharset() { // Typical case, with a quoted charset String mimeStringWithParams = "text/html;charset=\"UTF-8\""; MediaType type = MediaType.parse(mimeStringWithParams); assertNotNull(type); assertEquals(singletonMap("charset", "UTF-8"), type.getParameters()); // Complex case, with various different quoted and un-quoted forms mimeStringWithParams = "text/html;charset=\'UTF-8\';test=\"true\";unquoted=here"; type = MediaType.parse(mimeStringWithParams); assertNotNull(type); assertEquals(3, type.getParameters().size()); assertEquals("UTF-8", type.getParameters().get("charset")); assertEquals("true", type.getParameters().get("test")); assertEquals("here", type.getParameters().get("unquoted")); }
/**
 * Checks that a media type string with three parameters parses to a type
 * whose parameter map holds exactly the keys {@code charset}, {@code foo}
 * and {@code foo2}.
 *
 * @since TIKA-121
 */
@Test
public void testParseWithParams() {
    MediaType parsed = MediaType.parse("text/html;charset=UTF-8;foo=bar;foo2=bar2");

    assertNotNull(parsed);
    assertNotNull(parsed.getParameters());
    assertNotNull(parsed.getParameters().keySet());
    assertEquals(3, parsed.getParameters().keySet().size());

    // Ask the key set directly instead of scanning it with flags.
    boolean hasCharset = parsed.getParameters().keySet().contains("charset");
    boolean hasFoo = parsed.getParameters().keySet().contains("foo");
    boolean hasFoo2 = parsed.getParameters().keySet().contains("foo2");
    assertTrue(hasCharset && hasFoo && hasFoo2);
}
metadata.set(Message.MULTIPART_BOUNDARY, contentType.getParameters().get("boundary")); } else { metadata.add(Metadata.MESSAGE_RAW_HEADER_PREFIX + parsedField.getName(),
incomingCharset = mt.getParameters().get("charset");
/**
 * Maps the given media type to the entry registered for its base type.
 *
 * @param type media type to normalize, possibly {@code null}
 * @return {@code null} for a {@code null} argument; the argument itself when
 *         its base type has no entry in the registry; otherwise the
 *         registered type, combined with the argument's parameters when it
 *         has any
 */
public MediaType normalize(MediaType type) {
    if (type == null) {
        return null;
    }

    MediaType registered = registry.get(type.getBaseType());
    if (registered == null) {
        // Nothing registered for the base type: hand the argument back untouched.
        return type;
    }

    if (!type.hasParameters()) {
        return registered;
    }
    return new MediaType(registered, type.getParameters());
}
public UniversalEncodingListener(Metadata metadata) { MediaType type = MediaType.parse(metadata.get(Metadata.CONTENT_TYPE)); if (type != null) { hint = type.getParameters().get("charset"); } if (hint == null) { hint = metadata.get(Metadata.CONTENT_ENCODING); } }
/**
 * Resolves the charset named by the {@code charset} parameter of the
 * Content-Type metadata entry.
 *
 * @param metadata parser metadata
 * @return {@code null} when the Content-Type entry cannot be parsed as a
 *         media type; otherwise whatever {@code getCharsetByLabel} returns
 *         for the value of the {@code charset} parameter
 */
private static Charset charsetFromContentType(Metadata metadata) {
    MediaType parsed = MediaType.parse(metadata.get(Metadata.CONTENT_TYPE));
    if (parsed == null) {
        return null;
    }
    return getCharsetByLabel(parsed.getParameters().get("charset"));
}
public UniversalEncodingListener(Metadata metadata) { MediaType type = MediaType.parse(metadata.get(Metadata.CONTENT_TYPE)); if (type != null) { hint = type.getParameters().get("charset"); } if (hint == null) { hint = metadata.get(Metadata.CONTENT_ENCODING); } }
public UniversalEncodingListener(Metadata metadata) { MediaType type = MediaType.parse(metadata.get(Metadata.CONTENT_TYPE)); if (type != null) { hint = type.getParameters().get("charset"); } if (hint == null) { hint = metadata.get(Metadata.CONTENT_ENCODING); } }
public UniversalEncodingListener(Metadata metadata) { MediaType type = MediaType.parse(metadata.get(Metadata.CONTENT_TYPE)); if (type != null) { hint = type.getParameters().get("charset"); } if (hint == null) { hint = metadata.get(Metadata.CONTENT_ENCODING); } }
/**
 * Maps the given media type to the entry registered for its base type.
 *
 * @param type media type to normalize, possibly {@code null}
 * @return {@code null} for a {@code null} argument; the argument itself when
 *         its base type has no entry in the registry; otherwise the
 *         registered type, combined with the argument's parameters when it
 *         has any
 */
public MediaType normalize(MediaType type) {
    if (type == null) {
        return null;
    }

    MediaType registered = registry.get(type.getBaseType());
    if (registered == null) {
        // Nothing registered for the base type: hand the argument back untouched.
        return type;
    }

    if (!type.hasParameters()) {
        return registered;
    }
    return new MediaType(registered, type.getParameters());
}
/**
 * Parses a media range string, using the {@code q} parameter as its quality.
 *
 * The string is parsed as a media type and normalized through the given
 * registry. When a {@code q} parameter is present it is removed from the
 * parameters and parsed as a double; otherwise the quality is 1.0.
 *
 * @param range    media range string to parse
 * @param registry registry used to normalize the parsed media type
 * @return the parsed media range, or {@code null} when the string cannot be
 *         parsed as a media type or the {@code q} value is not a number
 */
public static MediaRange parse(String range, MediaTypeRegistry registry) {
    MediaType type = MediaType.parse(range);
    if (type == null) {
        return null;
    }
    type = registry.normalize(type);

    // Work on a copy so that removing "q" does not touch the type's own parameters.
    Map<String, String> remaining = new HashMap<String, String>(type.getParameters());
    String quality = remaining.remove("q");
    if (quality == null) {
        return new MediaRange(type, 1.0);
    }

    try {
        MediaType withoutQuality = new MediaType(type.getBaseType(), remaining);
        return new MediaRange(withoutQuality, Double.parseDouble(quality));
    } catch (NumberFormatException e) {
        return null;
    }
}