// A message built from a title and content only.
SystemMessage withoutAttachment = new SystemMessage("title", "content");

// The same message, this time carrying a freshly constructed attachment.
Attachment attachment = new Attachment();
SystemMessage withAttachment =
        new SystemMessage("title", "content", attachment);
/**
 * Handles a {@code multipart/form-data} POST on the {@code form} path and
 * answers with {@code text/xml}.
 *
 * @param att  the multipart part; its payload is read as an {@link InputStream}
 * @param info URI information injected from the request context
 * @return the streaming output produced by {@code produceOutput} for the "xml" handler
 */
@POST
@Consumes("multipart/form-data")
@Produces("text/xml")
@Path("form")
public StreamingOutput getXMLFromMultipart(Attachment att, @Context final UriInfo info) {
    InputStream payload = att.getObject(InputStream.class);
    return produceOutput(payload, att.getHeaders(), info, "xml");
}
/**
 * Posts the file at {@code filePath} as a multipart "input" part to the
 * header-processing endpoint ({@code restHostUrlStr + GROBID_PROCESSHEADER_PATH})
 * and copies every metadata entry parsed from the response into
 * {@code metadata}, prefixing each key with {@code grobid:header_}.
 *
 * <p>Failures while reading or parsing the response are logged and swallowed
 * (best effort). The file stream opened for the upload is always closed
 * before this method returns.
 *
 * @param filePath path of the file to upload
 * @param handler  content handler (not used by this method)
 * @param metadata metadata object that receives the extracted header fields
 * @param context  parse context handed to the TEI parser
 * @throws FileNotFoundException if the file cannot be opened for reading
 */
public void parse(String filePath, ContentHandler handler, Metadata metadata, ParseContext context)
        throws FileNotFoundException {
    File pdfFile = new File(filePath);
    ContentDisposition cd = new ContentDisposition(
            "form-data; name=\"input\"; filename=\"" + pdfFile.getName() + "\"");

    // Keep a handle on the stream so it can be released once the request is done;
    // previously it was created inline and never closed.
    FileInputStream pdfStream = new FileInputStream(pdfFile);
    try {
        Attachment att = new Attachment("input", pdfStream, cd);
        MultipartBody body = new MultipartBody(att);

        Response response = WebClient
                .create(restHostUrlStr + GROBID_PROCESSHEADER_PATH)
                .accept(MediaType.APPLICATION_XML).type(MediaType.MULTIPART_FORM_DATA)
                .post(body);

        try {
            String resp = response.readEntity(String.class);
            Metadata teiMet = new TEIDOMParser().parse(resp, context);
            for (String key : teiMet.names()) {
                metadata.add("grobid:header_" + key, teiMet.get(key));
            }
        } catch (Exception e) {
            LOG.warn("Couldn't read response", e);
        }
    } finally {
        // close() can throw IOException, which is not part of this method's
        // declared contract, so it is logged rather than propagated.
        try {
            pdfStream.close();
        } catch (java.io.IOException e) {
            LOG.warn("Couldn't close input file stream", e);
        }
    }
}
/**
 * Looks up the attachment for {@code contentId} via {@code getAttachment}
 * and converts its payload to the requested type.
 *
 * @param contentId identifier passed to {@code getAttachment}
 * @param cls       type the attachment payload should be returned as
 * @param <T>       requested payload type
 * @return the attachment's object as {@code cls}, or {@code null} when no attachment matches
 */
public <T> T getAttachmentObject(String contentId, Class<T> cls) {
    Attachment match = getAttachment(contentId);
    return match == null ? null : match.getObject(cls);
}
}
/**
 * Finds the first attachment in {@code atts} that matches {@code contentId}.
 *
 * <p>An attachment matches when its content id equals {@code contentId}
 * ignoring case, or when its Content-Disposition carries a {@code name}
 * parameter equal to {@code contentId} (case-sensitive).
 *
 * @param contentId the id or form-field name to look for
 * @return the first matching attachment, or {@code null} if none matches
 */
public Attachment getAttachment(String contentId) {
    for (Attachment candidate : atts) {
        // Content id takes precedence and is compared case-insensitively.
        if (contentId.equalsIgnoreCase(candidate.getContentId())) {
            return candidate;
        }

        // Otherwise fall back to the "name" parameter of the disposition header.
        ContentDisposition disposition = candidate.getContentDisposition();
        boolean nameMatches = disposition != null
                && contentId.equals(disposition.getParameter("name"));
        if (nameMatches) {
            return candidate;
        }
    }
    return null;
}
/**
 * Creates the {@link Attachment} from the builder's headers, data handler and object.
 *
 * <p>When a content disposition has been set, its string form is written
 * into the headers as the single {@code Content-Disposition} value first.
 *
 * @return a new attachment built from the current builder state
 */
public Attachment build() {
    if (contentDisposition != null) {
        String dispositionValue = contentDisposition.toString();
        headers.putSingle("Content-Disposition", dispositionValue);
    }
    Attachment built = new Attachment(headers, dataHandler, object);
    return built;
}
/**
 * Returns the {@code Content-Disposition} header wrapped in a
 * {@link ContentDisposition}.
 *
 * @return the content disposition, or {@code null} when the header is absent
 */
public ContentDisposition getContentDisposition() {
    String rawHeader = getHeader("Content-Disposition");
    if (rawHeader == null) {
        return null;
    }
    return new ContentDisposition(rawHeader);
}
/**
 * Handles a {@code multipart/form-data} POST on the {@code form} path and
 * answers with {@code text/html}.
 *
 * @param att  the multipart part; its payload is read as an {@link InputStream}
 * @param info URI information injected from the request context
 * @return the streaming output produced by {@code produceOutput} for the "html" handler
 */
@POST
@Consumes("multipart/form-data")
@Produces("text/html")
@Path("form")
public StreamingOutput getHTMLFromMultipart(Attachment att, @Context final UriInfo info) {
    InputStream payload = att.getObject(InputStream.class);
    return produceOutput(payload, att.getHeaders(), info, "html");
}
// A message with an explicitly empty attachment slot.
SystemMessage withoutAttachment =
        new SystemMessage("title", "content", Optional.empty());

// The same message, with a freshly constructed attachment wrapped in an Optional.
Attachment attachment = new Attachment();
SystemMessage withAttachment =
        new SystemMessage("title", "content", Optional.ofNullable(attachment));
/**
 * Resolves the attachment identified by {@code contentId} and returns its
 * payload as an instance of {@code cls}.
 *
 * @param contentId identifier passed to {@code getAttachment}
 * @param cls       type the attachment payload should be returned as
 * @param <T>       requested payload type
 * @return the payload as {@code cls}, or {@code null} if {@code getAttachment} finds nothing
 */
public <T> T getAttachmentObject(String contentId, Class<T> cls) {
    Attachment found = getAttachment(contentId);
    if (found == null) {
        return null;
    }
    return found.getObject(cls);
}
}
/**
 * Tells whether an attachment is addressed by the given id.
 *
 * <p>An empty {@code value} matches every attachment. Otherwise the
 * attachment matches when its content id equals {@code value}, or when its
 * Content-Disposition has a {@code name} parameter equal to {@code value}.
 *
 * @param at    the attachment to test
 * @param value the expected content id or form-field name; must not be {@code null}
 * @return {@code true} if the attachment matches, {@code false} otherwise
 */
public static boolean matchAttachmentId(Attachment at, String value) {
    if (value.isEmpty()) {
        return true;
    }
    // Compare from the known non-null side so that an attachment without a
    // content id falls through to the Content-Disposition check instead of
    // throwing a NullPointerException.
    if (value.equals(at.getContentId())) {
        return true;
    }
    ContentDisposition cd = at.getContentDisposition();
    return cd != null && value.equals(cd.getParameter("name"));
}
/**
 * Parses this part's {@code Content-Disposition} header.
 *
 * @return a {@link ContentDisposition} built from the header value, or
 *         {@code null} if the header is not present
 */
public ContentDisposition getContentDisposition() {
    String dispositionValue = getHeader("Content-Disposition");
    if (dispositionValue != null) {
        return new ContentDisposition(dispositionValue);
    }
    return null;
}
/**
 * Handles a {@code multipart/form-data} POST on the {@code form} path and
 * answers with {@code text/plain}.
 *
 * @param att  the multipart part; its payload is read as an {@link InputStream}
 * @param info URI information injected from the request context
 * @return the streaming output produced by {@code produceText}
 */
@POST
@Consumes("multipart/form-data")
@Produces("text/plain")
@Path("form")
public StreamingOutput getTextFromMultipart(Attachment att, @Context final UriInfo info) {
    InputStream payload = att.getObject(InputStream.class);
    return produceText(payload, att.getHeaders(), info);
}
@Test public void testTextMainMultipart() throws Exception { //boilerpipe Attachment attachmentPart = new Attachment("myhtml", "text/html", ClassLoader.getSystemResourceAsStream("testHTML.html")); Response response = WebClient.create(endPoint + TIKA_PATH+"/form/main") .type("multipart/form-data") .accept("text/plain") .post(attachmentPart); String responseMsg = getStringFromInputStream((InputStream) response .getEntity()); assertTrue(responseMsg.contains("Title : Test Indexation Html")); assertFalse(responseMsg.contains("Indexation du fichier")); }
/**
 * Handles a {@code multipart/form-data} POST on the {@code form/main} path
 * and answers with {@code text/plain}.
 *
 * @param att  the multipart part; its payload is read as an {@link InputStream}
 * @param info URI information injected from the request context
 * @return the streaming output produced by {@code produceTextMain}
 */
@POST
@Consumes("multipart/form-data")
@Produces("text/plain")
@Path("form/main")
public StreamingOutput getTextMainFromMultipart(final Attachment att, @Context final UriInfo info) {
    InputStream payload = att.getObject(InputStream.class);
    return produceTextMain(payload, att.getHeaders(), info);
}
/**
 * Posts {@code TEST_DOC} as a multipart part to the {@code /form} endpoint,
 * asks for {@code text/xml}, and checks that the response contains the word
 * "test" and the expected MD5 digest meta element.
 */
@Test
public void testSimpleWordMultipartXML() throws Exception {
    // The resource stream is opened exactly once, for the attachment itself;
    // an earlier stray call opened a second stream that was never used or closed.
    Attachment attachmentPart =
            new Attachment("myworddoc", "application/msword",
                    ClassLoader.getSystemResourceAsStream(TEST_DOC));
    WebClient webClient = WebClient.create(endPoint + TIKA_PATH + "/form");
    Response response = webClient.type("multipart/form-data")
            .accept("text/xml")
            .post(attachmentPart);
    String responseMsg = getStringFromInputStream((InputStream) response
            .getEntity());
    assertTrue(responseMsg.contains("test"));
    assertContains("<meta name=\"X-TIKA:digest:MD5\" content=\"f8be45c34e8919eedba48cc8d207fbf0\"/>",
            responseMsg);
}
/**
 * Handles a {@code multipart/form-data} POST on the {@code form} path and
 * returns the result of {@code parseMetadata} as the entity of an OK response.
 * The declared response types are CSV, JSON and RDF/XML.
 *
 * @param att  the multipart part; its payload is read as an {@link InputStream}
 * @param info URI information injected from the request context
 * @return an OK response whose entity is the value returned by {@code parseMetadata}
 * @throws Exception if {@code parseMetadata} fails
 */
@POST
@Consumes("multipart/form-data")
@Produces({"text/csv", "application/json", "application/rdf+xml"})
@Path("form")
public Response getMetadataFromMultipart(Attachment att, @Context UriInfo info) throws Exception {
    InputStream payload = att.getObject(InputStream.class);
    Response.ResponseBuilder ok = Response.ok(parseMetadata(payload, att.getHeaders(), info));
    return ok.build();
}
@Test public void testExtractTextAcceptPlainText() throws Exception { //TIKA-2384 Attachment attachmentPart = new Attachment( "my-docx-file", "application/vnd.openxmlformats-officedocument.wordprocessingml.document", ClassLoader.getSystemResourceAsStream("2pic.docx") ); Response response = WebClient.create(endPoint + TIKA_PATH + "/form") .type("multipart/form-data") .accept("text/plain") .post(attachmentPart); String responseMsg = getStringFromInputStream((InputStream) response.getEntity()); assertTrue(responseMsg.contains("P1040893.JPG")); assertNotFound( STREAM_CLOSED_FAULT, responseMsg ); }
throws Exception { return Response.ok( parseMetadata(att.getObject(InputStream.class), att.getHeaders(), info, handlerTypeName)).build();
// Five identical expressions, each constructing an Attachment named "myworddocx"
// with the .docx media type from the TEST_RECURSIVE_DOC system resource stream.
// NOTE(review): none of the results is assigned, so as written every Attachment
// (and the stream it was given) is discarded immediately. Presumably the
// assignment targets were lost when this fragment was extracted - confirm
// against the original test methods before relying on it.
new Attachment("myworddocx", "application/vnd.openxmlformats-officedocument.wordprocessingml.document", ClassLoader.getSystemResourceAsStream(TEST_RECURSIVE_DOC)); new Attachment("myworddocx", "application/vnd.openxmlformats-officedocument.wordprocessingml.document", ClassLoader.getSystemResourceAsStream(TEST_RECURSIVE_DOC)); new Attachment("myworddocx", "application/vnd.openxmlformats-officedocument.wordprocessingml.document", ClassLoader.getSystemResourceAsStream(TEST_RECURSIVE_DOC)); new Attachment("myworddocx", "application/vnd.openxmlformats-officedocument.wordprocessingml.document", ClassLoader.getSystemResourceAsStream(TEST_RECURSIVE_DOC)); new Attachment("myworddocx", "application/vnd.openxmlformats-officedocument.wordprocessingml.document", ClassLoader.getSystemResourceAsStream(TEST_RECURSIVE_DOC));