/**
 * Maps a document to its identifier.
 *
 * @param input the document whose identifier is requested
 * @return the value returned by {@code input.getId()}
 */
@Override
public String apply(NativeDocument input) {
    final String documentId = input.getId();
    return documentId;
}
/**
 * Parses one attachment as XML and, based on the namespace of its root element,
 * creates attachments for every content-location XPath configured for that namespace.
 *
 * <p>Nothing is done when the attachment cannot be parsed into a document. A WARN-level
 * problem is reported through {@code problemHandler} when the root namespace is blank,
 * when the namespace has no entry in {@code attachmentQueryMap}, or when any exception
 * is thrown while processing.
 *
 * @param inputDocument      document that owns the attachment; its id is used in problem reports
 * @param attachment         attachment whose data is parsed
 * @param invalidAttachments list handed on to {@code createAttachmentByLocation}
 */
private void processSingleAttachment(final NativeDocument inputDocument, final Attachment<?> attachment,
        List<String> invalidAttachments) {
    try {
        final Document parsed = parseDocumentThreadSafetly(getAttachmentData(attachment));
        if (parsed == null) {
            return;
        }

        final String rootNamespace = parsed.getDocumentElement().getNamespaceURI();
        if (!StringUtils.isNotBlank(rootNamespace)) {
            problemHandler.handleProblem(LogSeverity.WARN, inputDocument.getId(),
                    ImporterConstants.PROBLEM_ERROR_READ_ATTACHMENTS,
                    "Namespace was not defined in root element");
            return;
        }

        if (!attachmentQueryMap.containsKey(rootNamespace)) {
            problemHandler.handleProblem(LogSeverity.WARN, inputDocument.getId(),
                    ImporterConstants.PROBLEM_ERROR_READ_ATTACHMENTS,
                    "Unknown namespace " + rootNamespace + " . attachments will not be processed.");
            return;
        }

        // The configured value is a list of XPaths split with StringUtils.split.
        final String configuredXPaths = attachmentQueryMap.get(rootNamespace);
        for (String locationXPath : StringUtils.split(configuredXPaths)) {
            createAttachmentByLocation(inputDocument, attachment.getId(), parsed, rootNamespace,
                    locationXPath, invalidAttachments);
        }
    } catch (Exception failure) {
        problemHandler.handleProblem(LogSeverity.WARN, inputDocument.getId(),
                ImporterConstants.PROBLEM_ERROR_READ_ATTACHMENTS, failure);
    }
}
/**
 * Wraps the stream of an attachment in an {@code InputStreamResource}.
 *
 * @param document   document owning the attachment; its id becomes part of the resource description
 * @param attachment attachment whose stream is wrapped
 * @return the resource, or {@code null} when {@code getStream} yields no stream
 */
private Resource toResource(NativeDocument document, Attachment<?> attachment) {
    final InputStream content = getStream(attachment);
    if (content != null) {
        final String description =
                "Record id: " + document.getId() + ", attachment id: " + attachment.getId();
        return new InputStreamResource(content, description);
    }
    return null;
}
/**
 * Builds data responses from the source attachment of the current document.
 *
 * <p>An empty list is returned when there is no current document, when its object-class
 * tag differs from {@code BWMetaImporterConstants.SOURCE_FORMAT}, or when it has no source
 * attachment (a warning is emitted in the last case). When {@code getLevelToImport} returns
 * a non-blank level, only elements whose type equals that level are converted.
 *
 * @return one {@code DataResponse} per retained element, in the order produced by the builder
 */
@Override
public Iterable<DataResponse> feedFromCurrentElement() {
    final NativeDocument current = state.getCurrentDocument();
    if (current == null
            || !BWMetaImporterConstants.SOURCE_FORMAT.equals(
                    current.getTagValue(RepositoryStoreConstants.TAG_NAME_OBJECT_CLASS))) {
        return Collections.emptyList();
    }

    final String requestedLevel = getLevelToImport(current);

    final Attachment<?> source = current.getAttachments().get(CommonExtractorContstants.SOURCE_ATTRIBUTE);
    if (source == null) {
        warn("No source attachment.", current.getId());
        return Collections.emptyList();
    }

    final InputStreamResource resource =
            new InputStreamResource(getStream(source), "Record id: " + current.getId());
    Stream<YElement> pipeline = builder.build(resource).stream();
    if (StringUtils.isNotBlank(requestedLevel)) {
        pipeline = pipeline.filter(
                element -> StringUtils.equals(YModelUtils.getType(element), requestedLevel));
    }
    return pipeline
            .map(element -> toDataResponse(element, current))
            .collect(Collectors.toList());
}
private void tryRecord(NativeDocument document) { // try { System.out.println("trying..." + document.getId()); YElement element = (YElement) ((Document) document).getMetadata(); StringBuilder textBuilder = new StringBuilder(); String title = fetchTitle(element); textBuilder.append(title).append("."); System.out.println("title=" + title); buildWithPlainText(textBuilder, document, element); // System.out.println("text="+textBuilder.substring(0, 300)); String finalText = textBuilder.toString(); // List<String> soncaKeywords = trySoncaKeywords(element, // finalText); List<String> pwKeywords = tryPwKeywords(finalText); writeResult(element.getId(), title, finalText.length(), pwKeywords); writer.flush(); // List<String> keywords = // metadataGenerator.generateKeywords(YLanguage.Polish, // textBuilder.toString()); } catch (Exception e) { e.printStackTrace(); } }
/**
 * Persists each document in the repository, either replacing or storing it depending on
 * the {@code replace} flag.
 *
 * <p>Processing stops at the first failure: the problem is reported at WARN level and the
 * same exception is rethrown.
 *
 * @param items documents to persist
 * @throws Exception whatever the repository call threw for the failing document
 */
@Override
public void write(List<? extends NativeDocument> items) throws Exception {
    for (NativeDocument item : items) {
        try {
            if (!replace) {
                repository.storeDocument(item, eventTag);
            } else {
                repository.replaceDocument(item, eventTag);
            }
        } catch (Exception failure) {
            // Report first, then let the caller see the original exception.
            problemHandler.handleProblem(LogSeverity.WARN, item.getId(), MONGO_WRITE, failure);
            throw failure;
        }
    }
}
/**
 * Fetches the document named in the request and rejects it when its lower-cased dataset
 * tag equals {@code ApplicationConstants.DEFAULT_DATASET}.
 *
 * @param request duplicate report carrying the id of the document to check
 * @return the fetched document when the dataset check passes
 * @throws InvalidEntryException when the dataset tag matches the default dataset
 */
private NativeDocument checkDataset(ReportDuplicateRequest request) {
    final NativeDocument duplicate = documentRepository.fetchDocument(request.getDuplicateDocumentId());

    final String normalizedDataset =
            StringUtils.lowerCase(duplicate.getTagValue(RepositoryStoreConstants.TAG_NAME_DATASET));
    final boolean inDefaultDataset = ApplicationConstants.DEFAULT_DATASET.equals(normalizedDataset);

    if (!inDefaultDataset) {
        return duplicate;
    }
    throw new InvalidEntryException(duplicate.getId(), "Cannot request duplicate for resource added by user");
}
/**
 * Resolves the file name of an attachment.
 *
 * <p>The content-name tag of the attachment wins when present. Otherwise the name is read
 * from the parent metadata attachment, using the attachment's path-in-metadata XPath with
 * {@code LOCATION_XPATH_NAME} replaced by {@code FILE_NAME_XPATH_NAME}.
 *
 * @param document   document holding both the attachment and its parent metadata attachment
 * @param attachment attachment whose file name is wanted
 * @return the file name, or {@code null} when the XPath tag, the parent-attachment tag,
 *         the parent attachment itself, or the XPath target is missing
 * @throws Exception propagated from {@code getBwMeta} or {@code getFileName}
 */
private String getPartFilename(NativeDocument document, Attachment<?> attachment) throws Exception {
    String name = attachment.getTagValue(RepositoryStoreConstants.TAG_NAME_CONTENT_NAME);
    if (name != null) {
        return name;
    }

    String xpath = attachment.getTagValue(ImporterConstants.PROPERTY_ATTACHMENT_PATH_IN_METADATA);
    if (xpath == null) {
        return null;
    }
    xpath = xpath.replace(LOCATION_XPATH_NAME, FILE_NAME_XPATH_NAME);

    String metadataLocation = attachment.getTagValue(ImporterConstants.PROPERTY_PARENT_ATTACHMENT);
    if (metadataLocation == null) {
        return null;
    }

    Attachment<?> metadataAttachment = document.getAttachments().get(metadataLocation);
    if (metadataAttachment == null) {
        // The tag points at an attachment the document does not contain; treat it like the
        // other "name not available" cases instead of handing null to getBwMeta.
        LOGGER.warn("Couldn't find metadata attachment: " + metadataLocation + " in document: "
                + document.getId());
        return null;
    }

    JXPathContext context = getBwMeta(metadataAttachment);
    try {
        return getFileName(context, xpath);
    } catch (JXPathNotFoundException e) {
        LOGGER.warn("Couldn't retrieve attachment name from document: " + document.getId() + " and path: " + xpath);
        return null;
    }
}
/**
 * Rejects the duplicate report when any authorship, in a status other than
 * {@code AuthorshipStatus.REMOVED}, exists for the reported document id.
 *
 * @param request  duplicate report supplying the document id to query
 * @param document document whose id is placed in the thrown exception
 * @throws InvalidEntryException when the authorship query reports a total size above zero
 */
private void checkAuthorshipEntry(ReportDuplicateRequest request, NativeDocument document) {
    // Every status except REMOVED counts as an existing authorship.
    final Set<AuthorshipStatus> consideredStatuses = Sets.newHashSet(AuthorshipStatus.values());
    consideredStatuses.remove(AuthorshipStatus.REMOVED);

    final AuthorshipQuery authorshipQuery = new AuthorshipQuery();
    authorshipQuery.setDocumentId(request.getDuplicateDocumentId());
    authorshipQuery.setStatuses(consideredStatuses);

    final boolean authorshipExists =
            authorshipService.fetchAuthorships(authorshipQuery).getTotalSize() > 0;
    if (authorshipExists) {
        throw new InvalidEntryException(document.getId(), "Authorship entry exists for given document");
    }
}
/**
 * Creates or reuses an attachment for the given path.
 *
 * <p>When the repository offers a fetcher for {@code path}, the fetcher supplies the
 * attachment information; otherwise the path itself is used as a location. An attachment
 * already present in the information is added to the document as an existing attachment
 * and returned. Otherwise the location is resolved against the working directory and the
 * file found there is added through {@code addAttachment}.
 *
 * @param document                  document receiving the attachment
 * @param parentPath                parent path handed to {@code addAttachment}; must not be null
 * @param path                      attachment path; must not be null
 * @param totalAttachmentsCount     passed to the fetcher when one is available
 * @param downloadRemoteAttachments not used by this implementation
 * @return the attachment, or {@code null} when the information carries neither an
 *         attachment nor a location
 * @throws InvalidAttachmentException when the resolved file does not exist
 * @throws IllegalArgumentException   presumably raised by the {@code Assert} checks when an
 *                                    argument or the working directory is invalid
 */
@Override
public Attachment<?> createAttachment(NativeDocument document, String parentPath, String path,
        int totalAttachmentsCount, Boolean downloadRemoteAttachments) throws InvalidAttachmentException {
    Assert.notNull(path, "Path is null");
    Assert.notNull(parentPath, "Parent path is null");
    // Check the configured path itself, before constructing the File: a File object is never
    // null, and new File(null) would fail with a bare NullPointerException instead.
    Assert.notNull(workingDirectoryPath, "WorkingDirectory is null");
    File workingDirectory = new File(workingDirectoryPath);
    Assert.isTrue(workingDirectory.isDirectory(), "WorkingDirectory is not a directory");

    AttachmentFetcher fetcher = repository.getSupportedFetcher(path);
    final AttachmentInformation information;
    if (fetcher != null) {
        information = fetcher.getAttachmentInformation(document.getId(), path, totalAttachmentsCount);
    } else {
        // No fetcher supports this path: treat it as a location relative to the working directory.
        information = new AttachmentInformation(null, path);
    }

    if (information.getAttachment() != null) {
        document.addExistingAttachment(information.getAttachment(), false);
        return information.getAttachment();
    } else if (information.getLocation() != null) {
        File attachmentFile = new File(workingDirectory + File.separator + information.getLocation());
        if (attachmentFile.exists()) {
            return addAttachment(document, attachmentFile, parentPath);
        } else {
            throw new InvalidAttachmentException(path, "");
        }
    }
    return null;
}
// Report the failure as a WARN-level "Invalid attachment" problem against the input document.
problemHandler.handleProblem(LogSeverity.WARN, inputDocument.getId(), "Invalid attachment", e);
// Record every path carried by this entry as invalid.
invalidPaths.addAll(attachmentPathEntry.getValue());
/**
 * Stores each document in the repository and notifies the optional writer listener of the
 * outcome.
 *
 * <p>A failure for one document is reported at WARN level and passed to the listener, after
 * which processing continues with the next document; the exception is not rethrown.
 *
 * @param items documents to store
 * @throws Exception declared by the overridden method
 */
@Override
public void write(List<? extends NativeDocument> items) throws Exception {
    for (NativeDocument item : items) {
        try {
            repository.storeDocument(item, eventTag);
            if (writerListener != null) {
                writerListener.successfulSaved(item);
            }
        } catch (Exception failure) {
            // A null item has no id to report, so fall back to a placeholder.
            final String reportedId = (item != null) ? item.getId() : "Unknown";
            problemHandler.handleProblem(LogSeverity.WARN, reportedId, "Writing documents", failure);
            if (writerListener != null) {
                writerListener.errorOnSave(item, failure);
            }
        }
    }
}
}