/**
 * Renders a provenance event as a multi-line, human-readable description.
 * The first line carries the event's position ({@code index}) and byte offset
 * within the file; every following line is prefixed with a newline and a tab
 * and shows one labelled property of the event.
 *
 * @param event      the event whose properties are printed
 * @param index      the index of the event within the file being read
 * @param byteOffset the byte offset at which the event was found
 * @return the formatted description
 */
private static String stringify(final ProvenanceEventRecord event, final int index, final long byteOffset) {
    final StringBuilder sb = new StringBuilder();
    sb.append("Event Index in File = ").append(index).append(", Byte Offset = ").append(byteOffset);
    sb.append("\n\t").append("Event ID = ").append(event.getEventId());
    sb.append("\n\t").append("Event Type = ").append(event.getEventType());
    sb.append("\n\t").append("Event Time = ").append(new Date(event.getEventTime()));
    sb.append("\n\t").append("Event UUID = ").append(event.getFlowFileUuid());
    sb.append("\n\t").append("Component ID = ").append(event.getComponentId());
    // This line previously reused the "Event ID" label, so the output showed two
    // "Event ID" entries and never named the component type.
    sb.append("\n\t").append("Component Type = ").append(event.getComponentType());
    sb.append("\n\t").append("Transit URI = ").append(event.getTransitUri());
    sb.append("\n\t").append("Parent IDs = ").append(event.getParentUuids());
    sb.append("\n\t").append("Child IDs = ").append(event.getChildUuids());
    sb.append("\n\t").append("Previous Attributes = ").append(event.getPreviousAttributes());
    sb.append("\n\t").append("Updated Attributes = ").append(event.getUpdatedAttributes());
    return sb.toString();
}
}
@Override public Builder fromEvent(final ProvenanceEventRecord event) { eventTime = event.getEventTime(); entryDate = event.getFlowFileEntryDate(); lineageStartDate = event.getLineageStartDate(); eventType = event.getEventType(); componentId = event.getComponentId(); componentType = event.getComponentType(); transitUri = event.getTransitUri(); sourceSystemFlowFileIdentifier = event.getSourceSystemFlowFileIdentifier(); uuid = event.getFlowFileUuid(); parentUuids = event.getParentUuids(); childrenUuids = event.getChildUuids(); alternateIdentifierUri = event.getAlternateIdentifierUri(); eventDuration = event.getEventDuration(); previousAttributes = event.getPreviousAttributes(); updatedAttributes = event.getUpdatedAttributes(); details = event.getDetails(); relationship = event.getRelationship(); contentClaimSection = event.getContentClaimSection(); contentClaimContainer = event.getContentClaimContainer(); contentClaimIdentifier = event.getContentClaimIdentifier(); contentClaimOffset = event.getContentClaimOffset(); contentSize = event.getFileSize(); previousClaimSection = event.getPreviousContentClaimSection(); previousClaimContainer = event.getPreviousContentClaimContainer(); previousClaimIdentifier = event.getPreviousContentClaimIdentifier(); previousClaimOffset = event.getPreviousContentClaimOffset();
/**
 * Produces a compact one-line summary of the wrapped record of the form
 * {@code Event[ID=..., Type=..., UUID=..., Component=...]}.
 *
 * @return the summary text
 */
@Override
public String toString() {
    final StringBuilder summary = new StringBuilder();
    summary.append("Event[ID=").append(record.getEventId());
    summary.append(", Type=").append(record.getEventType());
    summary.append(", UUID=").append(record.getFlowFileUuid());
    summary.append(", Component=").append(record.getComponentId());
    summary.append("]");
    return summary.toString();
}
}
/**
 * Creates a placeholder that copies four values from {@code original}:
 * its event id, event time, component id and FlowFile UUID.
 *
 * @param original the event to copy the values from
 */
public PlaceholderProvenanceEvent(final ProvenanceEventRecord original) {
    // Identity and timing of the event itself.
    this.eventId = original.getEventId();
    this.eventTime = original.getEventTime();

    // The component that emitted it and the FlowFile it refers to.
    this.componentId = original.getComponentId();
    this.flowFileUuid = original.getFlowFileUuid();
}
/**
 * Wraps a provenance event together with its id and schema, and builds the
 * two content-claim records (current and previous) from the event's claim
 * container, section, identifier, offset and size.
 *
 * @param event              the provenance event being wrapped
 * @param eventId            the id to associate with the event
 * @param schema             the schema of this record
 * @param contentClaimSchema the schema used for both content-claim records
 */
public EventRecord(final ProvenanceEventRecord event, final long eventId, final RecordSchema schema, final RecordSchema contentClaimSchema) {
    this.event = event;
    this.eventId = eventId;
    this.schema = schema;

    // Claim describing the content as of this event.
    this.contentClaimRecord = createContentClaimRecord(
            contentClaimSchema,
            event.getContentClaimContainer(),
            event.getContentClaimSection(),
            event.getContentClaimIdentifier(),
            event.getContentClaimOffset(),
            event.getFileSize());

    // Claim describing the content before this event.
    this.previousClaimRecord = createContentClaimRecord(
            contentClaimSchema,
            event.getPreviousContentClaimContainer(),
            event.getPreviousContentClaimSection(),
            event.getPreviousContentClaimIdentifier(),
            event.getPreviousContentClaimOffset(),
            event.getPreviousFileSize());
}
final String platform, final String nodeIdentifier) { addField(builder, "eventId", UUID.randomUUID().toString()); addField(builder, "eventOrdinal", event.getEventId()); addField(builder, "eventType", event.getEventType().name()); addField(builder, "timestampMillis", event.getEventTime()); addField(builder, "timestamp", df.format(event.getEventTime())); addField(builder, "durationMillis", event.getEventDuration()); addField(builder, "lineageStart", event.getLineageStartDate()); addField(builder, "details", event.getDetails()); addField(builder, "componentId", event.getComponentId()); addField(builder, "componentType", event.getComponentType()); addField(builder, "componentName", componentName); addField(builder, "processGroupId", processGroupId, true); addField(builder, "processGroupName", processGroupName, true); addField(builder, "entityId", event.getFlowFileUuid()); addField(builder, "entityType", "org.apache.nifi.flowfile.FlowFile"); addField(builder, "entitySize", event.getFileSize()); addField(builder, "previousEntitySize", event.getPreviousFileSize()); addField(builder, factory, "updatedAttributes", event.getUpdatedAttributes()); addField(builder, factory, "previousAttributes", event.getPreviousAttributes()); final String contentUriBase = urlPrefix + "/nifi-api/provenance-events/" + event.getEventId() + "/content/"; final String nodeIdSuffix = nodeIdentifier == null ? "" : "?clusterNodeId=" + nodeIdentifier; addField(builder, "contentURI", contentUriBase + "output" + nodeIdSuffix); addField(builder, factory, "parentIds", event.getParentUuids()); addField(builder, factory, "childIds", event.getChildUuids()); addField(builder, "transitUri", event.getTransitUri()); addField(builder, "remoteIdentifier", event.getSourceSystemFlowFileIdentifier());
return eventId; case EventFieldNames.ALTERNATE_IDENTIFIER: return event.getAlternateIdentifierUri(); case EventFieldNames.CHILD_UUIDS: return event.getChildUuids(); case EventFieldNames.COMPONENT_ID: return event.getComponentId(); case EventFieldNames.COMPONENT_TYPE: return event.getComponentType(); case EventFieldNames.CONTENT_CLAIM: return contentClaimRecord; case EventFieldNames.EVENT_DETAILS: return event.getDetails(); case EventFieldNames.EVENT_DURATION: return event.getEventDuration(); case EventFieldNames.EVENT_TIME: return event.getEventTime(); case EventFieldNames.EVENT_TYPE: return event.getEventType().name(); case EventFieldNames.FLOWFILE_ENTRY_DATE: return event.getFlowFileEntryDate(); case EventFieldNames.FLOWFILE_UUID: return event.getFlowFileUuid(); case EventFieldNames.LINEAGE_START_DATE: return event.getLineageStartDate(); case EventFieldNames.PARENT_UUIDS: return event.getParentUuids(); case EventFieldNames.PREVIOUS_ATTRIBUTES: return event.getPreviousAttributes();
public Document convert(final ProvenanceEventRecord record, final StorageSummary persistedEvent) { final Document doc = new Document(); addField(doc, SearchableFields.FlowFileUUID, record.getFlowFileUuid()); addField(doc, SearchableFields.Filename, record.getAttribute(CoreAttributes.FILENAME.key())); addField(doc, SearchableFields.ComponentID, record.getComponentId()); addField(doc, SearchableFields.AlternateIdentifierURI, record.getAlternateIdentifierUri()); addField(doc, SearchableFields.EventType, record.getEventType().name()); addField(doc, SearchableFields.Relationship, record.getRelationship()); addField(doc, SearchableFields.Details, record.getDetails()); addField(doc, SearchableFields.ContentClaimSection, record.getContentClaimSection()); addField(doc, SearchableFields.ContentClaimContainer, record.getContentClaimContainer()); addField(doc, SearchableFields.ContentClaimIdentifier, record.getContentClaimIdentifier()); addField(doc, SearchableFields.SourceQueueIdentifier, record.getSourceQueueIdentifier()); addField(doc, SearchableFields.TransitURI, record.getTransitUri()); addField(doc, searchableField, LuceneUtil.truncateIndexField(record.getAttribute(searchableField.getSearchableFieldName()))); doc.add(new LongField(SearchableFields.LineageStartDate.getSearchableFieldName(), record.getLineageStartDate(), Store.NO)); doc.add(new LongField(SearchableFields.EventTime.getSearchableFieldName(), record.getEventTime(), Store.NO)); doc.add(new LongField(SearchableFields.FileSize.getSearchableFieldName(), record.getFileSize(), Store.NO)); final ProvenanceEventType eventType = record.getEventType(); if (eventType == ProvenanceEventType.FORK || eventType == ProvenanceEventType.CLONE || eventType == ProvenanceEventType.REPLAY) { for (final String uuid : record.getChildUuids()) { if (!uuid.equals(record.getFlowFileUuid())) { addField(doc, SearchableFields.FlowFileUUID, uuid); for (final String uuid : record.getParentUuids()) {
final ProvenanceEventType type = event.getEventType(); if (type == ProvenanceEventType.JOIN) { throw new IllegalArgumentException("Cannot replay events that are created from multiple parents"); final Long contentSize = event.getPreviousFileSize(); final String contentClaimId = event.getPreviousContentClaimIdentifier(); final String contentClaimSection = event.getPreviousContentClaimSection(); final String contentClaimContainer = event.getPreviousContentClaimContainer(); if (event.getSourceQueueIdentifier() == null) { throw new IllegalArgumentException("Cannot replay data from Provenance Event because the event does not specify the Source FlowFile Queue"); FlowFileQueue queue = null; for (final Connection connection : connections) { if (event.getSourceQueueIdentifier().equals(connection.getIdentifier())) { queue = connection.getFlowFileQueue(); break; throw new IllegalStateException("Cannot replay data from Provenance Event because the Source FlowFile Queue with ID " + event.getSourceQueueIdentifier() + " no longer exists"); ResourceClaim resourceClaim = resourceClaimManager.getResourceClaim(event.getPreviousContentClaimContainer(), event.getPreviousContentClaimSection(), event.getPreviousContentClaimIdentifier()); if (resourceClaim == null) { resourceClaim = resourceClaimManager.newResourceClaim(event.getPreviousContentClaimContainer(), event.getPreviousContentClaimSection(), event.getPreviousContentClaimIdentifier(), false, false); final long claimOffset = event.getPreviousContentClaimOffset() == null ? 0L : event.getPreviousContentClaimOffset().longValue(); final StandardContentClaim contentClaim = new StandardContentClaim(resourceClaim, claimOffset); contentClaim.setLength(event.getPreviousFileSize() == null ? -1L : event.getPreviousFileSize());
final long offset; if (direction == ContentDirection.INPUT) { if (provEvent.getPreviousContentClaimContainer() == null || provEvent.getPreviousContentClaimSection() == null || provEvent.getPreviousContentClaimIdentifier() == null) { throw new IllegalArgumentException("Input Content Claim not specified"); final ResourceClaim resourceClaim = resourceClaimManager.newResourceClaim(provEvent.getPreviousContentClaimContainer(), provEvent.getPreviousContentClaimSection(), provEvent.getPreviousContentClaimIdentifier(), false, false); claim = new StandardContentClaim(resourceClaim, provEvent.getPreviousContentClaimOffset()); offset = provEvent.getPreviousContentClaimOffset() == null ? 0L : provEvent.getPreviousContentClaimOffset(); size = provEvent.getPreviousFileSize(); } else { if (provEvent.getContentClaimContainer() == null || provEvent.getContentClaimSection() == null || provEvent.getContentClaimIdentifier() == null) { throw new IllegalArgumentException("Output Content Claim not specified"); final ResourceClaim resourceClaim = resourceClaimManager.newResourceClaim(provEvent.getContentClaimContainer(), provEvent.getContentClaimSection(), provEvent.getContentClaimIdentifier(), false, false); claim = new StandardContentClaim(resourceClaim, provEvent.getContentClaimOffset()); offset = provEvent.getContentClaimOffset() == null ? 0L : provEvent.getContentClaimOffset(); size = provEvent.getFileSize(); .setFlowFileUUID(provEvent.getFlowFileUuid()) .setAttributes(provEvent.getAttributes(), Collections.emptyMap()) .setCurrentContentClaim(resourceClaim.getContainer(), resourceClaim.getSection(), resourceClaim.getId(), offset, size) .setTransitUri(requestUri) .setEventTime(System.currentTimeMillis()) .setFlowFileEntryDate(provEvent.getFlowFileEntryDate()) .setLineageStartDate(provEvent.getLineageStartDate()) .setComponentType(flowManager.getRootGroup().getName())
private Object getFieldValue(final ProvenanceEventRecord record, final SearchableField field) { if (SearchableFields.AlternateIdentifierURI.equals(field)) { return record.getAlternateIdentifierUri(); return record.getComponentId(); return record.getDetails(); return record.getEventTime(); return record.getEventType(); return record.getAttributes().get(CoreAttributes.FILENAME.key()); return record.getFileSize(); return record.getFlowFileUuid(); return record.getLineageStartDate(); return record.getRelationship(); return record.getTransitUri();
@Override public DataSetRefs analyze(AnalysisContext context, ProvenanceEventRecord event) { final String transitUri = event.getTransitUri(); final Matcher uriMatcher = URI_PATTERN.matcher(transitUri); if (!uriMatcher.matches()) { logger.warn("Unexpected transit URI: {}", new Object[]{transitUri}); return null; } final Referenceable ref = new Referenceable(TYPE); final String[] hostNames = splitHostNames(uriMatcher.group(1)); final String clusterName = context.getClusterResolver().fromHostNames(hostNames); final String tableName = uriMatcher.group(2); ref.set(ATTR_NAME, tableName); ref.set(ATTR_QUALIFIED_NAME, toQualifiedName(clusterName, tableName)); // TODO: 'uri' is a mandatory attribute, but what should we set? ref.set(ATTR_URI, transitUri); return singleDataSetRef(event.getComponentId(), event.getEventType(), ref); }
/**
 * Builds a {@code Referenceable} of {@code TYPE} describing the component
 * that produced the event: its name is the component type, its qualified name
 * is derived from the NiFi cluster name and the component id, and its
 * description states which event type was performed by which component type.
 *
 * @param context the analysis context supplying the NiFi cluster name
 * @param event   the provenance event being described
 * @return the populated reference
 */
protected Referenceable createDataSetRef(AnalysisContext context, ProvenanceEventRecord event) {
    final Referenceable dataSetRef = new Referenceable(TYPE);

    dataSetRef.set(ATTR_NAME, event.getComponentType());
    dataSetRef.set(
            ATTR_QUALIFIED_NAME,
            toQualifiedName(context.getNiFiClusterName(), event.getComponentId()));

    final String description = event.getEventType() + " was performed by " + event.getComponentType();
    dataSetRef.set(ATTR_DESCRIPTION, description);

    return dataSetRef;
}
if (query.getStartDate() != null && query.getStartDate().getTime() > event.getEventTime()) { return false; if (query.getEndDate() != null && query.getEndDate().getTime() < event.getEventTime()) { return false; if (event.getFileSize() > maxFileSize) { return false; if (event.getFileSize() < minFileSize) { return false; final String eventAttributeValue = event.getAttributes().get(attributeName); final String regex = searchValue.replace("?", ".").replace("*", ".*"); final Pattern pattern = Pattern.compile(regex, Pattern.CASE_INSENSITIVE); if (pattern.matcher(event.getFlowFileUuid()).matches()) { continue; for (final String uuid : event.getParentUuids()) { if (pattern.matcher(uuid).matches()) { found = true; for (final String uuid : event.getChildUuids()) { if (pattern.matcher(uuid).matches()) { found = true; } else if (event.getFlowFileUuid().equals(searchValue) || event.getParentUuids().contains(searchValue) || event.getChildUuids().contains(searchValue)) { continue;
switch (event.getEventType()) { case JOIN: case FORK: case CLONE: case REPLAY: { return submitLineageComputation(event.getParentUuids(), user, authorizer, LineageComputationType.EXPAND_PARENTS, eventId, event.getLineageStartDate(), event.getEventTime()); submission.getResult().setError("Event ID " + eventId + " indicates an event of type " + event.getEventType() + " so its parents cannot be expanded"); return submission;
/**
 * Asserts that the runner recorded exactly {@code expectedNumEvents}
 * provenance events, and that each of them is a {@code RECEIVE} event whose
 * transit URI starts with {@code udp://}.
 *
 * @param expectedNumEvents the number of provenance events expected
 */
private void verifyProvenance(int expectedNumEvents) {
    final List<ProvenanceEventRecord> recorded = runner.getProvenanceEvents();
    Assert.assertEquals(expectedNumEvents, recorded.size());

    for (final ProvenanceEventRecord recordedEvent : recorded) {
        final ProvenanceEventType actualType = recordedEvent.getEventType();
        Assert.assertEquals(ProvenanceEventType.RECEIVE, actualType);

        final String transitUri = recordedEvent.getTransitUri();
        Assert.assertTrue(transitUri.startsWith("udp://"));
    }
}
/**
 * Accepts only the event whose id equals {@code id}.
 *
 * @param event the candidate event
 * @return {@code true} when the event's id equals {@code id}
 */
@Override
public boolean select(final ProvenanceEventRecord event) {
    final boolean idMatches = event.getEventId() == id;
    return idMatches;
}
}, 1);
/**
 * Looks up an analyzer for the event's component type, transit URI and event
 * type, and delegates the analysis to it.
 *
 * @param analysisContext the context handed on to the analyzer
 * @param event           the provenance event to analyze
 * @return the analyzer's result, or {@code null} when no analyzer is found
 */
protected DataSetRefs executeAnalyzer(AnalysisContext analysisContext, ProvenanceEventRecord event) {
    final NiFiProvenanceEventAnalyzer matchedAnalyzer = NiFiProvenanceEventAnalyzerFactory.getAnalyzer(
            event.getComponentType(), event.getTransitUri(), event.getEventType());

    if (matchedAnalyzer != null) {
        if (logger.isDebugEnabled()) {
            logger.debug("Analyzer {} is found for event: {}", matchedAnalyzer, event);
        }
        return matchedAnalyzer.analyze(analysisContext, event);
    }

    // No analyzer is registered for this kind of event.
    return null;
}
final String componentName = mapHolder.getComponentName(event.getComponentId()); final String processGroupId = mapHolder.getProcessGroupId(event.getComponentId(), event.getComponentType()); final String processGroupName = mapHolder.getComponentName(processGroupId); arrayBuilder.add(serialize(factory, builder, event, df, componentName, processGroupId, processGroupName, hostname, url, rootGroupName, platform, nodeId)); new Object[] {events.size(), transferMillis, transactionId, events.get(0).getEventId()}); } catch (final IOException e) { throw new ProcessException("Failed to send Provenance Events to destination due to IOException:" + e.getMessage(), e);
switch (event.getEventType()) { case CLONE: case FORK: case JOIN: case REPLAY: { return submitLineageComputation(event.getChildUuids(), user, authorizer, LineageComputationType.EXPAND_CHILDREN, eventId, event.getEventTime(), Long.MAX_VALUE); submission.getResult().setError("Event ID " + eventId + " indicates an event of type " + event.getEventType() + " so its children cannot be expanded"); return submission;