/**
 * Handles a pattern that is being stored for the first time: a fresh annotation is
 * created, registered in {@code _annotations} under the pattern, and the row is then
 * annotated with it.
 *
 * @param pattern the pattern used as key for the new annotation
 * @param row the row to annotate
 * @param value not used by this implementation
 * @param distinctCount forwarded unchanged to the annotation factory
 */
@Override
protected void storeNewPattern(final TokenPattern pattern, final InputRow row, final String value,
        final int distinctCount) {
    final RowAnnotation patternAnnotation = _annotationFactory.createAnnotation();
    // Register before annotating so the pattern is resolvable as soon as it has rows.
    _annotations.put(pattern, patternAnnotation);
    _annotationFactory.annotate(row, distinctCount, patternAnnotation);
}
/**
 * Tells whether sample rows are available for the given value.
 *
 * @param value the value to look up; {@code null} selects the dedicated null-value annotation
 * @return {@code true} only if an annotation exists for the value and the annotation
 *         factory reports sample rows for it; {@code false} if the annotation map, the
 *         factory reference's content, or the annotation itself is missing
 */
@Override
public boolean hasAnnotatedRows(final String value) {
    if (_annotations == null) {
        return false;
    }

    final RowAnnotationFactory factory = _annotationFactoryRef.get();
    if (factory == null) {
        return false;
    }

    if (value == null) {
        // null values are tracked by their own annotation rather than through the map
        return _nullValueAnnotation != null && factory.hasSampleRows(_nullValueAnnotation);
    }

    final RowAnnotation match = _annotations.get(value);
    return match != null && factory.hasSampleRows(match);
}
/**
 * Annotates the given row with {@code rowAnnotation} by delegating to
 * {@code rowAnnotationFactory.annotate(...)}.
 *
 * @param row the row to annotate
 * @param distinctCount passed through unchanged to the annotation factory
 *        (presumably the number of occurrences the row represents - confirm against caller)
 */
@Override public void run(InputRow row, int distinctCount) { rowAnnotationFactory.annotate(row, distinctCount, rowAnnotation); }
/**
 * Creates the delegate and allocates one annotation each for null, true and false
 * values from the supplied factory.
 *
 * @param annotationFactory factory that is stored and used to create the three annotations
 */
public BooleanAnalyzerColumnDelegate(final RowAnnotationFactory annotationFactory) {
    _annotationFactory = annotationFactory;

    // Creation order is kept: null, true, false.
    _nullAnnotation = annotationFactory.createAnnotation();
    _trueAnnotation = annotationFactory.createAnnotation();
    _falseAnnotation = annotationFactory.createAnnotation();
}
@Override public CompletenessAnalyzerResult reduce(final Collection<? extends CompletenessAnalyzerResult> results) { final CompletenessAnalyzerResult firstResult = results.iterator().next(); final RowAnnotation annotation = _rowAnnotationFactory.createAnnotation(); final InputColumn<?>[] highlightedColumns = firstResult.getHighlightedColumns(); int totalRowCount = 0; for (final CompletenessAnalyzerResult result : results) { final List<InputRow> sampleRows = result.getSampleRows(); final int invalidRowCount = result.getInvalidRowCount(); if (invalidRowCount == sampleRows.size()) { // if the rows are included for preview/sampling - then // re-annotate them in the master result for (final InputRow sampleRow : sampleRows) { _rowAnnotationFactory.annotate(sampleRow, annotation); } } else { // else we just transfer annotation counts _rowAnnotationFactory.transferAnnotations(result.getAnnotation(), annotation); } totalRowCount += result.getTotalRowCount(); } return new CompletenessAnalyzerResult(totalRowCount, annotation, _rowAnnotationFactory, highlightedColumns); }
_annotationFactory.resetAnnotation(_maxAnnotation); _annotationFactory.resetAnnotation(_minAnnotation); _annotationFactory.annotate(row, distinctCount, _maxAnnotation); _annotationFactory.annotate(row, distinctCount, _minAnnotation); _annotationFactory.annotate(row, distinctCount, _nullAnnotation);
/**
 * Folds one partial annotated-rows result into the given target annotation.
 *
 * <p>
 * Nothing happens when the partial result is {@code null} or reports zero annotated
 * rows. When the number of sample rows equals the annotated row count, each sample row
 * is annotated individually; otherwise the partial result's annotation is handed to
 * {@code transferAnnotations}.
 *
 * @param annotation the target annotation that receives the rows/counts
 * @param annotatedRowsResult the partial result to merge, may be {@code null}
 */
private void reduce(final RowAnnotation annotation, final AnnotatedRowsResult annotatedRowsResult) {
    if (annotatedRowsResult == null) {
        return;
    }

    final int annotatedCount = annotatedRowsResult.getAnnotatedRowCount();
    if (annotatedCount == 0) {
        return;
    }

    final List<InputRow> samples = annotatedRowsResult.getSampleRows();
    if (samples.size() != annotatedCount) {
        // samples are only a subset - move the annotation over as a whole
        _rowAnnotationFactory.transferAnnotations(annotatedRowsResult.getAnnotation(), annotation);
        return;
    }

    for (final InputRow sample : samples) {
        _rowAnnotationFactory.annotate(sample, annotation);
    }
}
@Override public CategorizationResult reduce(final Collection<? extends CategorizationResult> results) { final RowAnnotationFactory annotationFactory = null; final Map<String, RowAnnotation> reducedCategories = new LinkedHashMap<>(); for (final CategorizationResult result : results) { final Collection<String> categoryNames = result.getCategoryNames(); for (final String categoryName : categoryNames) { final RowAnnotation partialAnnotation = result.getCategoryRowAnnotation(categoryName); final RowAnnotation reducedAnnotation = reducedCategories.get(categoryName); if (reducedAnnotation == null) { // adopt the annotation from the partial result final RowAnnotation annotation = _rowAnnotationFactory.createAnnotation(); _rowAnnotationFactory.transferAnnotations(partialAnnotation, annotation); reducedCategories.put(categoryName, annotation); } else { // add records to the existing annotation _rowAnnotationFactory.transferAnnotations(partialAnnotation, reducedAnnotation); } } } return new CategorizationResult(annotationFactory, reducedCategories); }
/**
 * Annotates the given row with {@code rowAnnotation} by delegating to
 * {@code rowAnnotationFactory.annotate(...)}.
 *
 * @param row the row to annotate
 * @param distinctCount passed through unchanged to the annotation factory
 *        (presumably the number of occurrences the row represents - confirm against caller)
 */
@Override public void run(InputRow row, int distinctCount) { rowAnnotationFactory.annotate(row, distinctCount, rowAnnotation); }
/**
 * Creates the delegate, stores the annotation factory and allocates one annotation per
 * tracked string measure.
 *
 * @param annotationFactory factory that is stored and used to create all annotations
 */
public StringAnalyzerColumnDelegate(final RowAnnotationFactory annotationFactory) {
    _annotationFactory = annotationFactory;

    // The annotations are created in the same sequence as before; only the layout changed.
    _nullAnnotation = annotationFactory.createAnnotation();
    _blankAnnotation = annotationFactory.createAnnotation();

    _entirelyUppercaseAnnotation = annotationFactory.createAnnotation();
    _entirelyLowercaseAnnotation = annotationFactory.createAnnotation();

    _maxCharsAnnotation = annotationFactory.createAnnotation();
    _minCharsAnnotation = annotationFactory.createAnnotation();

    _maxWhitespaceAnnotation = annotationFactory.createAnnotation();
    _minWhitespaceAnnotation = annotationFactory.createAnnotation();

    _uppercaseExclFirstLetterAnnotation = annotationFactory.createAnnotation();
    _digitAnnotation = annotationFactory.createAnnotation();
    _diacriticAnnotation = annotationFactory.createAnnotation();

    _maxWordsAnnotation = annotationFactory.createAnnotation();
    _minWordsAnnotation = annotationFactory.createAnnotation();
}
_annotationFactory.annotate(row, distinctCount, _nullAnnotation); } else { final int numChars = value.length(); _annotationFactory.annotate(row, distinctCount, _blankAnnotation); _annotationFactory.annotate(row, distinctCount, _uppercaseExclFirstLetterAnnotation); _numUppercaseExclFirstLetter += numUppercaseExclFirstLetter; _annotationFactory.annotate(row, distinctCount, _diacriticAnnotation); _annotationFactory.annotate(row, distinctCount, _digitAnnotation); _annotationFactory.resetAnnotation(_maxCharsAnnotation); _maxChars = numChars; _annotationFactory.annotate(row, distinctCount, _maxCharsAnnotation); _annotationFactory.resetAnnotation(_minCharsAnnotation); _minChars = numChars; _annotationFactory.annotate(row, distinctCount, _minCharsAnnotation); _annotationFactory.resetAnnotation(_maxWordsAnnotation); _annotationFactory.annotate(row, distinctCount, _maxWordsAnnotation); _annotationFactory.resetAnnotation(_minWordsAnnotation);
/**
 * Records a row under the annotation that belongs to the given list of inspection
 * outcomes, creating and registering that annotation first if none exists yet.
 *
 * <p>
 * The lookup is first done without holding a lock; only on a miss is the lookup repeated
 * inside {@code synchronized (this)} before a new annotation is created and stored.
 *
 * @param row the row to annotate
 * @param inspectionOutcomes the key identifying the observation in {@code _observations}
 */
public void addObservation(InputRow row, List<Object> inspectionOutcomes) {
    RowAnnotation target = _observations.get(inspectionOutcomes);

    if (target == null) {
        synchronized (this) {
            // look again while holding the lock - the entry may have been added meanwhile
            final RowAnnotation existing = _observations.get(inspectionOutcomes);
            if (existing != null) {
                target = existing;
            } else {
                target = _rowAnnotationFactory.createAnnotation();
                _observations.put(inspectionOutcomes, target);
            }
        }
    }

    _rowAnnotationFactory.annotate(row, target);
}
/**
 * Processes one boolean value: adds {@code distinctCount} to the row counter and
 * annotates the row with the null, true or false annotation depending on the value.
 *
 * @param value the value to classify, may be {@code null}
 * @param row the row to annotate
 * @param distinctCount added to {@code _rowCount} and forwarded to the annotation factory
 */
public void run(final Boolean value, final InputRow row, final int distinctCount) {
    _rowCount += distinctCount;

    if (value == null) {
        _annotationFactory.annotate(row, distinctCount, _nullAnnotation);
    } else if (value.booleanValue()) {
        _annotationFactory.annotate(row, distinctCount, _trueAnnotation);
    } else {
        _annotationFactory.annotate(row, distinctCount, _falseAnnotation);
    }
}
/**
 * Creates the delegate, allocates the null/min/max date and time annotations, resets the
 * row counter and picks the statistics implementation.
 *
 * @param descriptiveStatistics when {@code true} a {@code DescriptiveStatistics} instance
 *        is used, otherwise a {@code SummaryStatistics} instance
 * @param annotationFactory factory that is stored and used to create all annotations
 */
public DateAndTimeAnalyzerColumnDelegate(final boolean descriptiveStatistics,
        final RowAnnotationFactory annotationFactory) {
    _annotationFactory = annotationFactory;

    // Creation order is kept: null, max date, min date, max time, min time.
    _nullAnnotation = annotationFactory.createAnnotation();
    _maxDateAnnotation = annotationFactory.createAnnotation();
    _minDateAnnotation = annotationFactory.createAnnotation();
    _maxTimeAnnotation = annotationFactory.createAnnotation();
    _minTimeAnnotation = annotationFactory.createAnnotation();

    _numRows = 0;

    if (!descriptiveStatistics) {
        _statistics = new SummaryStatistics();
    } else {
        _statistics = new DescriptiveStatistics();
    }
}
public synchronized void run(final Date value, final InputRow row, final int distinctCount) { _numRows += distinctCount; if (value == null) { _annotationFactory.annotate(row, distinctCount, _nullAnnotation); } else { final long timestamp = value.getTime(); if (localDate.isAfter(_maxDate)) { _maxDate = localDate; _annotationFactory.resetAnnotation(_maxDateAnnotation); } else if (localDate.isBefore(_minDate)) { _minDate = localDate; _annotationFactory.resetAnnotation(_minDateAnnotation); _annotationFactory.resetAnnotation(_maxTimeAnnotation); } else if (localTime.isBefore(_minTime)) { _minTime = localTime; _annotationFactory.resetAnnotation(_minTimeAnnotation); _annotationFactory.annotate(row, distinctCount, _maxDateAnnotation); _annotationFactory.annotate(row, distinctCount, _minDateAnnotation); _annotationFactory.annotate(row, distinctCount, _maxTimeAnnotation); _annotationFactory.annotate(row, distinctCount, _minTimeAnnotation);
/**
 * Builds an annotated-rows result for a single category.
 *
 * @param category the category name to look up in {@code _categories}
 * @return a new {@link AnnotatedRowsResult} for the category's annotation, or
 *         {@code null} if the annotation factory reference is empty, the category has no
 *         annotation, or the factory reports no sample rows for it
 */
public AnnotatedRowsResult getCategoryRowSample(final String category) {
    final RowAnnotationFactory factory = _annotationFactoryRef.get();
    if (factory == null) {
        return null;
    }

    final RowAnnotation categoryAnnotation = _categories.get(category);
    if (categoryAnnotation == null || !factory.hasSampleRows(categoryAnnotation)) {
        return null;
    }

    return new AnnotatedRowsResult(categoryAnnotation, factory);
}
/**
 * Annotates a row with the annotation registered for the given inspection outcomes.
 *
 * <p>
 * If {@code _observations} has no annotation for the outcomes, the lookup is repeated
 * inside {@code synchronized (this)}; if it is still missing, a new annotation is created
 * and stored under the outcomes before the row is annotated.
 *
 * @param row the row to annotate
 * @param inspectionOutcomes the key under which the annotation is kept
 */
public void addObservation(InputRow row, List<Object> inspectionOutcomes) {
    RowAnnotation outcomeAnnotation = _observations.get(inspectionOutcomes);

    final boolean missing = (outcomeAnnotation == null);
    if (missing) {
        synchronized (this) {
            // second lookup under the lock before deciding to create a new annotation
            outcomeAnnotation = _observations.get(inspectionOutcomes);
            if (outcomeAnnotation == null) {
                outcomeAnnotation = _rowAnnotationFactory.createAnnotation();
                _observations.put(inspectionOutcomes, outcomeAnnotation);
            }
        }
    }

    _rowAnnotationFactory.annotate(row, outcomeAnnotation);
}
/**
 * Records a row against the annotation of a pattern that is already registered.
 *
 * @param pattern the pattern whose annotation is looked up in {@code _annotations}
 * @param row the row to annotate
 * @param value not used by this implementation
 * @param distinctCount forwarded unchanged to the annotation factory
 * @throws IllegalStateException if no annotation is registered for the pattern
 */
@Override
protected void storeMatch(final TokenPattern pattern, final InputRow row, final String value,
        final int distinctCount) {
    final RowAnnotation known = _annotations.get(pattern);
    if (known != null) {
        _annotationFactory.annotate(row, distinctCount, known);
        return;
    }
    throw new IllegalStateException("No annotation available for pattern: " + pattern);
}
public CharacterSetDistributionAnalyzerColumnDelegate(final RowAnnotationFactory annotationFactory, final Map<String, UnicodeSet> unicodeSets) { _annotationFactory = annotationFactory; _unicodeSets = unicodeSets; _annotations = new HashMap<>(); for (final String name : unicodeSets.keySet()) { _annotations.put(name, _annotationFactory.createAnnotation()); } }
/**
 * Factory method for {@link AnnotatedRowsResult} that yields an instance ONLY when the
 * given {@link RowAnnotation} has sample rows according to the
 * {@link RowAnnotationFactory}.
 *
 * Returning {@code null} in the other case makes it easy for callers to filter out
 * unnecessary drill-to-detail result objects.
 *
 * @param annotation
 *            the annotation to check and wrap
 * @param annotationFactory
 *            the factory that is asked whether sample rows exist
 * @param columns
 *            the columns passed on to the created result
 * @return a new {@link AnnotatedRowsResult}, or {@code null} if the factory reports no
 *         sample rows for the annotation
 */
public static AnnotatedRowsResult createIfSampleRowsAvailable(final RowAnnotation annotation,
        final RowAnnotationFactory annotationFactory, final InputColumn<?>... columns) {
    final boolean samplesAvailable = annotationFactory.hasSampleRows(annotation);
    return samplesAvailable ? new AnnotatedRowsResult(annotation, annotationFactory, columns) : null;
}