+ "to the typical usage-pattern.") @ExternalDocumentation( { @DocumentationLink(title = "Analyzer rundown", url = "https://www.youtube.com/watch?v=hZWxB_eu_A0", type = DocumentationType.VIDEO, version = "4.0") }) @Concurrent(true)
+ "your data.\nIt can be used for a lot of purposes but is excellent for verifying or getting ideas about " + "the format of the string-values in a column.") @ExternalDocumentation(value = { @DocumentationLink(title = "Kasper's Source: Pattern Finder 2.0", url = "http://kasper.eobjects.org/2010/09/pattern-finder-20-latest-feature-in.html", type = DocumentationType.TECH, version = "2.0") })
@Named("Remove dictionary matches") @Description( "Removes any part of a string that is matched against a dictionary. Use it to standardize or prepare " + "values, for instance by removing adjectives that make comparison of similar terms difficult.") @ExternalDocumentation({ @DocumentationLink(title = "Segmenting customers on messy data", url = "https://www.youtube.com/watch?v=iy-j5s-uHz4", type = DocumentationType.VIDEO, version = "4.0") }) @Categorized(superCategory = ImproveSuperCategory.class, value = ReferenceDataCategory.class)
@Alias("Synonym replacement") @Description("Replaces strings with their synonyms") @ExternalDocumentation({ @DocumentationLink(title = "Segmenting customers on messy data", url = "https://www.youtube.com/watch?v=iy-j5s-uHz4", type = DocumentationType.VIDEO, version = "4.0"), @DocumentationLink(title = "Understanding and using Synonyms", url = "https://www.youtube.com/watch?v=_YiPaA8bFt4", type = DocumentationType.VIDEO, version = "2.0") }) @Categorized(superCategory = ImproveSuperCategory.class, value = ReferenceDataCategory.class)
@Named("Character set distribution") @Description("Inspects and maps text characters according to character set affinity, " + "such as Latin, Hebrew, Cyrillic, Chinese and more.") @ExternalDocumentation({ @DocumentationLink(title = "Internationalization in DataCleaner", url = "https://www.youtube.com/watch?v=ApA-nhtLbhI", type = DocumentationType.VIDEO, version = "3.0") }) @Concurrent(true)
+ "<li>Match a hash-sign and 3 pairs of hexadecimal digits (using pseudo-characters of Java regular expressions):" + "<blockquote>\\#?(\\p{XDigit}{2})(\\p{XDigit}{2})(\\p{XDigit}{2})</blockquote></li>" + "</ul>") @ExternalDocumentation({ @DocumentationLink(title = "Regex parsing with DataCleaner", url = "https://www.youtube.com/watch?v=VA6dw5Nv2AM", type = DocumentationType.VIDEO, version = "3.0"), @DocumentationLink(title = "Java Tutorials: Regular Expressions Lesson", url = "https://docs.oracle.com/javase/tutorial/essential/regex/", type = DocumentationType.TECH, version = "3.0") })
@Named("Regex search/replace") @Description("Search and replace text in String values using regular expressions.") @ExternalDocumentation({ @DocumentationLink(title = "Java Tutorials: Regular Expressions Lesson", url = "https://docs.oracle.com/javase/tutorial/essential/regex/", type = DocumentationType.TECH, version = "3.0") })
@Named("Transliterate") @Description("Converts non-latin characters to latin (or even ASCII) characters.") @ExternalDocumentation({ @DocumentationLink(title = "Internationalization in DataCleaner", url = "https://www.youtube.com/watch?v=ApA-nhtLbhI", type = DocumentationType.VIDEO, version = "3.0") }) @Categorized(EncodingCategory.class)