/**
 * Encodes the regex tokenizer as a single document feature.
 *
 * <p>
 * The tokenizer must be configured with gaps enabled (the pattern acts as a splitter)
 * and with a minimum token length of exactly 1; any other configuration is rejected.
 * When lowercasing is enabled, the input is first routed through a derived string field
 * that applies the "lowercase" function, and the document feature is built on top of
 * that derived field instead of the original one.
 * </p>
 *
 * @param encoder The encoder used to look up the input feature and to register the derived field.
 * @return A one-element list holding the document feature built from the (possibly lowercased)
 * field and the tokenizer pattern.
 * @throws IllegalArgumentException If gaps are disabled, or if the minimum token length is not 1.
 */
@Override
public List<Feature> encodeFeatures(SparkMLEncoder encoder){
	RegexTokenizer tokenizer = getTransformer();

	// Guard: the pattern must describe separators, not the tokens themselves
	boolean splitterMode = tokenizer.getGaps();
	if(!splitterMode){
		throw new IllegalArgumentException("Expected splitter mode, got token matching mode");
	}

	// Guard: token length filtering is only accepted at its value of 1
	if(tokenizer.getMinTokenLength() != 1){
		throw new IllegalArgumentException(
			"Expected 1 as minimum token length, got " + tokenizer.getMinTokenLength() + " as minimum token length"
		);
	}

	Feature inputFeature = encoder.getOnlyFeature(tokenizer.getInputCol());

	// Start from the input's own field; swapped for a derived field below if lowercasing is requested
	Field<?> documentField = inputFeature.getField();

	if(tokenizer.getToLowercase()){
		Apply lowercaseCall = PMMLUtil.createApply("lowercase", inputFeature.ref());

		documentField = encoder.createDerivedField(
			FeatureUtil.createName("lowercase", inputFeature),
			OpType.CATEGORICAL,
			DataType.STRING,
			lowercaseCall
		);
	}

	Feature documentFeature = new DocumentFeature(encoder, documentField, tokenizer.getPattern());

	return Collections.singletonList(documentFeature);
}
}