/**
 * Executes the second AST argument, takes the model carried by the resulting value,
 * and returns that model's {@code toFrame()} output wrapped in a {@link ValFrame}.
 *
 * @param env  environment in which {@code asts[1]} is executed
 * @param stk  stack helper used to track the value produced by {@code asts[1]}
 * @param asts AST arguments; only {@code asts[1]} is read here
 * @return a {@code ValFrame} around the result of {@code Word2VecModel.toFrame()}
 * @throws ClassCastException if the model held by the executed value is not a {@code Word2VecModel}
 */
@Override
public ValFrame apply(Env env, Env.StackHelp stk, AstRoot asts[]) {
  return new ValFrame(
      ((Word2VecModel) stk.track(asts[1].exec(env)).getModel()).toFrame());
}
/**
 * Groups {@code data} by a single column and applies a {@code sum} aggregate to the
 * columns named "numerator" and "denominator".
 *
 * @param data          frame to aggregate; its "numerator" and "denominator" columns are looked up by name
 * @param teColumnIndex index of the column to group by
 * @return the frame produced by the grouping, passed through {@code register} before being returned
 */
Frame groupByTEColumnAndAggregate(Frame data, int teColumnIndex) {
  // NOTE(review): the indices returned by find() are used without validation -
  // confirm both columns are always present in callers' frames.
  final int numIdx = data.find("numerator");
  final int denIdx = data.find("denominator");
  final AstGroup.NAHandling naMode = AstGroup.NAHandling.ALL;

  // One sum aggregate per column; the last argument is (int) column max, plus one.
  final AstGroup.AGG[] sums = {
      new AstGroup.AGG(AstGroup.FCN.sum, numIdx, naMode, (int) data.vec(numIdx).max() + 1),
      new AstGroup.AGG(AstGroup.FCN.sum, denIdx, naMode, (int) data.vec(denIdx).max() + 1)
  };

  final Frame grouped = new AstGroup()
      .performGroupingWithAggregations(data, new int[]{teColumnIndex}, sums, -1)
      .getFrame();
  return register(grouped);
}
/**
 * Reads the target-encoding arguments from the AST, rebuilds the encoding map, and applies
 * the encoding to the requested frame.
 *
 * <p>Blending parameters are read only when blending is enabled; otherwise the encoder is
 * created from the column names alone. A noise value of exactly {@code -1} selects the
 * {@code applyTargetEncoding} overload that takes no noise argument (presumably meaning
 * "noise not specified" - confirm against the caller).
 *
 * @param env  environment passed to the argument readers
 * @param stk  stack helper passed to the argument readers
 * @param asts AST arguments the individual values are read from
 * @return a {@code ValFrame} wrapping the result of {@code applyTargetEncoding}
 */
@Override
public ValFrame apply(Env env, Env.StackHelp stk, AstRoot asts[]) {
  // Arguments are read in a fixed sequence; keep this order when editing.
  final String[] mapKeys = getEncodingMapKeys(env, stk, asts);
  final Frame[] mapFrames = getEncodingMapFrames(env, stk, asts);
  final Frame input = getFrameToTransform(env, stk, asts);
  final String[] columnsToEncode = getTEColumns(env, stk, asts);
  final byte leakageStrategy = getDataLeakageHandlingStrategy(env, stk, asts);
  final String targetColumn = getTargetColumnName(env, stk, asts);
  final String foldColumn = getFoldColumnName(env, stk, asts);
  final boolean withBlending = getWithBlending(env, stk, asts);

  BlendingParams blending = null;
  if (withBlending) {
    blending = getBlendingParams(env, stk, asts);
  }

  final double noise = getNoise(env, stk, asts);
  final double seed = getSeed(env, stk, asts);
  final boolean imputeOriginalColumns = true;

  final TargetEncoder encoder;
  if (blending == null) {
    encoder = new TargetEncoder(columnsToEncode);
  } else {
    encoder = new TargetEncoder(columnsToEncode, blending);
  }

  final Map<String, Frame> encodingMap = reconstructEncodingMap(mapKeys, mapFrames);

  if (noise != -1) {
    return new ValFrame(encoder.applyTargetEncoding(input, targetColumn, encodingMap,
        leakageStrategy, foldColumn, withBlending, noise, imputeOriginalColumns, (long) seed));
  }
  return new ValFrame(encoder.applyTargetEncoding(input, targetColumn, encodingMap,
      leakageStrategy, foldColumn, withBlending, imputeOriginalColumns, (long) seed));
}
/**
 * Groups {@code fr} by the column named {@code teColumnName} - and also by the fold column
 * when {@code foldColumnName} is non-null - and computes a {@code sum} and an {@code nrow}
 * aggregate over the target column.
 *
 * @param fr             frame to aggregate
 * @param teColumnName   name of the column to group by
 * @param foldColumnName name of an additional group-by column, or {@code null} for none
 * @param targetIndex    index of the column both aggregates are computed on
 * @return the frame produced by the grouping, passed through {@code register} before being returned
 */
Frame groupThenAggregateForNumeratorAndDenominator(Frame fr, String teColumnName, String foldColumnName, int targetIndex) {
  final int teIdx = fr.find(teColumnName);
  final int[] groupBy = (foldColumnName == null)
      ? new int[]{teIdx}
      : new int[]{teIdx, fr.find(foldColumnName)};

  final AstGroup.NAHandling naMode = AstGroup.NAHandling.ALL;
  final AstGroup.AGG[] aggregates = {
      new AstGroup.AGG(AstGroup.FCN.sum, targetIndex, naMode, (int) fr.vec(targetIndex).max() + 1),
      new AstGroup.AGG(AstGroup.FCN.nrow, targetIndex, naMode, (int) fr.vec(targetIndex).max() + 1)
  };

  final Frame grouped = new AstGroup()
      .performGroupingWithAggregations(fr, groupBy, aggregates, -1)
      .getFrame();
  return register(grouped);
}
/**
 * Returns the frame form of the Word2Vec model produced by the second AST argument.
 *
 * @param env  environment in which {@code asts[1]} is executed
 * @param stk  stack helper that tracks the value produced by {@code asts[1]}
 * @param asts AST arguments; only {@code asts[1]} is read here
 * @return a {@code ValFrame} wrapping {@code toFrame()} of the model
 * @throws ClassCastException if the executed value's model is not a {@code Word2VecModel}
 */
@Override
public ValFrame apply(Env env, Env.StackHelp stk, AstRoot asts[]) {
  // Execute the argument, track the value on the stack helper, then take its model.
  final Word2VecModel word2Vec =
      (Word2VecModel) stk.track(asts[1].exec(env)).getModel();

  return new ValFrame(word2Vec.toFrame());
}
/**
 * Sums the "numerator" and "denominator" columns of {@code data} per group, where groups
 * are defined by the column at {@code teColumnIndex}.
 *
 * @param data          frame to aggregate; must contain "numerator" and "denominator" columns
 * @param teColumnIndex index of the single group-by column
 * @return the grouped frame, after being handed to {@code register}
 */
Frame groupByTEColumnAndAggregate(Frame data, int teColumnIndex) {
  final int numeratorIdx = data.find("numerator");
  final int denominatorIdx = data.find("denominator");

  final AstGroup.AGG[] columnSums = new AstGroup.AGG[2];
  final AstGroup.NAHandling naPolicy = AstGroup.NAHandling.ALL;

  // Fourth AGG argument: the column's max truncated to int, plus one.
  columnSums[0] = new AstGroup.AGG(
      AstGroup.FCN.sum, numeratorIdx, naPolicy, (int) data.vec(numeratorIdx).max() + 1);
  columnSums[1] = new AstGroup.AGG(
      AstGroup.FCN.sum, denominatorIdx, naPolicy, (int) data.vec(denominatorIdx).max() + 1);

  final int[] groupByColumns = {teColumnIndex};
  return register(
      new AstGroup().performGroupingWithAggregations(data, groupByColumns, columnSums, -1).getFrame());
}
/**
 * Reads the target-encoding arguments from the AST, rebuilds the encoding map, and applies
 * the encoding to the requested frame.
 *
 * <p>The encoder is always built with {@code BlendingParams} made from the inflection point
 * and smoothing arguments; whether blending is applied is passed on separately through the
 * {@code withBlending} flag. A noise value of exactly {@code -1} selects the
 * {@code applyTargetEncoding} overload without a noise argument (presumably meaning
 * "noise not specified" - confirm against the caller).
 *
 * @param env  environment passed to the argument readers that need it
 * @param stk  stack helper passed to the argument readers that need it
 * @param asts AST arguments the individual values are read from
 * @return a {@code ValFrame} wrapping the result of {@code applyTargetEncoding}
 */
@Override
public ValFrame apply(Env env, Env.StackHelp stk, AstRoot asts[]) {
  // Arguments are read in a fixed sequence; keep this order when editing.
  final String[] keys = getEncodingMapKeys(asts);
  final Frame[] frames = getEncodingMapFrames(env, stk, asts);
  final Frame toTransform = getFrameToTransform(env, stk, asts);
  final String[] teColumns = getTEColumns(asts);
  final byte strategy = getDataLeakageHandlingStrategy(env, stk, asts);
  final String targetName = getTargetColumnName(env, stk, asts);
  final String foldName = getFoldColumnName(env, stk, asts);
  final boolean withBlending = getWithBlending(env, stk, asts);
  final double inflectionPoint = getInflectionPoint(env, stk, asts);
  final double smoothing = getSmoothing(env, stk, asts);
  final double noise = getNoise(env, stk, asts);
  final double seed = getSeed(env, stk, asts);
  final boolean imputeOriginals = true;

  final TargetEncoder encoder =
      new TargetEncoder(teColumns, new BlendingParams(inflectionPoint, smoothing));
  final Map<String, Frame> encodingMap = reconstructEncodingMap(keys, frames);

  if (noise == -1) {
    return new ValFrame(encoder.applyTargetEncoding(
        toTransform, targetName, encodingMap, strategy, foldName,
        withBlending, imputeOriginals, (long) seed));
  }
  return new ValFrame(encoder.applyTargetEncoding(
      toTransform, targetName, encodingMap, strategy, foldName,
      withBlending, noise, imputeOriginals, (long) seed));
}
/**
 * Computes a {@code sum} and an {@code nrow} aggregate over the target column of {@code fr},
 * grouped by the column named {@code teColumnName} and, when {@code foldColumnName} is
 * non-null, by the fold column as well.
 *
 * @param fr             frame to aggregate
 * @param teColumnName   name of the primary group-by column
 * @param foldColumnName name of the secondary group-by column, or {@code null} to group by one column only
 * @param targetIndex    index of the column the aggregates are computed on
 * @return the grouped frame, after being handed to {@code register}
 */
Frame groupThenAggregateForNumeratorAndDenominator(Frame fr, String teColumnName, String foldColumnName, int targetIndex) {
  final int encodedColumn = fr.find(teColumnName);

  final int[] keys;
  if (foldColumnName != null) {
    final int foldColumn = fr.find(foldColumnName);
    keys = new int[]{encodedColumn, foldColumn};
  } else {
    keys = new int[]{encodedColumn};
  }

  final AstGroup.AGG[] aggregates = new AstGroup.AGG[2];
  final AstGroup.NAHandling naPolicy = AstGroup.NAHandling.ALL;
  // Both aggregates receive the same fourth argument: the target column's max truncated to int, plus one.
  final int targetMaxPlusOne = (int) fr.vec(targetIndex).max() + 1;
  aggregates[0] = new AstGroup.AGG(AstGroup.FCN.sum, targetIndex, naPolicy, targetMaxPlusOne);
  aggregates[1] = new AstGroup.AGG(AstGroup.FCN.nrow, targetIndex, naPolicy, targetMaxPlusOne);

  return register(
      new AstGroup().performGroupingWithAggregations(fr, keys, aggregates, -1).getFrame());
}