/**
 * Reads {@code num} conversion lines from {@code reader} and compiles them into an FST
 * that maps each source string to its replacement.
 *
 * <p>Each line is split on runs of whitespace and must yield exactly three fields: the
 * second field is used as the mapping key and the third as its replacement. The first
 * field is not inspected by this method.
 *
 * @param reader source of the conversion lines; one line is consumed per expected entry
 * @param num number of conversion lines to read
 * @return the FST produced by the builder once every mapping has been added
 * @throws IOException if reading from {@code reader} fails or the FST builder reports one
 * @throws ParseException if the input ends before {@code num} lines were read, or a line
 *         does not split into exactly three fields
 * @throws IllegalStateException if the same key is mapped more than once
 */
private FST<CharsRef> parseConversions(LineNumberReader reader, int num) throws IOException, ParseException {
  // TreeMap iterates its keys in sorted order, which is the order they are fed to the builder.
  Map<String,String> mappings = new TreeMap<>();

  for (int i = 0; i < num; i++) {
    String line = reader.readLine();
    if (line == null) {
      // readLine() signals end of input with null; report it as a syntax problem
      // instead of failing with a NullPointerException on split().
      throw new ParseException("unexpected end of input: expected " + num
          + " conversion lines but found only " + i, reader.getLineNumber());
    }
    String[] parts = line.split("\\s+");
    if (parts.length != 3) {
      throw new ParseException("invalid syntax: " + line, reader.getLineNumber());
    }
    // put() returns the previous value, so a non-null result means the key was already present.
    if (mappings.put(parts[1], parts[2]) != null) {
      throw new IllegalStateException("duplicate mapping specified for: " + parts[1]);
    }
  }

  Outputs<CharsRef> outputs = CharSequenceOutputs.getSingleton();
  Builder<CharsRef> builder = new Builder<>(FST.INPUT_TYPE.BYTE2, outputs);
  // One scratch buffer is reused for every key.
  IntsRefBuilder scratchInts = new IntsRefBuilder();
  for (Map.Entry<String,String> entry : mappings.entrySet()) {
    Util.toUTF16(entry.getKey(), scratchInts);
    builder.add(scratchInts.get(), new CharsRef(entry.getValue()));
  }

  return builder.finish();
}
/** Builds the NormalizeCharMap; call this once you * are done calling {@link #add}. */ public NormalizeCharMap build() { final FST<CharsRef> map; try { final Outputs<CharsRef> outputs = CharSequenceOutputs.getSingleton(); final org.apache.lucene.util.fst.Builder<CharsRef> builder = new org.apache.lucene.util.fst.Builder<>(FST.INPUT_TYPE.BYTE2, outputs); final IntsRefBuilder scratch = new IntsRefBuilder(); for(Map.Entry<String,String> ent : pendingPairs.entrySet()) { builder.add(Util.toUTF16(ent.getKey(), scratch), new CharsRef(ent.getValue())); } map = builder.finish(); pendingPairs.clear(); } catch (IOException ioe) { // Bogus FST IOExceptions!! (will never happen) throw new RuntimeException(ioe); } return new NormalizeCharMap(map); } }
/**
 * Reads {@code num} conversion lines from {@code reader} and compiles them into an FST
 * that maps each source string to its replacement.
 *
 * <p>Each line is split on runs of whitespace and must yield exactly three fields: the
 * second field is used as the mapping key and the third as its replacement. The first
 * field is not inspected by this method.
 *
 * @param reader source of the conversion lines; one line is consumed per expected entry
 * @param num number of conversion lines to read
 * @return the FST produced by the builder once every mapping has been added
 * @throws IOException if reading from {@code reader} fails or the FST builder reports one
 * @throws ParseException if the input ends before {@code num} lines were read, or a line
 *         does not split into exactly three fields
 * @throws IllegalStateException if the same key is mapped more than once
 */
private FST<CharsRef> parseConversions(LineNumberReader reader, int num) throws IOException, ParseException {
  // TreeMap iterates its keys in sorted order, which is the order they are fed to the builder.
  Map<String,String> mappings = new TreeMap<>();

  for (int i = 0; i < num; i++) {
    String line = reader.readLine();
    if (line == null) {
      // readLine() signals end of input with null; report it as a syntax problem
      // instead of failing with a NullPointerException on split().
      throw new ParseException("unexpected end of input: expected " + num
          + " conversion lines but found only " + i, reader.getLineNumber());
    }
    String[] parts = line.split("\\s+");
    if (parts.length != 3) {
      throw new ParseException("invalid syntax: " + line, reader.getLineNumber());
    }
    // put() returns the previous value, so a non-null result means the key was already present.
    if (mappings.put(parts[1], parts[2]) != null) {
      throw new IllegalStateException("duplicate mapping specified for: " + parts[1]);
    }
  }

  Outputs<CharsRef> outputs = CharSequenceOutputs.getSingleton();
  Builder<CharsRef> builder = new Builder<>(FST.INPUT_TYPE.BYTE2, outputs);
  // One scratch buffer is reused for every key.
  IntsRefBuilder scratchInts = new IntsRefBuilder();
  for (Map.Entry<String,String> entry : mappings.entrySet()) {
    Util.toUTF16(entry.getKey(), scratchInts);
    builder.add(scratchInts.get(), new CharsRef(entry.getValue()));
  }

  return builder.finish();
}
/**
 * Reads {@code num} conversion lines from {@code reader} and compiles them into an FST
 * that maps each source string to its replacement.
 *
 * <p>Each line is split on runs of whitespace and must yield exactly three fields: the
 * second field is used as the mapping key and the third as its replacement. The first
 * field is not inspected by this method.
 *
 * @param reader source of the conversion lines; one line is consumed per expected entry
 * @param num number of conversion lines to read
 * @return the FST produced by the builder once every mapping has been added
 * @throws IOException if reading from {@code reader} fails or the FST builder reports one
 * @throws ParseException if the input ends before {@code num} lines were read, or a line
 *         does not split into exactly three fields
 * @throws IllegalStateException if the same key is mapped more than once
 */
private FST<CharsRef> parseConversions(LineNumberReader reader, int num) throws IOException, ParseException {
  // TreeMap iterates its keys in sorted order, which is the order they are fed to the builder.
  Map<String,String> mappings = new TreeMap<>();

  for (int i = 0; i < num; i++) {
    String line = reader.readLine();
    if (line == null) {
      // readLine() signals end of input with null; report it as a syntax problem
      // instead of failing with a NullPointerException on split().
      throw new ParseException("unexpected end of input: expected " + num
          + " conversion lines but found only " + i, reader.getLineNumber());
    }
    String[] parts = line.split("\\s+");
    if (parts.length != 3) {
      throw new ParseException("invalid syntax: " + line, reader.getLineNumber());
    }
    // put() returns the previous value, so a non-null result means the key was already present.
    if (mappings.put(parts[1], parts[2]) != null) {
      throw new IllegalStateException("duplicate mapping specified for: " + parts[1]);
    }
  }

  Outputs<CharsRef> outputs = CharSequenceOutputs.getSingleton();
  Builder<CharsRef> builder = new Builder<>(FST.INPUT_TYPE.BYTE2, outputs);
  // One scratch buffer is reused for every key.
  IntsRefBuilder scratchInts = new IntsRefBuilder();
  for (Map.Entry<String,String> entry : mappings.entrySet()) {
    Util.toUTF16(entry.getKey(), scratchInts);
    builder.add(scratchInts.get(), new CharsRef(entry.getValue()));
  }

  return builder.finish();
}
Util.toUTF16(value, irBuilder); final IntsRef ints = irBuilder.get(); synchronized (this.fst) {
/** Builds the NormalizeCharMap; call this once you * are done calling {@link #add}. */ public NormalizeCharMap build() { final FST<CharsRef> map; try { final Outputs<CharsRef> outputs = CharSequenceOutputs.getSingleton(); final org.apache.lucene.util.fst.Builder<CharsRef> builder = new org.apache.lucene.util.fst.Builder<>(FST.INPUT_TYPE.BYTE2, outputs); final IntsRefBuilder scratch = new IntsRefBuilder(); for(Map.Entry<String,String> ent : pendingPairs.entrySet()) { builder.add(Util.toUTF16(ent.getKey(), scratch), new CharsRef(ent.getValue())); } map = builder.finish(); pendingPairs.clear(); } catch (IOException ioe) { // Bogus FST IOExceptions!! (will never happen) throw new RuntimeException(ioe); } return new NormalizeCharMap(map); } }
/** Builds the NormalizeCharMap; call this once you * are done calling {@link #add}. */ public NormalizeCharMap build() { final FST<CharsRef> map; try { final Outputs<CharsRef> outputs = CharSequenceOutputs.getSingleton(); final org.apache.lucene.util.fst.Builder<CharsRef> builder = new org.apache.lucene.util.fst.Builder<>(FST.INPUT_TYPE.BYTE2, outputs); final IntsRefBuilder scratch = new IntsRefBuilder(); for(Map.Entry<String,String> ent : pendingPairs.entrySet()) { builder.add(Util.toUTF16(ent.getKey(), scratch), new CharsRef(ent.getValue())); } map = builder.finish(); pendingPairs.clear(); } catch (IOException ioe) { // Bogus FST IOExceptions!! (will never happen) throw new RuntimeException(ioe); } return new NormalizeCharMap(map); } }
public static FST<?> getFST(SortedSet<String> values) throws IOException { final IntsRefBuilder irBuilder = new IntsRefBuilder(); // The builder options with defaults FST.INPUT_TYPE inputType = FST.INPUT_TYPE.BYTE1; int minSuffixCount1 = 0; int minSuffixCount2 = 0; boolean doShareSuffix = true; boolean doShareNonSingletonNodes = true; int shareMaxTailLength = Integer.MAX_VALUE; boolean allowArrayArcs = true; int bytesPageBits = 15; final Outputs<Object> outputs = NoOutputs.getSingleton(); // create the FST from the values org.apache.lucene.util.fst.Builder<Object> fstBuilder = new org.apache.lucene.util.fst.Builder<>(inputType, minSuffixCount1, minSuffixCount2, doShareSuffix, doShareNonSingletonNodes, shareMaxTailLength, outputs, allowArrayArcs, bytesPageBits); for (String value : values) { Util.toUTF16(value, irBuilder); final IntsRef scratchInt = irBuilder.get(); fstBuilder.add(scratchInt, outputs.getNoOutput()); } return fstBuilder.finish(); }