/**
 * Creates a new string map by creating and wrapping a
 * {@link TwoStepsLcpMonotoneMinimalPerfectHashFunction}.
 *
 * <p>The function is built over {@code keys} using the
 * {@link TransformationStrategies#prefixFreeUtf16() prefix-free UTF-16} transformation strategy.
 *
 * @param keys the keys used to populate the string map.
 * @throws IOException if building the underlying function fails with an I/O error.
 */
public SignedFunctionStringMap(final Iterable<? extends CharSequence> keys) throws IOException {
	this.function = new TwoStepsLcpMonotoneMinimalPerfectHashFunction.Builder<CharSequence>()
			.keys(keys)
			.transform(TransformationStrategies.prefixFreeUtf16())
			.build();
}
/**
 * Returns a compact, human-readable description of this node.
 *
 * <p>Leaves are rendered between square brackets and other nodes between parentheses. The
 * description contains the low 16 bits of the hash code in hexadecimal, the key restricted to
 * the extent (abbreviated to its first and last 8 bits when the extent is longer than 16 bits),
 * the interval {@code [nameLength..extentLength]} and, for internal nodes, the handle and jump
 * lengths.
 *
 * @return a string describing this node.
 */
@Override
@SuppressWarnings({"unchecked"})
public String toString() {
	// The key object lives in the reference of an internal node, or in the leaf itself.
	final Object rawKey;
	if (isInternal()) rawKey = ((InternalNode<U>)this).reference.key;
	else rawKey = ((Leaf<U>)this).key;

	// Character sequences are turned into bit vectors; anything else goes through the identity strategy.
	final TransformationStrategy strategy;
	if (rawKey instanceof CharSequence) strategy = TransformationStrategies.prefixFreeIso();
	else strategy = TransformationStrategies.identity();

	final long length = extentLength(strategy);

	final String opening = isLeaf() ? "[" : "(";
	final String hash = Integer.toHexString(hashCode() & 0xFFFF);

	// Key part: empty when there is no key, abbreviated when the extent is longer than 16 bits.
	final String keyText;
	if (key(strategy) == null) keyText = "";
	else if (length > 16) keyText = " " + (key(strategy).subVector(0, 8) + "..." + key(strategy).subVector(length - 8, length));
	else keyText = " " + key(strategy).subVector(0, length);

	final String range = " [" + nameLength + ".." + length + "], ";

	// Only internal nodes have handle and jump lengths.
	final String jumps;
	if (isInternal()) jumps = ((InternalNode<U>)this).handleLength() + "->" + ((InternalNode<U>)this).jumpLength();
	else jumps = "";

	final String closing = isLeaf() ? "]" : ")";
	return opening + hash + keyText + range + jumps + closing;
}
}
LOGGER.debug("Bucket size: " + bucketSize); final Iterable<BitVector> bitVectors = TransformationStrategies.wrap(elements, transform); distributor = new HollowTrieDistributor<>(bitVectors, log2BucketSize, TransformationStrategies.identity(), tempDir); offset = new GOV3Function.Builder<BitVector>().keys(bitVectors).transform(TransformationStrategies.identity()).values(new AbstractLongBigList() { @Override public long getLong(long index) {
? new HuTuckerTransformationStrategy(collection, true) : iso ? TransformationStrategies.prefixFreeIso() : utf32 ? TransformationStrategies.prefixFreeUtf32() : TransformationStrategies.prefixFreeUtf16();
// Build the resolver on top of a signed string map and store it under resolverName.
// With the "sorted" option a monotone minimal perfect hash with a prefix-free
// transformation is used; otherwise an MWHC function with the plain transformation.
if ( jsapResult.getBoolean( "sorted" ) ) {
	BinIO.storeObject(
		new URLMPHVirtualDocumentResolver(
			new ShiftAddXorSignedStringMap(
				collection.iterator(),
				new TwoStepsLcpMonotoneMinimalPerfectHashFunction<CharSequence>(
					collection,
					iso ? TransformationStrategies.prefixFreeIso() : TransformationStrategies.prefixFreeUtf16() ),
				width ) ),
		resolverName );
}
else {
	BinIO.storeObject(
		new URLMPHVirtualDocumentResolver(
			new ShiftAddXorSignedStringMap(
				collection.iterator(),
				new MWHCFunction<CharSequence>(
					collection,
					iso ? TransformationStrategies.iso() : TransformationStrategies.utf16() ),
				width ) ),
		resolverName );
}
LOGGER.debug( " done." );
? TransformationStrategies.prefixFreeIso() : utf32 ? TransformationStrategies.prefixFreeUtf32() : TransformationStrategies.prefixFreeUtf16(); final ZFastTrie<LongArrayBitVector> zFastTrie = new ZFastTrie<>(TransformationStrategies.identity()); while(lineIterator.hasNext()) { zFastTrie.add(LongArrayBitVector.copy(transformationStrategy.toBitVector(lineIterator.next().copy())));
// Build the resolver on top of a signed string map and store it under resolverName.
// With the "sorted" option a monotone minimal perfect hash is built, otherwise a
// GOV3 function; both use the same prefix-free transformation (ISO or UTF-16).
if (jsapResult.getBoolean("sorted")) {
	BinIO.storeObject(
		new URLMPHVirtualDocumentResolver(
			new ShiftAddXorSignedStringMap(
				collection.iterator(),
				new TwoStepsLcpMonotoneMinimalPerfectHashFunction.Builder<CharSequence>()
					.keys(collection)
					.transform(iso ? TransformationStrategies.prefixFreeIso() : TransformationStrategies.prefixFreeUtf16())
					.build(),
				width)),
		resolverName);
}
else {
	BinIO.storeObject(
		new URLMPHVirtualDocumentResolver(
			new ShiftAddXorSignedStringMap(
				collection.iterator(),
				new GOV3Function.Builder<CharSequence>()
					.keys(collection)
					.transform(iso ? TransformationStrategies.prefixFreeIso() : TransformationStrategies.prefixFreeUtf16())
					.build(),
				width)),
		resolverName);
}
LOGGER.debug( " done." );
new MWHCFunction.Builder<CharSequence>().keys( surfaceForms ).transform( TransformationStrategies.utf16() ).build() );
/**
 * Returns the identity transformation strategy for bit vectors by delegating to
 * {@link TransformationStrategies#identity()}.
 *
 * @param <T> the type of bit vector handled by the returned strategy.
 * @return the strategy returned by {@link TransformationStrategies#identity()}.
 * @deprecated Call {@link TransformationStrategies#identity()} directly.
 */
@Deprecated
public static <T extends BitVector> TransformationStrategy<T> identity() {
	final TransformationStrategy<T> strategy = TransformationStrategies.identity();
	return strategy;
}
behaviour = new GOV3Function.Builder<BitVector>().keys(TransformationStrategies.wrap(elements, transformationStrategy)).transform(TransformationStrategies.identity()).store(chunkedHashStore).values(intermediateTrie.externalValues, 1).indirect().build(); intermediateTrie.externalValues = null; ranker = new TwoStepsLcpMonotoneMinimalPerfectHashFunction.Builder<BitVector>().keys(Arrays.asList(rankerArray)).transform(TransformationStrategies.prefixFree()).build(); rankerArray = null; final ChunkedHashStore<BitVector> intermediateTrieChunkedHashStore = new ChunkedHashStore<>(TransformationStrategies.identity(), chunkedHashStore.tempDir()); intermediateTrieChunkedHashStore.reset(seed); intermediateTrieChunkedHashStore.addAll(intermediateTrie.internalNodeKeys.iterator(), intermediateTrie.internalNodeSignatures.iterator()); pl.start("Searching for mistakes..."); final Iterator<BitVector>iterator = TransformationStrategies.wrap(elements.iterator(), transformationStrategy); c = 0; int mistakes = 0; pl.start("Searching for false positives..."); for(final BitVector curr: TransformationStrategies.wrap(elements, transformationStrategy)) { final long h = Hashes.spooky4(curr, seed); if (mistakeSignatures.contains((int)h)) { corrections = new GOV3Function.Builder<BitVector>().keys(positives).transform(TransformationStrategies.identity()).values(results, logW).build();
? new HuTuckerTransformationStrategy(collection, true) : iso ? TransformationStrategies.prefixFreeIso() : utf32 ? TransformationStrategies.prefixFreeUtf32() : TransformationStrategies.prefixFreeUtf16();
// Build the resolver on top of a signed string map and store it under resolverName.
// With the "sorted" option a monotone minimal perfect hash with a prefix-free
// transformation is used; otherwise an MWHC function with the plain transformation.
if ( jsapResult.getBoolean( "sorted" ) ) {
	BinIO.storeObject(
		new URLMPHVirtualDocumentResolver(
			new ShiftAddXorSignedStringMap(
				collection.iterator(),
				new TwoStepsLcpMonotoneMinimalPerfectHashFunction<CharSequence>(
					collection,
					iso ? TransformationStrategies.prefixFreeIso() : TransformationStrategies.prefixFreeUtf16() ),
				width ) ),
		resolverName );
}
else {
	BinIO.storeObject(
		new URLMPHVirtualDocumentResolver(
			new ShiftAddXorSignedStringMap(
				collection.iterator(),
				new MWHCFunction<CharSequence>(
					collection,
					iso ? TransformationStrategies.iso() : TransformationStrategies.utf16() ),
				width ) ),
		resolverName );
}
LOGGER.debug( " done." );
// Build the resolver on top of a SignedFunctionStringMap and store it under resolverName.
// The wrapped function is itself signed (via signed(width)): a monotone minimal perfect
// hash when the "sorted" option is set, a GOV3 function otherwise. Both use the same
// prefix-free transformation (ISO or UTF-16).
if (jsapResult.getBoolean("sorted")) {
	BinIO.storeObject(
		new URLMPHVirtualDocumentResolver(
			new SignedFunctionStringMap(
				new TwoStepsLcpMonotoneMinimalPerfectHashFunction.Builder<CharSequence>()
					.keys(collection)
					.transform(iso ? TransformationStrategies.prefixFreeIso() : TransformationStrategies.prefixFreeUtf16())
					.signed(width)
					.build())),
		resolverName);
}
else {
	BinIO.storeObject(
		new URLMPHVirtualDocumentResolver(
			new SignedFunctionStringMap(
				new GOV3Function.Builder<CharSequence>()
					.keys(collection)
					.transform(iso ? TransformationStrategies.prefixFreeIso() : TransformationStrategies.prefixFreeUtf16())
					.signed(width)
					.build())),
		resolverName);
}
final ChunkedHashStore<BitVector> chunkedHashStore = new ChunkedHashStore<>(TransformationStrategies.identity()); chunkedHashStore.reset(random.nextLong()); for(final T s: elements) { LOGGER.debug("First bucket size estimate: " + firstbucketSize); final Iterable<BitVector> bitVectors = TransformationStrategies.wrap(elements, transform); PaCoTrieDistributor<BitVector> firstDistributor = new PaCoTrieDistributor<>(bitVectors, t, TransformationStrategies.identity()); else { firstDistributor = null; distributor = new PaCoTrieDistributor<>(bitVectors, log2BucketSize, TransformationStrategies.identity()); offset = new GOV3Function.Builder<BitVector>().keys(bitVectors).transform(TransformationStrategies.identity()).store(chunkedHashStore).values(new AbstractLongBigList() { @Override public long getLong(long index) {
TransformationStrategies.utf16() ).build() );
/**
 * Returns the identity transformation strategy for bit vectors by delegating to
 * {@link TransformationStrategies#identity()}.
 *
 * @param <T> the type of bit vector handled by the returned strategy.
 * @return the strategy returned by {@link TransformationStrategies#identity()}.
 * @deprecated Call {@link TransformationStrategies#identity()} directly.
 */
@Deprecated
public static <T extends BitVector> TransformationStrategy<T> identity() {
	final TransformationStrategy<T> strategy = TransformationStrategies.identity();
	return strategy;
}
? new HuTuckerTransformationStrategy(collection, true) : iso ? TransformationStrategies.prefixFreeIso() : utf32 ? TransformationStrategies.prefixFreeUtf32() : TransformationStrategies.prefixFreeUtf16();
TransformationStrategies.prefixFreeUtf16()));
// Set-up for building the z-fast trie distributor: a seeded chunked hash store, a
// bit-vector view of the keys, and a progress logger.
// NOTE(review): totalLength is not used within this span; presumably it is accumulated further down.
long totalLength = 0;
// Random source; used here only to seed the chunked hash store.
final RandomGenerator r = new XoRoShiRo128PlusRandomGenerator();
// The store works on bit vectors, hence the identity strategy.
// NOTE(review): tempDir is presumably where the store keeps its temporary files; confirm.
final ChunkedHashStore<BitVector> chunkedHashStore = new ChunkedHashStore<>(TransformationStrategies.identity(), tempDir);
chunkedHashStore.reset(r.nextLong());
// View of the keys as bit vectors, obtained through the supplied transformation strategy.
final Iterable<BitVector> bitVectors = TransformationStrategies.wrap(keys, transform);
final ProgressLogger pl = new ProgressLogger(LOGGER);
pl.displayLocalSpeed = true;
// The bucket size is 2^log2BucketSize.
LOGGER.debug("Bucket size: " + (1L << this.log2BucketSize));
LOGGER.info("Computing z-fast trie distributor...");
// The keys handed to the distributor are already bit vectors, so the identity strategy is used.
distributor = new ZFastTrieDistributor<>(bitVectors, this.log2BucketSize, TransformationStrategies.identity(), chunkedHashStore);