/** Computes a hash code by folding in the hash of each key's string-array value, in key iteration order. */
public int hashCode() {
	int result = 0;
	for ( final Iterator<?> keyIterator = getKeys(); keyIterator.hasNext(); ) {
		final String key = (String)keyIterator.next();
		result = 31 * result + Arrays.hashCode( getStringArray( key ) );
	}
	return result;
}
/** Delegates to {@link #getLong(String, long)}, using the lower-cased name of the given enum key. */
public long getLong( final Enum<?> key, long arg ) {
	final String propertyName = key.name().toLowerCase();
	return getLong( propertyName, arg );
}
protected static TermProcessor getTermProcessor( final Properties properties ) { try { // Catch old property files if ( properties.getProperty( Index.PropertyKeys.TERMPROCESSOR ) == null ) throw new IllegalArgumentException( "No term processor has been specified (most likely, because of an obsolete property file)" ); return ObjectParser.fromSpec( properties.getString( Index.PropertyKeys.TERMPROCESSOR ), TermProcessor.class, MG4JClassParser.PACKAGE, new String[] { "getInstance" } ); } catch ( Exception e ) { throw new RuntimeException( e ); } }
/** Builds one property set per local index, each recording the pointer interval it covers
 * ({@code pointerfrom} inclusive, {@code pointerto} exclusive). */
public Properties[] properties() {
	final Properties[] result = new Properties[ k ];
	for( int index = 0; index < k; index++ ) {
		final Properties local = new Properties();
		local.addProperty( "pointerfrom", cutPoint[ index ] );
		local.addProperty( "pointerto", cutPoint[ index + 1 ] );
		result[ index ] = local;
	}
	return result;
}
final Properties properties = new Properties( basename + DiskBasedIndex.PROPERTIES_EXTENSION ); ClusteringStrategy strategy = null; Class<? extends ClusteringStrategy> strategyClass = null; if ( properties.containsKey( PropertyKeys.STRATEGY ) ) strategy = (ClusteringStrategy)BinIO.loadObject( properties.getString( PropertyKeys.STRATEGY ) ); else if ( properties.containsKey( PropertyKeys.STRATEGYCLASS ) ) try { strategyClass = (Class<? extends ClusteringStrategy>)MG4JClassParser.getParser().parse( properties.getString( PropertyKeys.STRATEGYCLASS ) ); final Class<? extends IndexCluster> indexClass = (Class<? extends IndexCluster>)Class.forName( properties.getString( Index.PropertyKeys.INDEXCLASS, "(missing index class)" )); String[] localBasename = properties.getStringArray( PropertyKeys.LOCALINDEX ); Index[] localIndex = new Index[ localBasename.length ]; for( int i = 0; i < localIndex.length ; i++ ) localIndex[ i ] = Index.getInstance( localBasename[ i ], randomAccess, documentSizes ); final int numberOfDocuments = properties.getInt( Index.PropertyKeys.DOCUMENTS ); final IntBigList sizes = queryProperties != null && queryProperties.containsKey( Index.UriKeys.SIZES ) ? DiskBasedIndex.readSizes( queryProperties.get( Index.UriKeys.SIZES ), numberOfDocuments ) : null; if ( properties.getBoolean( DocumentalCluster.PropertyKeys.BLOOM ) ) { LOGGER.debug( "Loading Bloom filters..." ); termFilter = new BloomFilter[ localIndex.length ]; return new LexicalCluster( localIndex, (LexicalClusteringStrategy)strategy, termFilter, numberOfDocuments, properties.getInt( Index.PropertyKeys.TERMS ), properties.getLong( Index.PropertyKeys.POSTINGS ), properties.getLong( Index.PropertyKeys.OCCURRENCES ), properties.getInt( Index.PropertyKeys.MAXCOUNT ), payload, hasCounts, hasPositions, Index.getTermProcessor( properties ),
final boolean isHighPerformance = BitStreamHPIndex.class.isAssignableFrom( Class.forName( new Properties( inputBasename + DiskBasedIndex.PROPERTIES_EXTENSION ).getString( Index.PropertyKeys.INDEXCLASS ) ) ); Properties properties = new Properties( inputBasename + DiskBasedIndex.PROPERTIES_EXTENSION ); Properties globalProperties = new Properties(); if ( strategyFilename != null ) globalProperties.setProperty( IndexCluster.PropertyKeys.STRATEGY, strategyFilename ); globalProperties.setProperty( DocumentalCluster.PropertyKeys.BLOOM, false ); globalProperties.setProperty( Index.PropertyKeys.INDEXCLASS, LexicalCluster.class.getName() ); for( int i = 0; i < numIndices; i++ ) globalProperties.addProperty( IndexCluster.PropertyKeys.LOCALINDEX, localBasename[ i ] ); globalProperties.setProperty( Index.PropertyKeys.FIELD, properties.getProperty( Index.PropertyKeys.FIELD ) ); globalProperties.setProperty( Index.PropertyKeys.POSTINGS, properties.getProperty( Index.PropertyKeys.POSTINGS ) ); globalProperties.setProperty( Index.PropertyKeys.OCCURRENCES, properties.getProperty( Index.PropertyKeys.OCCURRENCES ) ); globalProperties.setProperty( Index.PropertyKeys.DOCUMENTS, properties.getProperty( Index.PropertyKeys.DOCUMENTS ) ); globalProperties.setProperty( Index.PropertyKeys.TERMS, properties.getProperty( Index.PropertyKeys.TERMS ) ); globalProperties.setProperty( Index.PropertyKeys.TERMPROCESSOR, properties.getProperty( Index.PropertyKeys.TERMPROCESSOR ) ); globalProperties.setProperty( Index.PropertyKeys.MAXCOUNT, properties.getProperty( Index.PropertyKeys.MAXCOUNT ) ); globalProperties.setProperty( Index.PropertyKeys.MAXDOCSIZE, properties.getProperty( Index.PropertyKeys.MAXDOCSIZE ) ); globalProperties.save( outputBasename + DiskBasedIndex.PROPERTIES_EXTENSION ); LOGGER.debug( "Properties for clustered index " + outputBasename + ": " + new ConfigurationMap( globalProperties ) ); input.close(); output.close(); Properties localProperties = new Properties(); 
// Seed the per-cluster properties from the global ones, then override the local statistics.
// NOTE(review): POSTINGS is set twice in a row with the same value below — the second
// setProperty call is redundant and looks like a copy-paste slip; confirm and remove.
localProperties.addAll( globalProperties ); localProperties.setProperty( Index.PropertyKeys.TERMS, numTerms[ i ] ); localProperties.setProperty( Index.PropertyKeys.OCCURRENCES, numberOfOccurrences[ i ] ); localProperties.setProperty( Index.PropertyKeys.POSTINGS, numberOfPostings[ i ] ); localProperties.setProperty( Index.PropertyKeys.POSTINGS, numberOfPostings[ i ] ); localProperties.setProperty( Index.PropertyKeys.INDEXCLASS, properties.getProperty( Index.PropertyKeys.INDEXCLASS ) ); localProperties.addProperties( Index.PropertyKeys.CODING, properties.getStringArray( Index.PropertyKeys.CODING ) );
indexClass = Class.forName( properties.getString( Index.PropertyKeys.INDEXCLASS, "(missing index class)" )); if ( ! indexFile.exists() ) throw new FileNotFoundException( "Cannot find index file " + indexFile.getName() ); final Map<Component,Coding> flags = CompressionFlags.valueOf( properties.getStringArray( Index.PropertyKeys.CODING ), null ); final long numberOfDocuments = properties.getLong( Index.PropertyKeys.DOCUMENTS ); final long numberOfTerms = properties.getLong( Index.PropertyKeys.TERMS ); final long numberOfPostings= properties.getLong( Index.PropertyKeys.POSTINGS ); final long numberOfOccurrences = properties.getLong( Index.PropertyKeys.OCCURRENCES, -1 ); final int maxCount = properties.getInt( Index.PropertyKeys.MAXCOUNT, -1 ); final String field = properties.getString( Index.PropertyKeys.FIELD, new File( basename.toString() ).getName() ); final Payload payload = (Payload)( properties.containsKey( Index.PropertyKeys.PAYLOADCLASS ) ? Class.forName( properties.getString( Index.PropertyKeys.PAYLOADCLASS ) ).newInstance() : null ); final Coding frequencyCoding = flags.get( Component.FREQUENCIES ); final Coding pointerCoding = flags.get( Component.POINTERS ); final int quantum = properties.getInt( BitStreamIndex.PropertyKeys.SKIPQUANTUM, -1 ); final int height = properties.getInt( BitStreamIndex.PropertyKeys.SKIPHEIGHT, -1 ); final int bufferSize = properties.getInt( BitStreamIndex.PropertyKeys.BUFFERSIZE, BitStreamIndex.DEFAULT_BUFFER_SIZE );
indexWriter.close(); final Properties properties = indexWriter.properties(); properties.setProperty( Index.PropertyKeys.SIZE, 0 ); properties.setProperty( Index.PropertyKeys.OCCURRENCES, -1 ); properties.setProperty( Index.PropertyKeys.MAXCOUNT, -1 ); properties.setProperty( Index.PropertyKeys.MAXDOCSIZE, -1 ); properties.setProperty( Index.PropertyKeys.TERMPROCESSOR, NullTermProcessor.class.getName() ); properties.setProperty( Index.PropertyKeys.PAYLOADCLASS, payload.getClass().getName() ); if ( field != null ) properties.setProperty( Index.PropertyKeys.FIELD, field ); properties.save( batchBasename + DiskBasedIndex.PROPERTIES_EXTENSION ); new FileOutputStream( batchBasename + DiskBasedIndex.TERMS_EXTENSION ).close(); batch = 1; final Properties properties = new Properties(); if ( field != null ) properties.setProperty( Index.PropertyKeys.FIELD, field ); properties.setProperty( Index.PropertyKeys.BATCHES, batch ); properties.setProperty( Index.PropertyKeys.DOCUMENTS, totDocuments ); properties.setProperty( Index.PropertyKeys.POSTINGS, totPostings ); properties.setProperty( Index.PropertyKeys.OCCURRENCES, -1 ); properties.setProperty( Index.PropertyKeys.MAXCOUNT, -1 ); properties.setProperty( Index.PropertyKeys.MAXDOCSIZE, -1 ); properties.setProperty( Index.PropertyKeys.TERMPROCESSOR, NullTermProcessor.class.getName() ); properties.setProperty( Index.PropertyKeys.PAYLOADCLASS, payload.getClass().getName() ); final Properties clusterProperties = new Properties(); clusterProperties.addAll( properties ); clusterProperties.setProperty( Index.PropertyKeys.TERMS, 1 ); clusterProperties.setProperty( IndexCluster.PropertyKeys.BLOOM, false ); clusterProperties.setProperty( IndexCluster.PropertyKeys.FLAT, true );
Properties globalProperties = new Properties(); globalProperties.setProperty( Index.PropertyKeys.FIELD, inputProperties.getProperty( Index.PropertyKeys.FIELD ) ); globalProperties.setProperty( Index.PropertyKeys.TERMPROCESSOR, inputProperties.getProperty( Index.PropertyKeys.TERMPROCESSOR ) ); localProperties.addAll( globalProperties ); localProperties.setProperty( Index.PropertyKeys.MAXCOUNT, String.valueOf( maxDocPos[ i ] ) ); localProperties.setProperty( Index.PropertyKeys.MAXDOCSIZE, maxDocSize[ i ] ); localProperties.setProperty( Index.PropertyKeys.FIELD, globalProperties.getProperty( Index.PropertyKeys.FIELD ) ); localProperties.setProperty( Index.PropertyKeys.OCCURRENCES, haveCounts ? numOccurrences[ i ] : -1 ); localProperties.setProperty( Index.PropertyKeys.POSTINGS, numPostings[ i ] ); localProperties.setProperty( Index.PropertyKeys.TERMS, numTerms[ i ] ); if ( havePayloads ) localProperties.setProperty( Index.PropertyKeys.PAYLOADCLASS, payload.getClass().getName() ); if ( strategyProperties[ i ] != null ) localProperties.addAll( strategyProperties[ i ] ); localProperties.save( localBasename[ i ] + DiskBasedIndex.PROPERTIES_EXTENSION ); if ( strategyFilename != null ) globalProperties.setProperty( IndexCluster.PropertyKeys.STRATEGY, strategyFilename ); for( int i = 0; i < numIndices; i++ ) globalProperties.addProperty( IndexCluster.PropertyKeys.LOCALINDEX, localBasename[ i ] ); globalProperties.setProperty( DocumentalCluster.PropertyKeys.BLOOM, bloomFilterPrecision != 0 ); globalProperties.setProperty( DocumentalCluster.PropertyKeys.FLAT, inputProperties.getLong( Index.PropertyKeys.TERMS ) <= 1 ); globalProperties.setProperty( Index.PropertyKeys.MAXCOUNT, inputProperties.getProperty( Index.PropertyKeys.MAXCOUNT ) ); globalProperties.setProperty( Index.PropertyKeys.MAXDOCSIZE, inputProperties.getProperty( Index.PropertyKeys.MAXDOCSIZE ) ); globalProperties.setProperty( Index.PropertyKeys.POSTINGS, inputProperties.getProperty( Index.PropertyKeys.POSTINGS ) 
); globalProperties.setProperty( Index.PropertyKeys.OCCURRENCES, inputProperties.getProperty( Index.PropertyKeys.OCCURRENCES ) ); globalProperties.setProperty( Index.PropertyKeys.DOCUMENTS, inputProperties.getProperty( Index.PropertyKeys.DOCUMENTS ) ); globalProperties.setProperty( Index.PropertyKeys.TERMS, inputProperties.getProperty( Index.PropertyKeys.TERMS ) ); if ( havePayloads ) globalProperties.setProperty( Index.PropertyKeys.PAYLOADCLASS, payload.getClass().getName() ); globalProperties.setProperty( Index.PropertyKeys.INDEXCLASS,
Properties globalProperties = new Properties(); globalProperties.setProperty( Index.PropertyKeys.FIELD, inputProperties.getProperty( Index.PropertyKeys.FIELD ) ); globalProperties.setProperty( Index.PropertyKeys.TERMPROCESSOR, inputProperties.getProperty( Index.PropertyKeys.TERMPROCESSOR ) ); localProperties.addAll( globalProperties ); localProperties.setProperty( Index.PropertyKeys.MAXCOUNT, String.valueOf( maxDocPos[ i ] ) ); localProperties.setProperty( Index.PropertyKeys.MAXDOCSIZE, maxDocSize[ i ] ); localProperties.setProperty( Index.PropertyKeys.FIELD, globalProperties.getProperty( Index.PropertyKeys.FIELD ) ); localProperties.setProperty( Index.PropertyKeys.OCCURRENCES, haveCounts ? numOccurrences[ i ] : -1 ); localProperties.setProperty( Index.PropertyKeys.POSTINGS, numPostings[ i ] ); localProperties.setProperty( Index.PropertyKeys.TERMS, numTerms[ i ] ); if ( havePayloads ) localProperties.setProperty( Index.PropertyKeys.PAYLOADCLASS, payload.getClass().getName() ); if ( strategyProperties[ i ] != null ) localProperties.addAll( strategyProperties[ i ] ); localProperties.save( localBasename[ i ] + DiskBasedIndex.PROPERTIES_EXTENSION ); if ( strategyFilename != null ) globalProperties.setProperty( IndexCluster.PropertyKeys.STRATEGY, strategyFilename ); for( int i = 0; i < numIndices; i++ ) globalProperties.addProperty( IndexCluster.PropertyKeys.LOCALINDEX, localBasename[ i ] ); globalProperties.setProperty( DocumentalCluster.PropertyKeys.BLOOM, bloomFilterPrecision != 0 ); globalProperties.setProperty( DocumentalCluster.PropertyKeys.FLAT, inputProperties.getInt( Index.PropertyKeys.TERMS ) <= 1 ); globalProperties.setProperty( Index.PropertyKeys.MAXCOUNT, inputProperties.getProperty( Index.PropertyKeys.MAXCOUNT ) ); globalProperties.setProperty( Index.PropertyKeys.MAXDOCSIZE, inputProperties.getProperty( Index.PropertyKeys.MAXDOCSIZE ) ); globalProperties.setProperty( Index.PropertyKeys.POSTINGS, inputProperties.getProperty( Index.PropertyKeys.POSTINGS ) ); 
globalProperties.setProperty( Index.PropertyKeys.OCCURRENCES, inputProperties.getProperty( Index.PropertyKeys.OCCURRENCES ) ); globalProperties.setProperty( Index.PropertyKeys.DOCUMENTS, inputProperties.getProperty( Index.PropertyKeys.DOCUMENTS ) ); globalProperties.setProperty( Index.PropertyKeys.TERMS, inputProperties.getProperty( Index.PropertyKeys.TERMS ) ); if ( havePayloads ) globalProperties.setProperty( Index.PropertyKeys.PAYLOADCLASS, payload.getClass().getName() ); globalProperties.setProperty( Index.PropertyKeys.INDEXCLASS,
// Rewrite property values that reference the old package name so they point at the new ("big") package.
// NOTE(review): `newEntry` is not defined anywhere in this visible scope — the setProperty call
// looks like it belongs in a later loop over `newVals`; confirm against the full method.
// NOTE(review): the `else` branch throws for every key whose value does NOT contain the old
// package name, which would abort on the first untouched property — verify this is intended.
Properties indexProps = new Properties(propsFile); indexProps.setAutoSave(false); Iterator<String> keysIter = indexProps.getKeys(); String OLDPKG = "it.unimi.dsi.mg4j"; String NEWPKG = "it.unimi.dsi.big.mg4j"; while(keysIter.hasNext()) { String key = keysIter.next(); Object value = indexProps.getProperty(key); if(value instanceof String && ((String)value).indexOf(OLDPKG) >= 0) { newVals.put(key, ((String)value).replace(OLDPKG, NEWPKG)); indexProps.setProperty(newEntry.getKey(), newEntry.getValue()); indexProps.save(); } else { throw new IOException("Could not rename old properties file (" +
public Properties properties() { Properties result = new Properties(); result.setProperty( Index.PropertyKeys.DOCUMENTS, numberOfDocuments ); result.setProperty( Index.PropertyKeys.TERMS, currentTerm + 1 ); result.setProperty( Index.PropertyKeys.POSTINGS, numberOfPostings ); result.setProperty( Index.PropertyKeys.MAXCOUNT, maxCount ); result.setProperty( Index.PropertyKeys.INDEXCLASS, FileIndex.class.getName() ); // We save all flags, except for the PAYLOAD component, which is just used internally. for( Map.Entry<Component,Coding> e: flags.entrySet() ) if ( e.getKey() != Component.PAYLOADS ) result.addProperty( Index.PropertyKeys.CODING, new MutableString().append( e.getKey() ).append( ':' ).append( e.getValue() ) ); return result; } }
/** The type of an index. */
public enum IndexType {
	/** An old-style index in which all components are interleaved in a single stream. */
	INTERLEAVED,
	/** A high-performance index which stores position separately. */
	HIGH_PERFORMANCE,
	/** A quasi-succinct index. */
	QUASI_SUCCINCT
}
final int batches; if ( factory.fieldType( indexedField[ i ] ) == DocumentFactory.FieldType.VIRTUAL ) { batches = new Properties( new File( batchDir, basenameField[ i ] ) + DiskBasedIndex.PROPERTIES_EXTENSION ).getInt( Index.PropertyKeys.BATCHES ); final String[] inputBasename = new String[ batches ]; for( int j = 0; j < inputBasename.length; j++ ) inputBasename[ j ] = Scan.batchBasename( j, basenameField[ i ], batchDir ); final String[] inputBasename = new Properties( new File( batchDir, basenameField[ i ] ) + Scan.CLUSTER_PROPERTIES_EXTENSION ).getStringArray( IndexCluster.PropertyKeys.LOCALINDEX ); batches = inputBasename.length; if ( factory.fieldType( indexedField[ i ] ) == DocumentFactory.FieldType.TEXT ) {
/** Computes the batch names and cumulative document end points of the first atomic subindex.
 *
 * @param indexDirectory the directory containing the atomic subindexes.
 * @return the sorted batch names and, for each batch, the cumulative number of documents up to and including it.
 * @throws IOException if a batch property file cannot be opened or read.
 * @throws ConfigurationException if a batch property file cannot be parsed.
 */
public static BatchDetails batchEndPoints(File indexDirectory) throws IOException, ConfigurationException {
	BatchDetails details = new BatchDetails();
	File[] subIndexes = indexDirectory.listFiles(INDEX_NAME_FILTER);
	// listFiles() returns null when indexDirectory does not exist or is not a directory;
	// guard against that to avoid a NullPointerException before the emptiness check.
	if(subIndexes == null || subIndexes.length == 0) {
		throw new RuntimeException("Index has no AtomicIndexes!");
	}
	// Batches are assumed to be laid out identically in every subindex, so the first one is representative.
	details.names = subIndexes[0].list(BATCH_NAME_FILTER);
	java.util.Arrays.sort(details.names, BATCH_COMPARATOR);
	details.endPoints = new long[details.names.length];
	long totalDocs = 0;
	for(int i = 0; i < details.names.length; i++) {
		Properties batchProps = new Properties();
		// Each batch directory holds a property file named after the subindex.
		try(FileInputStream propsIn = new FileInputStream(new File(new File(subIndexes[0], details.names[i]), subIndexes[0].getName() + ".properties"))) {
			batchProps.load(propsIn);
		}
		totalDocs += batchProps.getLong("documents");
		details.endPoints[i] = totalDocs; // cumulative document count through batch i
	}
	return details;
}
/** Delegates to {@link #getString(String, String)}, using the lower-cased name of the given enum key. */
public String getString( final Enum<?> key, String arg ) {
	final String propertyName = key.name().toLowerCase();
	return getString( propertyName, arg );
}
if ( subset ) termFile.close(); Properties properties = indexWriter.properties(); properties.addProperty( Index.PropertyKeys.TERMPROCESSOR, ObjectParser.toSpec( alignerIndex.termProcessor ) ); properties.save( outputBasename + DiskBasedIndex.PROPERTIES_EXTENSION ); frequencies.close();
/** Delegates to {@link #getBoolean(String, Boolean)}, using the lower-cased name of the given enum key. */
public Boolean getBoolean( final Enum<?> key, Boolean arg ) {
	final String propertyName = key.name().toLowerCase();
	return getBoolean( propertyName, arg );
}
final int batches; if ( factory.fieldType( indexedField[ i ] ) == DocumentFactory.FieldType.VIRTUAL ) { batches = IOFactories.loadProperties( ioFactory, basenameField[ i ] + DiskBasedIndex.PROPERTIES_EXTENSION ).getInt( Index.PropertyKeys.BATCHES ); final String[] inputBasename = new String[ batches ]; for( int j = 0; j < inputBasename.length; j++ ) inputBasename[ j ] = Scan.batchBasename( j, basenameField[ i ], batchDir ); final String[] inputBasename = IOFactories.loadProperties( ioFactory, basenameField[ i ] + Scan.CLUSTER_PROPERTIES_EXTENSION ).getStringArray( IndexCluster.PropertyKeys.LOCALINDEX );
/** Serializes this object, writing the configuration data via {@code save}. */
private void writeObject( final ObjectOutputStream s ) throws IOException {
	s.defaultWriteObject();
	try {
		save( s );
	}
	catch ( final ConfigurationException e ) {
		// save() declares a checked exception that writeObject()'s contract cannot propagate.
		throw new RuntimeException( e );
	}
}