/**
 * Serialises an object into the file designated by a pathname.
 *
 * The pathname is turned into a string, wrapped in a {@link File}, and the
 * actual work is handed to the overload of this method that accepts a {@link File}.
 *
 * @param o the object to store.
 * @param filename the pathname of the destination file.
 * @throws IOException propagated unchanged from the delegate.
 * @see #loadObject(CharSequence)
 */
public static void storeObject(final Object o, final CharSequence filename) throws IOException {
	final File destination = new File(filename.toString());
	storeObject(o, destination);
}

/** Loads an object from a file given by a {@link File} object.
/**
 * Writes the two sets held by this object to files below the model directory.
 *
 * The file names are {@code modelPath + "/" + prefix + "mutSet"} and
 * {@code modelPath + "/" + prefix + "unMutSet"}. Both files are created first
 * (a no-op when they already exist); then {@code plantedMutSet} and
 * {@code unMutSet} are stored into them, in that order, via {@code BinIO.storeObject}.
 *
 * @throws IOException if creating or writing either file fails.
 */
public void save() throws IOException {
	final String base = modelPath + "/" + prefix;
	final File plantedTarget = new File(base + "mutSet");
	final File unplantedTarget = new File(base + "unMutSet");

	plantedTarget.createNewFile();
	unplantedTarget.createNewFile();

	BinIO.storeObject(plantedMutSet, plantedTarget);
	BinIO.storeObject(unMutSet, unplantedTarget);
}
/**
 * Finalises the zip archive and stores the serialised collection that refers to it.
 *
 * When no document has been added, an entry named {@code "dummy"} is put into the
 * archive before the data stream is closed. A {@link ZipDocumentCollection} over
 * {@code basenameSuffix + ZipDocumentCollection.ZIP_EXTENSION} is then created and
 * stored under {@code basenameSuffix + DocumentCollection.DEFAULT_EXTENSION}.
 *
 * The newly created collection is closed in a {@code finally} clause, so it is
 * released even when serialisation fails.
 *
 * @throws IOException if writing the archive, storing the collection or closing it fails.
 */
public void close() throws IOException {
	// NOTE(review): presumably the placeholder keeps the archive from being closed without entries; confirm.
	if ( numberOfDocuments == 0 ) zipOut.putNextEntry( new ZipEntry( "dummy" ) );
	zipDataOutputStream.close();

	final ZipDocumentCollection zipDocumentCollection = new ZipDocumentCollection( basenameSuffix + ZipDocumentCollection.ZIP_EXTENSION, factory, numberOfDocuments, exact );
	try {
		BinIO.storeObject( zipDocumentCollection, basenameSuffix + DocumentCollection.DEFAULT_EXTENSION );
	}
	finally {
		// Previously skipped whenever storeObject() threw, leaving the collection open.
		zipDocumentCollection.close();
	}
}
/**
 * Finalises the zip archive and stores the serialised collection that refers to it.
 *
 * When no document has been added, an entry named {@code "dummy"} is put into the
 * archive before the data stream is closed. A {@link ZipDocumentCollection} over
 * {@code basenameSuffix + ZipDocumentCollection.ZIP_EXTENSION} is then created and
 * stored under {@code basenameSuffix + DocumentCollection.DEFAULT_EXTENSION}.
 *
 * The newly created collection is closed in a {@code finally} clause, so it is
 * released even when serialisation fails.
 *
 * @throws IOException if writing the archive, storing the collection or closing it fails.
 */
public void close() throws IOException {
	// NOTE(review): presumably the placeholder keeps the archive from being closed without entries; confirm.
	if ( numberOfDocuments == 0 ) zipOut.putNextEntry( new ZipEntry( "dummy" ) );
	zipDataOutputStream.close();

	final ZipDocumentCollection zipDocumentCollection = new ZipDocumentCollection( basenameSuffix + ZipDocumentCollection.ZIP_EXTENSION, factory, numberOfDocuments, exact );
	try {
		BinIO.storeObject( zipDocumentCollection, basenameSuffix + DocumentCollection.DEFAULT_EXTENSION );
	}
	finally {
		// Previously skipped whenever storeObject() threw, leaving the collection open.
		zipDocumentCollection.close();
	}
}
/**
 * Finalises the zip archive and stores the serialised collection that refers to it.
 *
 * When no document has been added, an entry named {@code "dummy"} is put into the
 * archive before the data stream is closed. A {@link ZipDocumentCollection} over
 * {@code basenameSuffix + ZipDocumentCollection.ZIP_EXTENSION} is then created and
 * stored under {@code basenameSuffix + DocumentCollection.DEFAULT_EXTENSION}.
 *
 * The newly created collection is closed in a {@code finally} clause, so it is
 * released even when serialisation fails.
 *
 * @throws IOException if writing the archive, storing the collection or closing it fails.
 */
public void close() throws IOException {
	// NOTE(review): presumably the placeholder keeps the archive from being closed without entries; confirm.
	if ( numberOfDocuments == 0 ) zipOut.putNextEntry( new ZipEntry( "dummy" ) );
	zipDataOutputStream.close();

	final ZipDocumentCollection zipDocumentCollection = new ZipDocumentCollection( basenameSuffix + ZipDocumentCollection.ZIP_EXTENSION, factory, numberOfDocuments, exact );
	try {
		BinIO.storeObject( zipDocumentCollection, basenameSuffix + DocumentCollection.DEFAULT_EXTENSION );
	}
	finally {
		// Previously skipped whenever storeObject() threw, leaving the collection open.
		zipDocumentCollection.close();
	}
}
/**
 * Finalises the zip archive and stores the serialised collection that refers to it.
 *
 * When no document has been added, an entry named {@code "dummy"} is put into the
 * archive before the data stream is closed. A {@link ZipDocumentCollection} over
 * {@code basenameSuffix + ZipDocumentCollection.ZIP_EXTENSION} is then created and
 * stored under {@code basenameSuffix + DocumentCollection.DEFAULT_EXTENSION}.
 *
 * The newly created collection is closed in a {@code finally} clause, so it is
 * released even when serialisation fails.
 *
 * @throws IOException if writing the archive, storing the collection or closing it fails.
 */
public void close() throws IOException {
	// NOTE(review): presumably the placeholder keeps the archive from being closed without entries; confirm.
	if ( numberOfDocuments == 0 ) zipOut.putNextEntry( new ZipEntry( "dummy" ) );
	zipDataOutputStream.close();

	final ZipDocumentCollection zipDocumentCollection = new ZipDocumentCollection( basenameSuffix + ZipDocumentCollection.ZIP_EXTENSION, factory, numberOfDocuments, exact );
	try {
		BinIO.storeObject( zipDocumentCollection, basenameSuffix + DocumentCollection.DEFAULT_EXTENSION );
	}
	finally {
		// Previously skipped whenever storeObject() threw, leaving the collection open.
		zipDocumentCollection.close();
	}
}
/**
 * Command-line entry point: stores a serialised {@link ConcatenatedDocumentCollection}.
 *
 * The first unflagged argument is the filename of the resulting collection; every
 * further unflagged argument is the filename of a component collection. Nothing is
 * written when the parser has already printed a message (help or a parsing error).
 *
 * @param arg the command-line arguments.
 * @throws IOException if the collection cannot be built or stored.
 * @throws JSAPException if the command-line parser cannot be configured.
 */
public static void main( final String[] arg ) throws IOException, JSAPException, InstantiationException, IllegalAccessException, InvocationTargetException, NoSuchMethodException {
	// The usage name used to be TRECDocumentCollection, which does not match what this tool
	// builds. NOTE(review): assumes this main() is declared in ConcatenatedDocumentCollection; confirm.
	final SimpleJSAP jsap = new SimpleJSAP( ConcatenatedDocumentCollection.class.getName(), "Saves a serialised concatenated document collection, given the filenames of the component collections.",
		new Parameter[] {
			new UnflaggedOption( "collection", JSAP.STRING_PARSER, JSAP.REQUIRED, "The filename of the resulting collection." ),
			new UnflaggedOption( "component", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, JSAP.GREEDY, "Multiple filenames specifying a series of collections." )
		}
	);

	final JSAPResult jsapResult = jsap.parse( arg );
	if ( jsap.messagePrinted() ) return;

	BinIO.storeObject( new ConcatenatedDocumentCollection( jsapResult.getStringArray( "component" ) ), jsapResult.getString( "collection" ) );
}
/**
 * Command-line entry point: wraps a stored signed function in a
 * {@link SignedFunctionStringMap} and stores the resulting map.
 *
 * The first unflagged argument names the file holding the function, the second the
 * file that receives the map. Nothing is done when the parser has already printed
 * a message.
 *
 * @param arg the command-line arguments.
 * @throws IOException if reading the function or writing the map fails.
 * @throws JSAPException if the command-line parser cannot be configured.
 * @throws ClassNotFoundException if the stored function cannot be deserialised.
 */
@SuppressWarnings("unchecked")
public static void main(final String[] arg) throws IOException, JSAPException, ClassNotFoundException {
	final String toolName = SignedFunctionStringMap.class.getName();
	final Parameter[] options = {
		new UnflaggedOption("function", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The filename of a signed function defined on character sequences."),
		new UnflaggedOption("map", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The filename of the resulting string map.")
	};
	final SimpleJSAP jsap = new SimpleJSAP(toolName, "Saves a string map wrapping a signed function on character sequences.", options);

	final JSAPResult jsapResult = jsap.parse(arg);
	if (jsap.messagePrinted()) return;

	final String functionName = jsapResult.getString("function");
	final String mapName = jsapResult.getString("map");

	// The deserialised object is trusted to be a function on character sequences (unchecked cast).
	final Object2LongFunction<? extends CharSequence> function = (Object2LongFunction<? extends CharSequence>)BinIO.loadObject(functionName);
	final SignedFunctionStringMap map = new SignedFunctionStringMap(function);
	BinIO.storeObject(map, mapName);
}
}
/**
 * Command-line entry point: stores a serialised {@link WarcDocumentSequence}
 * built over a set of file names.
 *
 * File names come from the greedy {@code basename} arguments; when none is given,
 * they are read as lines from standard input. A warning is logged if the resulting
 * set is still empty. The process exits with status 1 when the parser has printed
 * a message.
 *
 * @param args the command-line arguments.
 * @throws Exception if parsing, factory instantiation or storing fails.
 */
public static void main( String[] args ) throws Exception {
	SimpleJSAP jsap = new SimpleJSAP(
		WarcDocumentSequence.class.getName(),
		"Saves a serialised Warc document sequence based on a set of file names.",
		new Parameter[] {
			new FlaggedOption( "factory", JSAP.CLASS_PARSER, IdentityDocumentFactory.class.getName(), JSAP.NOT_REQUIRED, 'f', "factory", "A document factory with a standard constructor." ),
			new FlaggedOption( "property", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 'p', "property", "A 'key=value' specification, or the name of a property file" ).setAllowMultipleDeclarations( true ),
			new Switch( "gzip", 'z', "gzip", "Expect gzip-ed WARC content (files should end in .warc.gz)." ),
			new FlaggedOption( "bufferSize", JSAP.INTSIZE_PARSER, DEFAULT_BUFFER_SIZE, JSAP.NOT_REQUIRED, 'b', "buffer-size", "The size of an I/O buffer." ),
			new UnflaggedOption( "sequence", JSAP.STRING_PARSER, JSAP.REQUIRED, "The filename for the serialized sequence." ),
			new UnflaggedOption( "basename", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, JSAP.GREEDY, "A list of basename files that will be indexed. If missing, a list of files will be read from standard input." )
		}
	);

	final JSAPResult jsapResult = jsap.parse( args );
	if ( jsap.messagePrinted() ) System.exit( 1 );

	final DocumentFactory factory = PropertyBasedDocumentFactory.getInstance( jsapResult.getClass( "factory" ), jsapResult.getStringArray( "property" ) );
	final boolean isGZipped = jsapResult.getBoolean( "gzip" );

	String[] file = jsapResult.getStringArray( "basename" );
	if ( file.length == 0 ) {
		// No names on the command line: fall back to one name per line of standard input.
		file = IOUtils.readLines( System.in ).toArray( new String[ 0 ] );
		if ( file.length == 0 ) LOGGER.warn( "Empty fileset" );
	}

	final int bufferSize = jsapResult.getInt( "bufferSize" );
	final WarcDocumentSequence sequence = new WarcDocumentSequence( file, factory, isGZipped, bufferSize );
	BinIO.storeObject( sequence, jsapResult.getString( "sequence" ) );
}
}
/**
 * Command-line entry point: builds a {@link SubsetLexicalStrategy} from a list of
 * terms read from standard input (decoded as UTF-8) and stores it.
 *
 * Each input line is looked up in the term map of the index named by the
 * {@code basename} argument; the resulting term numbers form the subset handed to
 * the strategy, which is stored under the {@code strategy} filename.
 *
 * @param arg the command-line arguments.
 * @throws IllegalArgumentException if a term maps to -1 in the term map.
 * @throws IOException if reading the terms or storing the strategy fails.
 */
public static void main( final String[] arg ) throws JSAPException, IOException, ConfigurationException, SecurityException, URISyntaxException, ClassNotFoundException, InstantiationException, IllegalAccessException, InvocationTargetException, NoSuchMethodException {
	final Parameter[] options = {
		new UnflaggedOption( "basename", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The basename of the index." ),
		new UnflaggedOption( "strategy", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The filename for the strategy." )
	};
	final SimpleJSAP jsap = new SimpleJSAP( SubsetLexicalStrategy.class.getName(), "Builds a subset-based lexical partitioning strategy reading an UTF-8 encoded term list from standard input.", options );

	final JSAPResult jsapResult = jsap.parse( arg );
	if ( jsap.messagePrinted() ) return;

	final Index index = Index.getInstance( jsapResult.getString( "basename" ) );
	@SuppressWarnings("resource")
	final FastBufferedReader reader = new FastBufferedReader( new InputStreamReader( System.in, "UTF-8" ) );
	final MutableString term = new MutableString();
	final IntOpenHashSet subset = new IntOpenHashSet();

	for( ;; ) {
		if ( reader.readLine( term ) == null ) break;
		final int termNumber = (int)index.termMap.getLong( term );
		// -1 is what the term map yields for a term it does not know.
		if ( termNumber == -1 ) throw new IllegalArgumentException( "Unknown term " + term );
		subset.add( termNumber );
	}
	reader.close();

	final SubsetLexicalStrategy strategy = new SubsetLexicalStrategy( subset );
	BinIO.storeObject( strategy, jsapResult.getString( "strategy" ) );
}
/**
 * Command-line entry point: reads word vectors from the {@code --input} file and
 * stores them, serialised, in the {@code --output} file.
 *
 * Before storing, {@code N} is set to the length of the vector associated with the
 * first key returned by the key-set iterator.
 *
 * @param args the command-line arguments.
 * @throws JSAPException if the command-line parser cannot be configured.
 * @throws IOException if reading the vectors or storing the result fails.
 */
public static void main( String args[] ) throws JSAPException, IOException {
	final Parameter[] options = {
		new FlaggedOption( "input", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'i', "input", "Vector file" ),
		new FlaggedOption( "output", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'o', "output", "Output file name" )
	};
	final SimpleJSAP parser = new SimpleJSAP( UncompressedWordVectors.class.getName(), "Creates a Word Vector representation from a string file", options );

	final JSAPResult parsed = parser.parse( args );
	if ( parser.messagePrinted() ) return;

	final String inputName = parsed.getString( "input" );
	final UncompressedWordVectors vec = UncompressedWordVectors.read( inputName );

	// NOTE(review): presumably every vector has the same length, so any key will do; confirm.
	vec.N = vec.vectors.get( vec.vectors.keySet().iterator().next() ).length;

	final String outputName = parsed.getString( "output" );
	BinIO.storeObject( vec, outputName );
}
/**
 * Command-line entry point: builds a {@link TernaryIntervalSearchTree} from a
 * newline-separated list of terms on standard input and stores it.
 *
 * Standard input is decoded with the charset given by {@code --encoding} and read
 * through a buffer of {@code --buffer-size} bytes; every line is added to the tree
 * while a progress logger counts the terms. The tree is stored under the
 * {@code tree} filename.
 *
 * @param arg the command-line arguments.
 * @throws IOException if reading the terms or storing the tree fails.
 * @throws JSAPException if the command-line parser cannot be configured.
 * @throws NoSuchMethodException declared for the construction of the charset parser.
 */
public static void main( final String[] arg ) throws IOException, JSAPException, NoSuchMethodException {
	final Parameter[] options = {
		new FlaggedOption( "bufferSize", JSAP.INTSIZE_PARSER, "64Ki", JSAP.NOT_REQUIRED, 'b', "buffer-size", "The size of the I/O buffer used to read terms." ),
		new FlaggedOption( "encoding", ForNameStringParser.getParser( Charset.class ), "UTF-8", JSAP.NOT_REQUIRED, 'e', "encoding", "The term file encoding." ),
		new UnflaggedOption( "tree", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The filename for the serialised tree." )
	};
	final SimpleJSAP jsap = new SimpleJSAP( TernaryIntervalSearchTree.class.getName(), "Builds a ternary interval search tree reading from standard input a newline-separated list of terms.", options );

	final JSAPResult jsapResult = jsap.parse( arg );
	if ( jsap.messagePrinted() ) return;

	final TernaryIntervalSearchTree tree = new TernaryIntervalSearchTree();
	final MutableString line = new MutableString();
	final ProgressLogger pl = new ProgressLogger();
	pl.itemsName = "terms";

	final Charset encoding = (Charset)jsapResult.getObject( "encoding" );
	final int bufferSize = jsapResult.getInt( "bufferSize" );
	final FastBufferedReader terms = new FastBufferedReader( new InputStreamReader( System.in, encoding ), bufferSize );

	pl.start( "Reading terms..." );
	for( ;; ) {
		if ( terms.readLine( line ) == null ) break;
		pl.update();
		tree.add( line );
	}
	pl.done();

	BinIO.storeObject( tree, jsapResult.getString( "tree" ) );
}
/**
 * Command-line entry point: builds a {@link TernaryIntervalSearchTree} from a
 * newline-separated list of terms on standard input and stores it.
 *
 * Standard input is decoded with the charset given by {@code --encoding} and read
 * through a buffer of {@code --buffer-size} bytes; every line is added to the tree
 * while a progress logger counts the terms. The tree is stored under the
 * {@code tree} filename.
 *
 * @param arg the command-line arguments.
 * @throws IOException if reading the terms or storing the tree fails.
 * @throws JSAPException if the command-line parser cannot be configured.
 * @throws NoSuchMethodException declared for the construction of the charset parser.
 */
public static void main( final String[] arg ) throws IOException, JSAPException, NoSuchMethodException {
	final Parameter[] options = {
		new FlaggedOption( "bufferSize", JSAP.INTSIZE_PARSER, "64Ki", JSAP.NOT_REQUIRED, 'b', "buffer-size", "The size of the I/O buffer used to read terms." ),
		new FlaggedOption( "encoding", ForNameStringParser.getParser( Charset.class ), "UTF-8", JSAP.NOT_REQUIRED, 'e', "encoding", "The term file encoding." ),
		new UnflaggedOption( "tree", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The filename for the serialised tree." )
	};
	final SimpleJSAP jsap = new SimpleJSAP( TernaryIntervalSearchTree.class.getName(), "Builds a ternary interval search tree reading from standard input a newline-separated list of terms.", options );

	final JSAPResult jsapResult = jsap.parse( arg );
	if ( jsap.messagePrinted() ) return;

	final TernaryIntervalSearchTree tree = new TernaryIntervalSearchTree();
	final MutableString line = new MutableString();
	final ProgressLogger pl = new ProgressLogger();
	pl.itemsName = "terms";

	final Charset encoding = (Charset)jsapResult.getObject( "encoding" );
	final int bufferSize = jsapResult.getInt( "bufferSize" );
	final FastBufferedReader terms = new FastBufferedReader( new InputStreamReader( System.in, encoding ), bufferSize );

	pl.start( "Reading terms..." );
	for( ;; ) {
		if ( terms.readLine( line ) == null ) break;
		pl.update();
		tree.add( line );
	}
	pl.done();

	BinIO.storeObject( tree, jsapResult.getString( "tree" ) );
}
/**
 * Command-line entry point: stores a serialised {@link JavamailDocumentCollection}.
 *
 * The collection is created from the {@code storeUrl} and {@code folder} arguments
 * plus any {@code --property} specifications, and is written to the file named by
 * the {@code collection} argument. Nothing is done when the parser has already
 * printed a message.
 *
 * @param arg the command-line arguments.
 * @throws IOException if storing the collection fails.
 * @throws JSAPException if the command-line parser cannot be configured.
 */
public static void main( final String[] arg ) throws IOException, JSAPException, IllegalAccessException, InvocationTargetException, NoSuchMethodException, InstantiationException, MessagingException, ConfigurationException {
	final SimpleJSAP jsap = new SimpleJSAP(
		JavamailDocumentCollection.class.getName(),
		"Saves a serialised mbox collection based on a given mbox file.",
		new Parameter[] {
			new FlaggedOption( "property", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 'p', "property", "A 'key=value' specification, or the name of a property file" ).setAllowMultipleDeclarations( true ),
			new UnflaggedOption( "collection", JSAP.STRING_PARSER, JSAP.REQUIRED, "The filename for the serialised collection." ),
			new UnflaggedOption( "storeUrl", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The javamail store." ),
			new UnflaggedOption( "folder", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The folder to be read." )
		}
	);

	final JSAPResult jsapResult = jsap.parse( arg );
	if ( jsap.messagePrinted() ) return;

	final String storeUrl = jsapResult.getString( "storeUrl" );
	final String folder = jsapResult.getString( "folder" );
	final String[] property = jsapResult.getStringArray( "property" );

	final JavamailDocumentCollection collection = new JavamailDocumentCollection( storeUrl, folder, property );
	BinIO.storeObject( collection, jsapResult.getString( "collection" ) );
}
}
/**
 * Command-line entry point: stores a serialised {@link JavamailDocumentCollection}.
 *
 * The collection is created from the {@code storeUrl} and {@code folder} arguments
 * plus any {@code --property} specifications, and is written to the file named by
 * the {@code collection} argument. Nothing is done when the parser has already
 * printed a message.
 *
 * @param arg the command-line arguments.
 * @throws IOException if storing the collection fails.
 * @throws JSAPException if the command-line parser cannot be configured.
 */
public static void main( final String[] arg ) throws IOException, JSAPException, IllegalAccessException, InvocationTargetException, NoSuchMethodException, InstantiationException, MessagingException, ConfigurationException {
	final SimpleJSAP jsap = new SimpleJSAP(
		JavamailDocumentCollection.class.getName(),
		"Saves a serialised mbox collection based on a given mbox file.",
		new Parameter[] {
			new FlaggedOption( "property", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 'p', "property", "A 'key=value' specification, or the name of a property file" ).setAllowMultipleDeclarations( true ),
			new UnflaggedOption( "collection", JSAP.STRING_PARSER, JSAP.REQUIRED, "The filename for the serialised collection." ),
			new UnflaggedOption( "storeUrl", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The javamail store." ),
			new UnflaggedOption( "folder", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The folder to be read." )
		}
	);

	final JSAPResult jsapResult = jsap.parse( arg );
	if ( jsap.messagePrinted() ) return;

	final String storeUrl = jsapResult.getString( "storeUrl" );
	final String folder = jsapResult.getString( "folder" );
	final String[] property = jsapResult.getStringArray( "property" );

	final JavamailDocumentCollection collection = new JavamailDocumentCollection( storeUrl, folder, property );
	BinIO.storeObject( collection, jsapResult.getString( "collection" ) );
}
}
/**
 * Command-line entry point: stores a serialised {@link JavamailDocumentCollection}.
 *
 * The collection is created from the {@code storeUrl} and {@code folder} arguments
 * plus any {@code --property} specifications, and is written to the file named by
 * the {@code collection} argument. Nothing is done when the parser has already
 * printed a message.
 *
 * @param arg the command-line arguments.
 * @throws IOException if storing the collection fails.
 * @throws JSAPException if the command-line parser cannot be configured.
 */
public static void main( final String[] arg ) throws IOException, JSAPException, IllegalAccessException, InvocationTargetException, NoSuchMethodException, InstantiationException, MessagingException, ConfigurationException {
	final SimpleJSAP jsap = new SimpleJSAP(
		JavamailDocumentCollection.class.getName(),
		"Saves a serialised mbox collection based on a given mbox file.",
		new Parameter[] {
			new FlaggedOption( "property", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 'p', "property", "A 'key=value' specification, or the name of a property file" ).setAllowMultipleDeclarations( true ),
			new UnflaggedOption( "collection", JSAP.STRING_PARSER, JSAP.REQUIRED, "The filename for the serialised collection." ),
			new UnflaggedOption( "storeUrl", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The javamail store." ),
			new UnflaggedOption( "folder", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The folder to be read." )
		}
	);

	final JSAPResult jsapResult = jsap.parse( arg );
	if ( jsap.messagePrinted() ) return;

	final String storeUrl = jsapResult.getString( "storeUrl" );
	final String folder = jsapResult.getString( "folder" );
	final String[] property = jsapResult.getStringArray( "property" );

	final JavamailDocumentCollection collection = new JavamailDocumentCollection( storeUrl, folder, property );
	BinIO.storeObject( collection, jsapResult.getString( "collection" ) );
}
}
/**
 * Command-line entry point: stores a serialised {@link JavamailDocumentCollection}.
 *
 * The collection is created from the {@code storeUrl} and {@code folder} arguments
 * plus any {@code --property} specifications, and is written to the file named by
 * the {@code collection} argument. Nothing is done when the parser has already
 * printed a message.
 *
 * @param arg the command-line arguments.
 * @throws IOException if storing the collection fails.
 * @throws JSAPException if the command-line parser cannot be configured.
 */
public static void main( final String[] arg ) throws IOException, JSAPException, IllegalAccessException, InvocationTargetException, NoSuchMethodException, InstantiationException, MessagingException, ConfigurationException {
	final SimpleJSAP jsap = new SimpleJSAP(
		JavamailDocumentCollection.class.getName(),
		"Saves a serialised mbox collection based on a given mbox file.",
		new Parameter[] {
			new FlaggedOption( "property", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 'p', "property", "A 'key=value' specification, or the name of a property file" ).setAllowMultipleDeclarations( true ),
			new UnflaggedOption( "collection", JSAP.STRING_PARSER, JSAP.REQUIRED, "The filename for the serialised collection." ),
			new UnflaggedOption( "storeUrl", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The javamail store." ),
			new UnflaggedOption( "folder", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The folder to be read." )
		}
	);

	final JSAPResult jsapResult = jsap.parse( arg );
	if ( jsap.messagePrinted() ) return;

	final String storeUrl = jsapResult.getString( "storeUrl" );
	final String folder = jsapResult.getString( "folder" );
	final String[] property = jsapResult.getStringArray( "property" );

	final JavamailDocumentCollection collection = new JavamailDocumentCollection( storeUrl, folder, property );
	BinIO.storeObject( collection, jsapResult.getString( "collection" ) );
}
}
/**
 * Command-line entry point: builds a {@link FrequencyLexicalStrategy} from a
 * frequency threshold and stores it.
 *
 * One gamma-coded value per term is read from the file
 * {@code basename + DiskBasedIndex.FREQUENCIES_EXTENSION}; every term whose value
 * is at least {@code --threshold} is put in the subset handed to the strategy,
 * which is stored under the {@code strategy} filename.
 *
 * The frequency stream is closed in a {@code finally} clause, so it is released
 * even when reading fails part-way through.
 *
 * @param arg the command-line arguments.
 * @throws IOException if reading the frequencies or storing the strategy fails.
 * @throws JSAPException if the command-line parser cannot be configured.
 */
public static void main( final String[] arg ) throws JSAPException, IOException, ConfigurationException, SecurityException, URISyntaxException, ClassNotFoundException, InstantiationException, IllegalAccessException, InvocationTargetException, NoSuchMethodException {
	final SimpleJSAP jsap = new SimpleJSAP( FrequencyLexicalStrategy.class.getName(), "Builds a lexical partitioning strategy based on a frequency threshold.",
		new Parameter[] {
			new FlaggedOption( "threshold", JSAP.INTEGER_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 't', "threshold", "The frequency threshold." ),
			new UnflaggedOption( "basename", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The basename of the index." ),
			new UnflaggedOption( "strategy", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The filename for the strategy." )
		});

	final JSAPResult jsapResult = jsap.parse( arg );
	if ( jsap.messagePrinted() ) return;

	final Index index = Index.getInstance( jsapResult.getString( "basename" ) );
	final int threshold = jsapResult.getInt( "threshold" );
	final IntOpenHashSet subset = new IntOpenHashSet();

	final InputBitStream frequencies = new InputBitStream( jsapResult.getString( "basename" ) + DiskBasedIndex.FREQUENCIES_EXTENSION );
	try {
		for( int t = 0; t < index.numberOfTerms; t++ )
			if ( frequencies.readLongGamma() >= threshold ) subset.add( t );
	}
	finally {
		// Previously the stream stayed open whenever a read failed.
		frequencies.close();
	}

	BinIO.storeObject( new FrequencyLexicalStrategy( subset ), jsapResult.getString( "strategy" ) );
}
}
/**
 * Command-line entry point: builds a {@link FrequencyLexicalStrategy} from a
 * frequency threshold and stores it.
 *
 * One gamma-coded value per term is read from the file
 * {@code basename + DiskBasedIndex.FREQUENCIES_EXTENSION}; every term whose value
 * is at least {@code --threshold} is put in the subset handed to the strategy,
 * which is stored under the {@code strategy} filename.
 *
 * The frequency stream is closed in a {@code finally} clause, so it is released
 * even when reading fails part-way through.
 *
 * @param arg the command-line arguments.
 * @throws IOException if reading the frequencies or storing the strategy fails.
 * @throws JSAPException if the command-line parser cannot be configured.
 */
public static void main( final String[] arg ) throws JSAPException, IOException, ConfigurationException, SecurityException, URISyntaxException, ClassNotFoundException, InstantiationException, IllegalAccessException, InvocationTargetException, NoSuchMethodException {
	final SimpleJSAP jsap = new SimpleJSAP( FrequencyLexicalStrategy.class.getName(), "Builds a lexical partitioning strategy based on a frequency threshold.",
		new Parameter[] {
			new FlaggedOption( "threshold", JSAP.LONG_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 't', "threshold", "The frequency threshold." ),
			new UnflaggedOption( "basename", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The basename of the index." ),
			new UnflaggedOption( "strategy", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The filename for the strategy." )
		});

	final JSAPResult jsapResult = jsap.parse( arg );
	if ( jsap.messagePrinted() ) return;

	final Index index = Index.getInstance( jsapResult.getString( "basename" ) );
	final long threshold = jsapResult.getLong( "threshold" );
	final LongOpenHashSet subset = new LongOpenHashSet();

	// The frequencies live in their own file next to the index: the bare basename was being
	// opened here, which is not the frequency file.
	final InputBitStream frequencies = new InputBitStream( jsapResult.getString( "basename" ) + DiskBasedIndex.FREQUENCIES_EXTENSION );
	try {
		for( long t = 0; t < index.numberOfTerms; t++ )
			if ( frequencies.readLongGamma() >= threshold ) subset.add( t );
	}
	finally {
		// Previously the stream stayed open whenever a read failed.
		frequencies.close();
	}

	BinIO.storeObject( new FrequencyLexicalStrategy( subset ), jsapResult.getString( "strategy" ) );
}
}
/**
 * Command-line entry point: builds a {@link PermutedFrontCodedStringList} from a
 * stored front-coded string list and a permutation, then stores it.
 *
 * The permutation is read as text when {@code --text} is specified and with
 * {@code BinIO.loadInts} otherwise; with {@code --invert} it is inverted in place
 * before use. The resulting list is stored under the {@code permutedList} filename.
 *
 * @param arg the command-line arguments.
 * @throws IOException if reading the inputs or storing the result fails.
 * @throws ClassNotFoundException if the stored list cannot be deserialised.
 * @throws JSAPException if the command-line parser cannot be configured.
 */
public static void main(final String[] arg) throws IOException, ClassNotFoundException, JSAPException {
	final Parameter[] options = {
		new Switch("invert", 'i', "invert", "Invert permutation before creating the permuted list."),
		new Switch("text", 't', "text", "The permutation is a text file."),
		new UnflaggedOption("list", JSAP.STRING_PARSER, JSAP.REQUIRED, "A front-coded string list."),
		new UnflaggedOption("permutation", JSAP.STRING_PARSER, JSAP.REQUIRED, "A permutation for the indices of the list (in DataInput format, unless you specify --text)."),
		new UnflaggedOption("permutedList", JSAP.STRING_PARSER, JSAP.REQUIRED, "A the filename for the resulting permuted list.")
	};
	final SimpleJSAP jsap = new SimpleJSAP(PermutedFrontCodedStringList.class.getName(), "Builds a permuted front-coded list of strings using a given front-coded string list and a permutation (either in text or binary format).", options);

	final JSAPResult jsapResult = jsap.parse(arg);
	if (jsap.messagePrinted()) return;

	final String permutationFile = jsapResult.getString("permutation");
	final int[] permutation;
	if (jsapResult.userSpecified("text")) {
		permutation = IntIterators.unwrap(TextIO.asIntIterator(permutationFile));
	}
	else {
		permutation = BinIO.loadInts(permutationFile);
	}

	if (jsapResult.getBoolean("invert")) Util.invertPermutationInPlace(permutation);

	final FrontCodedStringList list = (FrontCodedStringList)BinIO.loadObject(jsapResult.getString("list"));
	final PermutedFrontCodedStringList permutedList = new PermutedFrontCodedStringList(list, permutation);
	BinIO.storeObject(permutedList, jsapResult.getString("permutedList"));
}
}