/**
 * Converts an {@link IOException} into a {@link TerminateToolException} with code -1.
 *
 * <p>An {@code InsufficientTrainingDataException} is mapped to a multi-line explanation
 * (the original exception is not attached in that case). Any other {@code IOException}
 * is mapped to a generic message that embeds {@code e.getMessage()} and passes {@code e}
 * along to the new exception.
 *
 * @param e the I/O failure to convert
 * @return the exception describing the failure; it is returned, not thrown
 */
protected TerminateToolException createTerminationIOException(IOException e) {
  // Generic I/O failure: keep the original message and hand over the exception itself.
  if (!(e instanceof InsufficientTrainingDataException)) {
    return new TerminateToolException(-1,
        "IO error while reading training data or indexing data: " + e.getMessage(), e);
  }

  String explanation = "\n\nERROR: Not enough training data\n"
      + "The provided training data is not sufficient to create enough events to train a model.\n"
      + "To resolve this error use more training data, if this doesn't help there might\n"
      + "be some fundamental problem with the training data itself.";
  return new TerminateToolException(-1, explanation);
}
}
/**
 * Opens the given file for reading.
 *
 * @param file the file to open
 * @return a new {@link FileInputStream} on {@code file}
 * @throws TerminateToolException with code -1 if opening the stream raises a
 *         {@link FileNotFoundException}
 */
public static FileInputStream openInFile(File file) {
  final FileInputStream stream;
  try {
    stream = new FileInputStream(file);
  } catch (FileNotFoundException notFound) {
    throw new TerminateToolException(-1, "File '" + file + "' cannot be found", notFound);
  }
  return stream;
}
/**
 * Builds the {@link TerminateToolException} (code -1) that reports a failure to create an
 * input stream.
 *
 * @param e the underlying I/O failure; its message is embedded and the exception is passed along
 * @return the exception to be thrown by the caller; this method itself does not throw it
 */
public static TerminateToolException createObjectStreamError(IOException e) {
  String message = "IO Error while creating an Input Stream: " + e.getMessage();
  return new TerminateToolException(-1, message, e);
}
/**
 * Reports a failure to read from stdin by always throwing.
 *
 * @param e the underlying I/O failure; its message is embedded and the exception is passed along
 * @throws TerminateToolException always, with code -1
 */
public static void handleStdinIoError(IOException e) {
  String message = "IO Error while reading from stdin: " + e.getMessage();
  throw new TerminateToolException(-1, message, e);
}
/**
 * Parses an argument value as an integer.
 *
 * @param method not used by this implementation
 * @param argName the argument name, used only in the error message
 * @param argValue the text to parse
 * @return the parsed value, boxed as an {@link Integer}
 * @throws TerminateToolException with code 1 if {@code argValue} is not a valid integer
 */
public Object parseArgument(Method method, String argName, String argValue) {
  try {
    return Integer.parseInt(argValue);
  } catch (NumberFormatException notANumber) {
    String prefix = String.format(INVALID_ARG, argName, argValue);
    throw new TerminateToolException(1, prefix + "Value must be an integer!", notANumber);
  }
}
}
/**
 * Resolves a charset name given on the command line to a {@link Charset}.
 *
 * <p>The marker value {@code OptionalParameter.DEFAULT_CHARSET} selects the platform default
 * charset; any other value is looked up by name.
 *
 * @param method not used by this implementation
 * @param argName the argument name, used only in error messages
 * @param charsetName the charset name to resolve
 * @return the resolved {@link Charset}
 * @throws TerminateToolException with code 1 if the name is syntactically illegal or the
 *         charset is not supported on this platform
 */
public Object parseArgument(Method method, String argName, String charsetName) {
  try {
    if (OptionalParameter.DEFAULT_CHARSET.equals(charsetName)) {
      return Charset.defaultCharset();
    }
    if (Charset.isSupported(charsetName)) {
      return Charset.forName(charsetName);
    }
  } catch (IllegalCharsetNameException e) {
    // Pass the original exception along so the root cause is not lost, matching the other
    // TerminateToolException call sites that hand over the caught exception.
    throw new TerminateToolException(1,
        String.format(INVALID_ARG, argName, charsetName) + "Illegal encoding name.", e);
  }

  // Legal name, but no such charset is available here.
  throw new TerminateToolException(1,
      String.format(INVALID_ARG, argName, charsetName) + "Encoding not supported on this platform.");
}
}
/**
 * Verifies that {@code code} is an accepted language code.
 *
 * <p>Accepted values are the codes returned by {@link Locale#getISOLanguages()} plus the
 * special value {@code "x-unspecified"}.
 *
 * @param code the language code to check
 * @throws TerminateToolException with code 1 if {@code code} is not an accepted value
 */
public static void checkLanguageCode(String code) {
  boolean known = "x-unspecified".equals(code)
      || Arrays.asList(Locale.getISOLanguages()).contains(code);
  if (known) {
    return;
  }
  throw new TerminateToolException(1, "Unknown language code " + code + ", "
      + "must be an ISO 639 code!");
}
/**
 * Creates a {@code MarkableFileInputStreamFactory} for the given file.
 *
 * @param file the file the factory should read from
 * @return the newly created factory
 * @throws TerminateToolException with code -1 if creating the factory raises a
 *         {@link FileNotFoundException}
 */
public static InputStreamFactory createInputStreamFactory(File file) {
  final InputStreamFactory streamFactory;
  try {
    streamFactory = new MarkableFileInputStreamFactory(file);
  } catch (FileNotFoundException notFound) {
    throw new TerminateToolException(-1, "File '" + file + "' cannot be found", notFound);
  }
  return streamFactory;
}
/**
 * Converts the textual parser type into a {@code ParserType}.
 *
 * @param typeAsString the parser type as text; may be {@code null} or empty
 * @return the parsed type, or {@code null} when {@code typeAsString} is {@code null} or empty
 * @throws TerminateToolException with code 1 if a non-empty value is given but
 *         {@code ParserType.parse} returns {@code null} for it
 */
static ParserType parseParserType(String typeAsString) {
  // Nothing specified: there is no type to report.
  if (typeAsString == null || typeAsString.length() == 0) {
    return null;
  }

  ParserType parsed = ParserType.parse(typeAsString);
  if (parsed == null) {
    throw new TerminateToolException(1, "ParserType training parameter '" + typeAsString
        + "' is invalid!");
  }
  return parsed;
}
/**
 * Creates a {@code DictionaryDetokenizer} from the detokenizer dictionary file named by
 * {@code p.getDetokenizer()}.
 *
 * @param p parameters providing the path of the detokenizer dictionary file
 * @return a detokenizer backed by the loaded dictionary
 * @throws TerminateToolException with code -1 if the dictionary cannot be opened or read
 */
protected Detokenizer createDetokenizer(DetokenizerParameter p) {
  // The stream is closed as soon as the dictionary has been built; previously it was
  // never closed and the file handle leaked.
  // NOTE(review): assumes DetokenizationDictionary reads the stream completely inside its
  // constructor and does not keep it for later use - confirm.
  try (FileInputStream dictIn = new FileInputStream(new File(p.getDetokenizer()))) {
    return new DictionaryDetokenizer(new DetokenizationDictionary(dictIn));
  } catch (IOException e) {
    throw new TerminateToolException(-1,
        "IO error while loading detokenizer dict: " + e.getMessage(), e);
  }
}
}
public static byte[] openFeatureGeneratorBytes(File featureGenDescriptorFile) { byte[] featureGeneratorBytes = null; // load descriptor file into memory if (featureGenDescriptorFile != null) { try (InputStream bytesIn = CmdLineUtil.openInFile(featureGenDescriptorFile)) { featureGeneratorBytes = ModelUtil.read(bytesIn); } catch (IOException e) { throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " + e.getMessage(), e); } } return featureGeneratorBytes; }
/**
 * Looks up the stream factory registered for this tool's type and the given
 * <code>format</code>.
 *
 * @param format data format name
 * @return the stream factory registered for the type of this tool and the format
 * @throws TerminateToolException with code 1 (message includes the tool help) if no factory
 *         is registered for the format
 */
protected ObjectStreamFactory<T> getStreamFactory(String format) {
  ObjectStreamFactory<T> found = StreamFactoryRegistry.getFactory(type, format);
  if (found == null) {
    throw new TerminateToolException(1, "Format " + format + " is not found.\n" + getHelp());
  }
  return found;
}
/**
 * Validates the command-line arguments against the given proxy interface and, if they are
 * valid, parses them into an instance of that interface.
 *
 * @param args the raw command-line arguments
 * @param argProxyInterface the interface describing the expected arguments
 * @param <T> the type of the proxy interface
 * @return the parsed arguments
 * @throws TerminateToolException with code 1 (message includes the validation error and the
 *         tool help) if validation reports an error
 */
protected <T> T validateAndParseParams(String[] args, Class<T> argProxyInterface) {
  String problem = ArgumentParser.validateArgumentsLoudly(args, argProxyInterface);
  if (problem == null) {
    return ArgumentParser.parse(args, argProxyInterface);
  }
  throw new TerminateToolException(1, problem + "\n" + getHelp());
}
public ObjectStream<POSSample> create(String[] args) { Parameters params = ArgumentParser.parse(args, Parameters.class); InputStreamFactory inFactory = CmdLineUtil.createInputStreamFactory(params.getData()); try { System.setOut(new PrintStream(System.out, true, "UTF-8")); return new ConllXPOSSampleStream(inFactory, StandardCharsets.UTF_8); } catch (UnsupportedEncodingException e) { // this shouldn't happen throw new TerminateToolException(-1, "UTF-8 encoding is not supported: " + e.getMessage(), e); } catch (IOException e) { // That will throw an exception CmdLineUtil.handleCreateObjectStreamError(e); return null; } } }
/**
 * Reads a one-entry-per-line dictionary from the input file, using the configured encoding,
 * and serializes it to the output file.
 *
 * @param args the command-line arguments, validated and parsed into {@code Params}
 * @throws TerminateToolException with code -1 if reading or writing fails with an
 *         {@link IOException}
 */
public void run(String[] args) {
  Params params = validateAndParseParams(args, Params.class);

  File source = params.getInputFile();
  File target = params.getOutputFile();
  Charset charset = params.getEncoding();

  CmdLineUtil.checkInputFile("dictionary input file", source);
  CmdLineUtil.checkOutputFile("dictionary output file", target);

  try (InputStreamReader reader = new InputStreamReader(new FileInputStream(source), charset);
       OutputStream sink = new FileOutputStream(target)) {
    Dictionary parsed = Dictionary.parseOneEntryPerLine(reader);
    parsed.serialize(sink);
  } catch (IOException ioe) {
    throw new TerminateToolException(-1,
        "IO error while reading training data or indexing data: " + ioe.getMessage(), ioe);
  }
}
}
/**
 * Checks the arguments against the parameters declared by the <code>factory</code>.
 *
 * @param factory a stream factory
 * @param args arguments
 * @throws TerminateToolException with code 1 (message includes the validation error and the
 *         usage text for the factory parameters) if validation reports an error
 */
protected void validateFactoryArgs(ObjectStreamFactory<T> factory, String[] args) {
  String problem = ArgumentParser.validateArgumentsLoudly(args, factory.getParameters());
  if (problem == null) {
    return;
  }

  String usage = ArgumentParser.createUsage(factory.getParameters());
  throw new TerminateToolException(1, "Format parameters are invalid: " + problem + "\n"
      + "Usage: " + usage);
}
/**
 * Returns the help text for this tool and the given data format.
 *
 * <p>For the empty string or {@code StreamFactoryRegistry.DEFAULT_FORMAT} the basic help built
 * from the default format's parameters is returned; for any other format a usage line
 * specific to that format is returned.
 *
 * @param format the data format name
 * @return the help text
 * @throws TerminateToolException with code 1 if a non-default format has no registered factory
 */
public String getHelp(String format) {
  boolean wantsDefault = "".equals(format)
      || StreamFactoryRegistry.DEFAULT_FORMAT.equals(format);
  if (wantsDefault) {
    ObjectStreamFactory<T> defaultFactory =
        StreamFactoryRegistry.getFactory(type, StreamFactoryRegistry.DEFAULT_FORMAT);
    return getBasicHelp(paramsClass, defaultFactory.getParameters());
  }

  ObjectStreamFactory<T> formatFactory = StreamFactoryRegistry.getFactory(type, format);
  if (formatFactory == null) {
    throw new TerminateToolException(1, "Format " + format + " is not found.\n" + getHelp());
  }
  String usage = ArgumentParser.createUsage(paramsClass, formatFactory.getParameters());
  return "Usage: " + CLI.CMD + " " + getName() + "." + format + " " + usage;
}
}
/**
 * Creates the language sample stream over the sentences directory named in the arguments.
 *
 * <p>A {@code LeipzigLanguageSampleStream} is wrapped in a {@code SampleShuffleStream}, which
 * is wrapped in a {@code SampleSkipStream}. The numeric parameters arrive as strings and are
 * parsed with {@link Integer#parseInt(String)}; a malformed number propagates as
 * {@link NumberFormatException}.
 *
 * @param args the command-line arguments, parsed into {@code Parameters}
 * @return the wrapped sample stream
 * @throws TerminateToolException with code -1 if creating the streams fails with an
 *         {@link IOException}
 */
public ObjectStream<LanguageSample> create(String[] args) {
  Parameters params = ArgumentParser.parse(args, Parameters.class);
  File sentencesFileDir = params.getSentencesDir();

  try {
    int sentencesPerSample = Integer.parseInt(params.getSentencesPerSample());
    int samplesPerLanguage = Integer.parseInt(params.getSamplesPerLanguage());
    int samplesToSkip = Integer.parseInt(params.getSamplesToSkip());

    // The skipped samples are requested on top of the per-language amount.
    return new SampleSkipStream(
        new SampleShuffleStream(
            new LeipzigLanguageSampleStream(sentencesFileDir, sentencesPerSample,
                samplesPerLanguage + samplesToSkip)),
        samplesToSkip);
  } catch (IOException ioe) {
    throw new TerminateToolException(-1, "IO error while opening sample data.", ioe);
  }
}
}
/**
 * Builds a Morfologik dictionary from the input file given in the arguments.
 *
 * <p>Both the dictionary input file and the metadata file at the location reported by
 * {@code DictionaryMetadata.getExpectedMetadataLocation} are checked before the build starts.
 *
 * @param args the command-line arguments, validated and parsed into {@code Params}
 * @throws TerminateToolException with code -1 if the build fails with any exception
 */
public void run(String[] args) {
  Params params = validateAndParseParams(args, Params.class);

  File dictInFile = params.getInputFile();
  CmdLineUtil.checkInputFile("dictionary input file", dictInFile);

  Path dictPath = dictInFile.toPath();
  Path infoPath = DictionaryMetadata.getExpectedMetadataLocation(dictPath);
  CmdLineUtil.checkInputFile("dictionary metadata (.info) input file", infoPath.toFile());

  MorfologikDictionayBuilder dictBuilder = new MorfologikDictionayBuilder();
  try {
    dictBuilder.build(dictPath, params.getOverwrite(), params.getValidate(),
        params.getAcceptBOM(), params.getAcceptCR(), params.getIgnoreEmpty());
  } catch (Exception failure) {
    throw new TerminateToolException(-1,
        "Error while creating Morfologik POS Dictionay: " + failure.getMessage(), failure);
  }
}
}
/**
 * Checks that the code passed to the single-argument constructor is returned by
 * {@code getCode()}.
 */
@Test
public void testCreation() {
  final int expectedCode = -500;
  TerminateToolException exception = new TerminateToolException(expectedCode);
  Assert.assertEquals(expectedCode, exception.getCode());
}
}