/**
 * Splits a parameter string around {@code OR_REGEX} and gathers the pieces
 * into a {@link LinkedHashSet} by way of {@code Utils.arrayToCollection}.
 *
 * @param variedParam the raw parameter string; may be {@code null}
 * @return the collection produced by {@code Utils.arrayToCollection}, or
 *         {@code null} when {@code variedParam} is {@code null}
 */
private static Set<String> getOptions(String variedParam)
{
    if (variedParam == null)
    {
        return null;
    }
    String[] alternatives = variedParam.split(OR_REGEX);
    return Utils.arrayToCollection(alternatives, new LinkedHashSet<String>());
}
}
/**
 * Tells whether the two predicates share at least one lower-cased lemma.
 *
 * @param hypothesisPredicate the predicate on the hypothesis side
 * @param textPredicate the predicate on the text side
 * @return {@code true} when the intersection of the two lemma collections
 *         (as computed by {@code Utils.intersect}) is not empty
 */
private boolean samePredicate(Predicate<I, S> hypothesisPredicate, Predicate<I, S> textPredicate)
{
    return !Utils.intersect(
            predicateLemmasLowerCase(hypothesisPredicate),
            predicateLemmasLowerCase(textPredicate),
            new LinkedHashSet<String>()).isEmpty();
}
/**
 * Returns the values of the given map as a list, ordered by ascending key.
 *
 * @param map a map from integer ids to values
 * @return a new list holding {@code map.get(id)} for every key, in sorted key order
 */
private static <T> List<T> getMapAsList(Map<Integer,T> map)
{
    // Sort a copy of the keys so that the output order does not depend on the map implementation.
    Integer[] sortedIds = Utils.collectionToArray(map.keySet(), new Integer[0]);
    Arrays.sort(sortedIds);

    List<T> values = new ArrayList<T>(map.keySet().size());
    for (int position = 0; position < sortedIds.length; ++position)
    {
        values.add(map.get(sortedIds[position]));
    }
    return values;
}
public void go() throws LexicalResourceException, IOException, UnsupportedPosTagStringException System.out.println("Memory used: "+Utils.stringMemoryUsedInMB()); BufferedReader reader = new BufferedReader(new InputStreamReader(System.in)); System.out.println("Type exit to exit.");
/**
 * Builds {@code sortedNonIncreasingOrder}: every component of the tree history
 * mapped to its cost delta, i.e. the component's cost minus the cost of the
 * preceding component (the initial component for the first one). A cost is the
 * negated classifier product of a feature vector. Entries are inserted in the
 * reverse of the order returned by {@code Utils.getSortedByValue}.
 *
 * @throws ClassifierException propagated from the classifier calls
 */
private void createSorted() throws ClassifierException
{
    if (logger.isDebugEnabled())
    {
        logger.debug("SorterOfTreeHistory: creating sorted map...");
    }
    if (logger.isDebugEnabled())
    {
        logger.debug(String.format("Classifier threshold = %-4.4f", classifier.getThreshold()));
    }

    Map<TreeHistoryComponent,Double> deltaByComponent = new HashMap<TreeHistoryComponent, Double>();

    double previousCost = -classifier.getProduct(treeHistory.getInitialComponent().getFeatureVector());
    if (logger.isDebugEnabled())
    {
        logger.debug(String.format("Initial cost = %-4.4f", previousCost));
    }

    for (TreeHistoryComponent component : treeHistory.getComponents())
    {
        double cost = -classifier.getProduct(component.getFeatureVector());
        double delta = cost - previousCost;
        deltaByComponent.put(component, delta);
        if (logger.isDebugEnabled())
        {
            logger.debug(String.format("Cost = %-4.4f. Delta = %-4.4f Spec = %s", cost, delta, component.getSpecification().toString()));
        }
        previousCost = cost;
    }

    // Reverse the value-sorted order, then copy into an insertion-ordered map to preserve it.
    List<TreeHistoryComponent> ordered = Utils.getSortedByValue(deltaByComponent);
    Collections.reverse(ordered);

    sortedNonIncreasingOrder = new LinkedHashMap<TreeHistoryComponent, Double>();
    for (TreeHistoryComponent component : ordered)
    {
        sortedNonIncreasingOrder.put(component, deltaByComponent.get(component));
    }
}
/**
 * Creates the pre-processor from the configuration file name and an optional
 * train/test flag.
 *
 * @param configurationFileName name of the configuration file; stored as is
 * @param trainTestEnum textual train/test flag, matched case-insensitively
 *        against the constants of {@code TrainTestEnum}; may be {@code null},
 *        in which case a global warning is issued and the flag is left {@code null}
 * @throws TeEngineMlException if {@code trainTestEnum} is non-null but does not
 *         name a {@code TrainTestEnum} constant
 */
public RTEPairsPreProcessor(String configurationFileName,String trainTestEnum) throws TeEngineMlException
{
    if (null==logger){logger = Logger.getLogger(RTEPairsPreProcessor.class);}
    this.configurationFileName = configurationFileName;
    if (trainTestEnum!=null)
    {
        try
        {
            // Upper-case with Locale.ROOT: with the default locale, a Turkish environment
            // maps 'i' to a dotted capital I and valueOf() would then reject a valid flag.
            this.trainOrTest = TrainTestEnum.valueOf(trainTestEnum.toUpperCase(java.util.Locale.ROOT));
        }
        catch(IllegalArgumentException e)
        {
            throw new TeEngineMlException("Second argument must be a train/test flag.");
        }
    }
    else
    {
        GlobalMessages.globalWarn("No train/test flag is provided (in command line). The correct method to run this class is by providing this flag (in addition to the configuration file).\n" +
                "The flag's value should be: {"+Utils.getEnumValues(TrainTestEnum.class)+"}", logger);
        this.trainOrTest = null;
    }
}
// Report that DirtDBRuleBase initialization has finished, along with the memory-usage string supplied by Utils.
logger.info("Done: initialization of DirtDBRuleBase. Memory used: "+Utils.stringMemoryUsedInMB());
// Order the mentions by the values they map to in mapMentionToSpanOfLeaves.
// NOTE(review): the sort direction is defined by Utils.getSortedByValue — presumably ascending; confirm.
List<Mention> sortedMentions = Utils.getSortedByValue(mapMentionToSpanOfLeaves);
"<c> On|on|IN|I-PP|O|(S/S)/NP an|an|DT|I-NP|O|NP[nb]/N aircraft|aircraft|NN|I-NP|O|N with|with|IN|I-PP|O|(NP\\NP)/NP a|a|DT|I-NP|O|NP[nb]/N pilot|pilot|NN|I-NP|O|N and|and|CC|I-NP|O|conj copilot|copilot|NN|I-NP|O|N ,|,|,|I-NP|O|, or|or|CC|I-NP|O|conj instructor|instructor|NN|I-NP|O|N and|and|CC|I-NP|O|conj trainee|trainee|NN|I-NP|O|N ,|,|,|O|O|, the|the|DT|I-NP|O|NP[nb]/N aircraft|aircraft|NN|I-NP|O|N is|be|VBZ|I-VP|O|(S[dcl]\\NP)/(S[pss]\\NP) made|make|VBN|I-VP|O|(S[pss]\\NP)/(S[adj]\\NP) capable|capable|JJ|I-ADJP|O|(S[adj]\\NP)/PP of|of|IN|I-PP|O|PP/NP control|control|NN|I-NP|O|N without|without|IN|I-PP|O|(NP\\NP)/NP the|the|DT|I-NP|O|NP[nb]/N crew|crew|NN|I-NP|O|N changing|change|VBG|I-NP|O|(S[ng]\\NP)/NP seats|seat|NNS|I-NP|O|N .|.|.|O|O|."}; ArrayList<String> ret = Utils.arrayToCollection(retArray, new ArrayList<String>(retArray.length));
// Print the memory-usage string supplied by Utils to standard output.
// NOTE(review): the same statement appears twice in a row here — confirm whether the repetition is intended.
System.out.println("Memory used = "+Utils.stringMemoryUsedInMB()); System.out.println("Memory used = "+Utils.stringMemoryUsedInMB());
/**
 * Tells whether the predicates of the two structures share at least one
 * lower-cased lemma.
 *
 * @param hypothesisStructure the predicate-argument structure on the hypothesis side
 * @param textStructure the predicate-argument structure on the text side
 * @return {@code true} when the intersection of the two lemma collections
 *         (as computed by {@code Utils.intersect}) is not empty
 */
private boolean structuresAreMatch(PredicateArgumentStructure<I, S> hypothesisStructure, PredicateArgumentStructure<I, S> textStructure)
{
    return !Utils.intersect(
            extractLemmasOfPredicateLowerCase(hypothesisStructure),
            extractLemmasOfPredicateLowerCase(textStructure),
            new LinkedHashSet<String>()).isEmpty();
}
/**
 * Creates the initial parameter map: one entry per feature index that occurs
 * in any of the samples, inserted in ascending index order, all carrying the
 * same starting value.
 * <p>
 * The starting value is {@code 0.0}, or, when this instance is restricted,
 * the larger of {@code 0.0} and {@code restrictionMinimumValue}.
 *
 * @param samples the labeled samples whose feature indices are collected
 * @return an insertion-ordered map from feature index to initial value
 */
protected Map<Integer,Double> generateInitialParameters(Vector<LabeledSample> samples)
{
    // Gather every feature index that appears in at least one sample.
    Set<Integer> featureIndices = new LinkedHashSet<Integer>();
    for (LabeledSample sample : samples)
    {
        featureIndices.addAll(sample.getFeatures().keySet());
    }

    Integer[] sortedIndices = Utils.collectionToArray(featureIndices, new Integer[0]);
    Arrays.sort(sortedIndices);

    double startingValue = this.restricted ? Math.max(0.0, this.restrictionMinimumValue) : 0.0;

    Map<Integer,Double> parameters = new LinkedHashMap<Integer, Double>();
    for (int position = 0; position < sortedIndices.length; ++position)
    {
        parameters.put(sortedIndices[position], startingValue);
    }
    return parameters;
}
// Hand retArray to Utils.arrayToCollection together with a fresh ArrayList whose initial capacity equals the array length.
ArrayList<String> ret = Utils.arrayToCollection(retArray, new ArrayList<String>(retArray.length));
// Debug output: the memory-usage string supplied by Utils, followed by the distance reported by the element.
logger.debug("Memory used: "+Utils.stringMemoryUsedInMB()); logger.debug("Element distance = "+element.getDistance());
/**
 * Fills {@code specs} with one multi-word substitution specification for every
 * (text node, hypothesis node) pair for which all of the following hold:
 * the hypothesis node's info has a lemma, the two nodes' lemmas differ, and
 * the word lists mapped to the two nodes have a non-empty intersection.
 */
private void findUsingMaps()
{
    specs = new LinkedHashSet<SubstituteNodeSpecificationMultiWord>();
    for (ExtendedNode textNode : textNodesToMultiWord.keySet())
    {
        for (ExtendedNode hypothesisNode : hypothesisNodesToMultiWord.keySet())
        {
            if (!InfoObservations.infoHasLemma(hypothesisNode.getInfo()))
            {
                continue;
            }
            // Nodes that already carry the same lemma need no substitution.
            if (InfoGetFields.getLemma(textNode.getInfo()).equals(InfoGetFields.getLemma(hypothesisNode.getInfo())))
            {
                continue;
            }

            List<String> textNodeWords = textNodesToMultiWord.get(textNode);
            List<String> hypothesisNodeWords = hypothesisNodesToMultiWord.get(hypothesisNode);
            Collection<String> sharedWords = Utils.intersect(textNodeWords, hypothesisNodeWords, new LinkedList<String>());
            if (sharedWords.isEmpty())
            {
                continue;
            }

            specs.add(new SubstituteNodeSpecificationMultiWord(
                    textNode,
                    hypothesisNode.getInfo().getNodeInfo(),
                    textNode.getInfo().getAdditionalNodeInformation(),
                    textNodeWords,
                    hypothesisNodeWords));
        }
    }
}
/**
 * Reads the dataset XML file named by {@code datasetFile} and stores its
 * text-hypothesis pairs in {@code pairs}, ordered by ascending pair id.
 * When {@code annotated} is set, the reader is told that the file carries
 * classifications.
 *
 * @throws RTEMainReaderException propagated from the reader
 */
private void readDatasetFile() throws RTEMainReaderException
{
    logger.info("Processing dataset: "+datasetFile);

    RTEMainReader datasetReader = new DefaultRTEMainReader();
    datasetReader.setXmlFile(new File(datasetFile));
    if (annotated)
    {
        datasetReader.setHasClassification();
    }
    datasetReader.read();

    Map<Integer,TextHypothesisPair> pairById = datasetReader.getMapIdToPair();
    pairs = new ArrayList<TextHypothesisPair>(pairById.size());

    // Sort the ids so the resulting list order is deterministic.
    Integer[] sortedIds = Utils.collectionToArray(pairById.keySet(), new Integer[0]);
    Arrays.sort(sortedIds);
    for (int position = 0; position < sortedIds.length; ++position)
    {
        pairs.add(pairById.get(sortedIds[position]));
    }
}
// Extraction types handed to Utils.arrayToCollection with a fresh HashSet: REDIRECT, CATEGORY, LEX_ALL_NOUNS and SYNT_ALL_NOUNS.
Set<WikiExtractionType> extractionTypes = Utils.arrayToCollection(new WikiExtractionType[]{WikiExtractionType.REDIRECT,WikiExtractionType.CATEGORY, WikiExtractionType.LEX_ALL_NOUNS,WikiExtractionType.SYNT_ALL_NOUNS}, new HashSet<WikiExtractionType>());
public void processList() throws TeEngineMlException, OperationException, ClassifierException, ScriptException, RuleBaseException, AnnotatorException { Iterator<ExtendedPairData> pairsIterator = pairs.iterator(); // for (ExtendedPairData pair : pairs) while ( (!stopFlag.isStop()) && (pairsIterator.hasNext()) ) { ExtendedPairData pair = pairsIterator.next(); logger.info("Processing pair: "+((pair.getDatasetName()!=null)?pair.getDatasetName()+": ":"") + pair.getPair().getId()); // logger.info("Processing pair: "+pair.getPair().getId()); boolean noProblem = false; try { processPair(pair); noProblem = true; } finally { if (!noProblem)this.stopFlag.stop(); } if (logger.isInfoEnabled()) { logger.info("Pair processing done. Memory used: "+Utils.stringMemoryUsedInMB()); } } }
/**
 * Installs the underlying system and refreshes the dependent UI state: the
 * dataset-names combo box and, if requested, the classifier type shown in the
 * status bar.
 * <p>
 * Assumes the call is made from the Swing thread, i.e. in the context of
 * {@link javax.swing.SwingUtilities#invokeLater(Runnable)}.
 *
 * @param underLyingSystem the system to use from now on
 * @param useF1Classifier whether the F1-optimized classifier is used; may be
 *        {@code null}, in which case the status bar is left untouched
 * @throws TeEngineMlException declared by this method's signature
 */
public void setUnderlyingSystem(SingleComponentUnderlyingSystem underLyingSystem, Boolean useF1Classifier) throws TeEngineMlException
{
    this.underLyingSystem = underLyingSystem;

    Set<String> allowedDatasets = this.underLyingSystem.getAllowedDatasetNames().getMutableSetCopy();
    boolean hasDatasetNames = !allowedDatasets.isEmpty();
    if (hasDatasetNames)
    {
        String[] datasetNames = Utils.collectionToArray(allowedDatasets, new String[0]);
        cpe.getComboBoxDatasetNames().setModel(new DefaultComboBoxModel<String>(datasetNames));
    }
    cpe.setDatasetNamesAllow(hasDatasetNames);

    this.useF1Classifier = useF1Classifier;
    // Boolean.TRUE.equals(...) is false for null as well as for FALSE.
    if (Boolean.TRUE.equals(this.useF1Classifier))
    {
        cpe.setStatusBarClassifierType(VisualTracingTool.STATUS_BAR_CLASSIFIER_TYPE_F1_OPTIMIZED);
        cpe.updateStatusBarLabel();
    }
}
// Extraction types handed to Utils.arrayToCollection with a fresh HashSet: REDIRECT, BE_COMP, BE_COMP_IDIRECT and ALL_NOUNS_TOP.
Set<WikiExtractionType> extractionTypes = Utils.arrayToCollection(new WikiExtractionType[]{WikiExtractionType.REDIRECT,WikiExtractionType.BE_COMP, WikiExtractionType.BE_COMP_IDIRECT,WikiExtractionType.ALL_NOUNS_TOP}, new HashSet<WikiExtractionType>());