/**
 * Looks up the dictionary stored under the given resource path.
 *
 * @param resourcePath path handed to {@code getDictionaryInfo}
 * @return the dictionary object of the resolved {@code DictionaryInfo}, or {@code null}
 *         when no info is found for the path
 * @throws IOException if the underlying lookup fails
 */
public Dictionary<String> getDictionary(String resourcePath) throws IOException {
    DictionaryInfo resolved = getDictionaryInfo(resourcePath);
    if (resolved == null) {
        return null;
    }
    return resolved.getDictionaryObject();
}
/**
 * Resolves the dictionary of a column within a cube segment.
 *
 * @param cubeSeg segment asked for the dictionary resource path of {@code col}
 * @param col     column whose dictionary is wanted
 * @return the dictionary, or {@code null} if the segment has no dictionary
 *         resource path for the column
 * @throws IllegalStateException if a path exists but no dictionary info is found for it,
 *         or if the lookup raises an {@link IOException}
 */
@SuppressWarnings("unchecked")
public Dictionary<String> getDictionary(CubeSegment cubeSeg, TblColRef col) {
    DictionaryInfo found;
    try {
        DictionaryManager manager = getDictionaryManager();
        String path = cubeSeg.getDictResPath(col);
        if (path == null) {
            return null;
        }

        found = manager.getDictionaryInfo(path);
        if (found == null) {
            // A recorded path without a loadable dictionary means the cube metadata is broken.
            throw new IllegalStateException("No dictionary found by " + path + ", invalid cube state; cube segment" + cubeSeg + ", col " + col);
        }
    } catch (IOException e) {
        throw new IllegalStateException("Failed to get dictionary for cube segment" + cubeSeg + ", col" + col, e);
    }
    return (Dictionary<String>) found.getDictionaryObject();
}
/**
 * Saves an already-built dictionary for a column, unless a dictionary with
 * identical input info already exists, in which case that one is returned.
 *
 * @param col        column the dictionary belongs to
 * @param inpTable   table used to create the dictionary info
 * @param dictionary dictionary to save when no duplicate-by-info is found
 * @return the existing info when a duplicate is found, otherwise the result of
 *         {@code trySaveNewDict}
 * @throws IOException if any of the underlying calls fails with an I/O error
 */
public DictionaryInfo saveDictionary(TblColRef col, IReadableTable inpTable, Dictionary<String> dictionary) throws IOException {
    DictionaryInfo candidate = createDictionaryInfo(col, inpTable);

    String existingPath = checkDupByInfo(candidate);
    if (existingPath == null) {
        return trySaveNewDict(dictionary, candidate);
    }

    logger.info("Identical dictionary input " + candidate.getInput() + ", reuse existing dictionary at " + existingPath);
    return getDictionaryInfo(existingPath);
}
/**
 * Searches the resource directory of {@code dictInfo} for a stored dictionary
 * that can stand in for {@code dict}.
 *
 * <p>A stored dictionary matches when {@code dict} equals it, or, if
 * {@code config.isDictResuable()} is true, when the stored dictionary
 * {@code contains} {@code dict}.
 *
 * @param dictInfo info whose resource directory is listed
 * @param dict     dictionary to compare against the stored ones
 * @return the resource path of the first match, or {@code null} if the listing is
 *         {@code null} or nothing matches
 * @throws IOException if listing or loading resources fails
 */
private String checkDupByContent(DictionaryInfo dictInfo, Dictionary<String> dict) throws IOException {
    NavigableSet<String> candidates = getStore().listResources(dictInfo.getResourceDir());
    if (candidates == null) {
        return null;
    }

    int count = candidates.size();
    logger.info("{} existing dictionaries of the same column", count);
    if (count > 100) {
        logger.warn("Too many dictionaries under {}, dict count: {}", dictInfo.getResourceDir(), count);
    }

    for (String candidatePath : candidates) {
        DictionaryInfo stored = getDictionaryInfo(candidatePath);
        if (stored == null) {
            continue;
        }

        // The containment check is only attempted when dictionary reuse is enabled.
        boolean reusable = config.isDictResuable() && stored.getDictionaryObject().contains(dict);
        if (reusable || dict.equals(stored.getDictionaryObject())) {
            return candidatePath;
        }
    }
    return null;
}
/**
 * Builds and saves a dictionary for a column from a readable table, reusing an
 * existing dictionary when one with identical input info is found.
 *
 * @param col          column to build the dictionary for
 * @param inpTable     source table; when it does not exist nothing is built
 * @param builderClass passed through to {@code buildDictFromReadableTable}
 * @return {@code null} if {@code inpTable} does not exist; the existing info when a
 *         duplicate-by-info is found; otherwise the result of {@code trySaveNewDict}
 * @throws IOException if any of the underlying calls fails with an I/O error
 */
public DictionaryInfo buildDictionary(TblColRef col, IReadableTable inpTable, String builderClass) throws IOException {
    if (!inpTable.exists()) {
        return null;
    }

    logger.info("building dictionary for " + col);

    DictionaryInfo candidate = createDictionaryInfo(col, inpTable);
    String existingPath = checkDupByInfo(candidate);
    if (existingPath != null) {
        logger.info("Identical dictionary input " + candidate.getInput() + ", reuse existing dictionary at " + existingPath);
        return getDictionaryInfo(existingPath);
    }

    logger.info("Building dictionary object " + JsonUtil.writeValueAsString(candidate));
    Dictionary<String> built = buildDictFromReadableTable(inpTable, candidate, builderClass, col);
    return trySaveNewDict(built, candidate);
}
/**
 * For the new segment, we need to create new dimension dictionaries by merging the
 * underlying dictionaries of the segments being merged.
 * (https://issues.apache.org/jira/browse/KYLIN-2457, https://issues.apache.org/jira/browse/KYLIN-2800)
 *
 * <p>For every column returned by {@code getAllColumnsNeedDictionaryBuilt()}, the distinct
 * dictionary infos of the merging segments are collected and handed to
 * {@code mergeDictionaries}. Segments without a dictionary path for the column are skipped.
 *
 * @param conf            config used to obtain the {@code DictionaryManager}
 * @param cube            cube whose descriptor lists the columns to process
 * @param newSeg          segment passed to {@code mergeDictionaries}
 * @param mergingSegments segments whose dictionaries are collected
 * @throws IOException if loading or merging dictionaries fails
 */
private void makeDictForNewSegment(KylinConfig conf, CubeInstance cube, CubeSegment newSeg, List<CubeSegment> mergingSegments) throws IOException {
    DictionaryManager dictMgr = DictionaryManager.getInstance(conf);
    CubeDesc cubeDesc = cube.getDescriptor();

    for (TblColRef col : cubeDesc.getAllColumnsNeedDictionaryBuilt()) {
        logger.info("Merging fact table dictionary on : " + col);
        List<DictionaryInfo> dictInfos = new ArrayList<DictionaryInfo>();
        for (CubeSegment segment : mergingSegments) {
            logger.info("Including fact table dictionary of segment : " + segment);
            String dictResPath = segment.getDictResPath(col);
            if (dictResPath == null) {
                continue;
            }

            DictionaryInfo dictInfo = dictMgr.getDictionaryInfo(dictResPath);
            if (dictInfo == null) {
                // Only a null result is a load failure; a duplicate is not.
                logger.warn("Failed to load DictionaryInfo from " + dictResPath);
            } else if (!dictInfos.contains(dictInfo)) {
                // Segments may share a dictionary; include each one only once.
                dictInfos.add(dictInfo);
            }
        }
        mergeDictionaries(dictMgr, newSeg, dictInfos, col);
    }
}
DictionaryInfo largestDictInfo = findLargestDictInfo(newDictInfo); if (largestDictInfo != null) { largestDictInfo = getDictionaryInfo(largestDictInfo.getResourcePath()); Dictionary<String> largestDictObject = largestDictInfo.getDictionaryObject(); if (largestDictObject.contains(newDict)) { if (dupDict != null) { logger.info("Identical dictionary content, reuse existing dictionary at " + dupDict); return getDictionaryInfo(dupDict);
for (CubeSegment segment : mergingSegments) { if (segment.getDictResPath(col) != null) { DictionaryInfo dictInfo = dictMgr.getDictionaryInfo(segment.getDictResPath(col)); if (dictInfo != null && !dictInfos.contains(dictInfo)) { dictInfos.add(dictInfo);
for (CubeSegment segment : mergingSegments) { if (segment.getDictResPath(col) != null) { DictionaryInfo dictInfo = dictMgr.getDictionaryInfo(segment.getDictResPath(col)); if (dictInfo != null && !dictInfos.contains(dictInfo)) { dictInfos.add(dictInfo);
logger.info(dictInfoResource); DictionaryInfo dictInfoHdfs = dictMgrHdfs.getDictionaryInfo(dictInfoResource); DictionaryInfo dicInfoHbase = dictMgrHbase.trySaveNewDict(dictInfoHdfs.getDictionaryObject(), dictInfoHdfs);
DictionaryManager dstDictMgr = DictionaryManager.getInstance(dstConfig); DictionaryManager srcDicMgr = DictionaryManager.getInstance(srcConfig); DictionaryInfo dictSrc = srcDicMgr.getDictionaryInfo(item);
DictionaryManager dstDictMgr = DictionaryManager.getInstance(dstConfig); DictionaryManager srcDicMgr = DictionaryManager.getInstance(srcConfig); DictionaryInfo dictSrc = srcDicMgr.getDictionaryInfo(item);
/**
 * Fetches the dictionary object stored under a resource path.
 *
 * @param resourcePath path handed to {@code getDictionaryInfo}
 * @return the dictionary held by the matching {@code DictionaryInfo}, or {@code null}
 *         if no info exists for the path
 * @throws IOException if the underlying lookup fails
 */
public Dictionary<?> getDictionary(String resourcePath) throws IOException {
    DictionaryInfo info = getDictionaryInfo(resourcePath);
    if (info != null) {
        return info.getDictionaryObject();
    }
    return null;
}
/**
 * Saves a new dictionary unless {@code checkDupByContent} reports an existing one
 * with identical content.
 *
 * <p>When saving, the dictionary object and its class name are set on
 * {@code newDictInfo}, the info is saved, and it is put into {@code dictCache}
 * under its resource path.
 *
 * @param newDict     dictionary to save
 * @param newDictInfo info describing the dictionary; mutated when the dictionary is saved
 * @return the existing info when a duplicate is found, otherwise {@code newDictInfo}
 * @throws IOException if duplicate detection, saving or loading fails
 */
public DictionaryInfo trySaveNewDict(Dictionary<?> newDict, DictionaryInfo newDictInfo) throws IOException {
    String existingPath = checkDupByContent(newDictInfo, newDict);

    if (existingPath == null) {
        newDictInfo.setDictionaryObject(newDict);
        newDictInfo.setDictionaryClass(newDict.getClass().getName());

        save(newDictInfo);
        dictCache.put(newDictInfo.getResourcePath(), newDictInfo);
        return newDictInfo;
    }

    logger.info("Identical dictionary content " + newDict + ", reuse existing dictionary at " + existingPath);
    return getDictionaryInfo(existingPath);
}
/** * return null if no dictionary for given column */ public Dictionary<?> getDictionary(CubeSegment cubeSeg, TblColRef col) { DictionaryInfo info = null; try { DictionaryManager dictMgr = getDictionaryManager(); // logger.info("Using metadata url " + metadataUrl + // " for DictionaryManager"); String dictResPath = cubeSeg.getDictResPath(col); if (dictResPath == null) return null; info = dictMgr.getDictionaryInfo(dictResPath); if (info == null) throw new IllegalStateException("No dictionary found by " + dictResPath + ", invalid cube state; cube segment" + cubeSeg + ", col " + col); } catch (IOException e) { throw new IllegalStateException("Failed to get dictionary for cube segment" + cubeSeg + ", col" + col, e); } return info.getDictionaryObject(); }
/** * return null if no dictionary for given column */ public Dictionary<?> getDictionary(IISegment iiSeg, TblColRef col) { DictionaryInfo info = null; try { DictionaryManager dictMgr = getDictionaryManager(); // logger.info("Using metadata url " + metadataUrl + // " for DictionaryManager"); String dictResPath = iiSeg.getDictResPath(col); if (dictResPath == null) return null; info = dictMgr.getDictionaryInfo(dictResPath); if (info == null) throw new IllegalStateException("No dictionary found by " + dictResPath + ", invalid II state; II segment" + iiSeg + ", col " + col); } catch (IOException e) { throw new IllegalStateException("Failed to get dictionary for II segment" + iiSeg + ", col" + col, e); } return info.getDictionaryObject(); }
/**
 * Builds and saves a dictionary for a column, reusing an existing dictionary
 * when one with identical input info is found.
 *
 * <p>The source table, source column, column index and readable table are taken
 * from the four-element array returned by {@code decideSourceData}.
 *
 * @param model           passed through to {@code decideSourceData}
 * @param dict            passed through to {@code decideSourceData}
 * @param col             column to build the dictionary for; also supplies the data type
 * @param factColumnsPath passed through to {@code decideSourceData}
 * @return the existing info when a duplicate-by-info is found, otherwise the result
 *         of {@code trySaveNewDict}
 * @throws IOException if any of the underlying calls fails with an I/O error
 */
public DictionaryInfo buildDictionary(DataModelDesc model, String dict, TblColRef col, String factColumnsPath) throws IOException {
    logger.info("building dictionary for " + col);

    Object[] source = decideSourceData(model, dict, col, factColumnsPath);
    String sourceTable = (String) source[0];
    String sourceColumn = (String) source[1];
    int sourceColumnIndex = (Integer) source[2];
    ReadableTable table = (ReadableTable) source[3];

    DictionaryInfo candidate = new DictionaryInfo(sourceTable, sourceColumn, sourceColumnIndex, col.getDatatype(), table.getSignature(), table.getColumnDelimeter());

    String existingPath = checkDupByInfo(candidate);
    if (existingPath == null) {
        Dictionary<?> built = DictionaryGenerator.buildDictionary(candidate, table);
        return trySaveNewDict(built, candidate);
    }

    logger.info("Identical dictionary input " + candidate.getInput() + ", reuse existing dictionary at " + existingPath);
    return getDictionaryInfo(existingPath);
}
for (CubeSegment segment : mergingSegments) { logger.info("Including fact table dictionary of segment : " + segment); DictionaryInfo dictInfo = dictMgr.getDictionaryInfo(segment.getDictResPath(col)); dictInfos.add(dictInfo);
public DictionaryInfo mergeDictionary(List<DictionaryInfo> dicts) throws IOException { DictionaryInfo firstDictInfo = null; int totalSize = 0; for (DictionaryInfo info : dicts) { // check if (firstDictInfo == null) { firstDictInfo = info; } else { if (!firstDictInfo.isDictOnSameColumn(info)) { throw new IllegalArgumentException("Merging dictionaries are not structurally equal(regardless of signature)."); } } totalSize += info.getInput().getSize(); } if (firstDictInfo == null) { throw new IllegalArgumentException("DictionaryManager.mergeDictionary input cannot be null"); } DictionaryInfo newDictInfo = new DictionaryInfo(firstDictInfo); TableSignature signature = newDictInfo.getInput(); signature.setSize(totalSize); signature.setLastModifiedTime(System.currentTimeMillis()); signature.setPath("merged_with_no_original_path"); String dupDict = checkDupByInfo(newDictInfo); if (dupDict != null) { logger.info("Identical dictionary input " + newDictInfo.getInput() + ", reuse existing dictionary at " + dupDict); return getDictionaryInfo(dupDict); } Dictionary<?> newDict = DictionaryGenerator.mergeDictionaries(newDictInfo, dicts); return trySaveNewDict(newDict, newDictInfo); }
/**
 * Builds the dictionary of one column twice and asserts, by reference comparison,
 * that both builds yield the same uuid and the same dictionary object, and that
 * each info is the instance returned by {@code getDictionaryInfo} for its path.
 */
@Test
@Ignore("hive not ready")
public void basic() throws Exception {
    CubeDesc desc = CubeDescManager.getInstance(getTestConfig()).getCubeDesc("test_kylin_cube_without_slr_desc");
    TblColRef column = desc.findColumnRef("DEFAULT.TEST_CATEGORY_GROUPINGS", "META_CATEG_NAME");

    DictionaryInfo firstBuild = dictMgr.buildDictionary(desc.getModel(), desc.getRowkey().getDictionary(column), column, null);
    System.out.println(JsonUtil.writeValueAsIndentString(firstBuild));

    DictionaryInfo secondBuild = dictMgr.buildDictionary(desc.getModel(), desc.getRowkey().getDictionary(column), column, null);
    System.out.println(JsonUtil.writeValueAsIndentString(secondBuild));

    // All comparisons below are identity (==) checks, not equals().
    assertTrue(firstBuild.getUuid() == secondBuild.getUuid());

    assertTrue(firstBuild == dictMgr.getDictionaryInfo(firstBuild.getResourcePath()));
    assertTrue(secondBuild == dictMgr.getDictionaryInfo(secondBuild.getResourcePath()));

    assertTrue(firstBuild.getDictionaryObject() == secondBuild.getDictionaryObject());

    touchDictValues(firstBuild);
}