/**
 * Creates an enumerator over the dictionaries carried by the given infos.
 *
 * For every entry of {@code dictionaryInfoList} the dictionary object is
 * appended to {@code dictionaryList} and its minimum id is appended to
 * {@code curKeys}, so both collections stay index-aligned.
 *
 * @param dataType           stored in the {@code dataType} field
 * @param dictionaryInfoList infos whose dictionary objects are collected
 */
public MultipleDictionaryValueEnumerator(DataType dataType, List<DictionaryInfo> dictionaryInfoList) {
    this.dataType = dataType;
    dictionaryList = Lists.newArrayListWithCapacity(dictionaryInfoList.size());
    for (DictionaryInfo dictInfo : dictionaryInfoList) {
        // Fetch and cast once; the same reference feeds both the list and the start key.
        Dictionary<String> dictionary = (Dictionary<String>) dictInfo.getDictionaryObject();
        dictionaryList.add(dictionary);
        curKeys.add(dictionary.getMinId());
    }
}
/**
 * Looks up the dictionary info stored under {@code resourcePath} and returns
 * the dictionary object it carries.
 *
 * @param resourcePath path handed to {@code getDictionaryInfo}
 * @return the dictionary object, or {@code null} when no info was found
 * @throws IOException propagated from {@code getDictionaryInfo}
 */
public Dictionary<String> getDictionary(String resourcePath) throws IOException {
    final DictionaryInfo found = getDictionaryInfo(resourcePath);
    if (found == null) {
        return null;
    }
    return found.getDictionaryObject();
}
/**
 * Writes {@code obj} to {@code out}: first its indented JSON form as a UTF
 * string, then (unless {@code infoOnly} is set) the dictionary object itself.
 *
 * @param obj the dictionary info to serialize
 * @param out destination stream
 * @throws IOException if writing fails
 */
@Override
public void serialize(DictionaryInfo obj, DataOutputStream out) throws IOException {
    out.writeUTF(JsonUtil.writeValueAsIndentString(obj));
    if (infoOnly) {
        // Metadata-only mode: the dictionary payload is not written.
        return;
    }
    obj.getDictionaryObject().write(out);
}
/**
 * Resolves the dictionary of a column within a cube segment.
 *
 * @param cubeSeg segment whose dictionary resource path is consulted
 * @param col     column to look up
 * @return the dictionary, or {@code null} when the segment records no
 *         dictionary resource path for the column
 * @throws IllegalStateException if a path is recorded but no dictionary info
 *         exists under it, or if loading the info fails with an I/O error
 */
@SuppressWarnings("unchecked")
public Dictionary<String> getDictionary(CubeSegment cubeSeg, TblColRef col) {
    try {
        DictionaryManager manager = getDictionaryManager();
        String path = cubeSeg.getDictResPath(col);
        if (path == null) {
            return null;
        }

        DictionaryInfo found = manager.getDictionaryInfo(path);
        if (found == null) {
            throw new IllegalStateException("No dictionary found by " + path + ", invalid cube state; cube segment" + cubeSeg + ", col " + col);
        }
        return (Dictionary<String>) found.getDictionaryObject();
    } catch (IOException e) {
        throw new IllegalStateException("Failed to get dictionary for cube segment" + cubeSeg + ", col" + col, e);
    }
}
/**
 * Prints the content of dictionary files to standard output.
 *
 * A directory is walked recursively; a regular file is processed only when
 * its name ends with {@code .dict}. For each such file the absolute path,
 * last-modified date, indented JSON of the info and a dump of the dictionary
 * object are printed.
 *
 * @param f file or directory to dump
 * @throws IOException if reading or deserializing a file fails
 */
public static void dump(File f) throws IOException {
    if (f.isDirectory()) {
        File[] files = f.listFiles();
        if (files == null) {
            return;
        }
        for (File c : files) {
            dump(c);
        }
        return;
    }

    if (f.getName().endsWith(".dict")) {
        DictionaryInfoSerializer ser = new DictionaryInfoSerializer();
        DictionaryInfo dictInfo;
        // Close the stream even when deserialization fails; previously the
        // file handle was never released.
        try (DataInputStream in = new DataInputStream(new FileInputStream(f))) {
            dictInfo = ser.deserialize(in);
        }

        System.out.println("============================================================================");
        System.out.println("File: " + f.getAbsolutePath());
        System.out.println(new Date(dictInfo.getLastModified()));
        System.out.println(JsonUtil.writeValueAsIndentString(dictInfo));
        dictInfo.getDictionaryObject().dump(System.out);
        System.out.println();
    }
}
} // closes the enclosing class (was on the same source line as the method)
/**
 * Searches the resource directory of {@code dictInfo} for an already stored
 * dictionary that can stand in for {@code dict}.
 *
 * A stored dictionary matches when dictionary reuse is enabled in the config
 * and it contains {@code dict}, or when {@code dict} equals it.
 *
 * @param dictInfo info whose resource directory is listed
 * @param dict     dictionary to compare against the stored ones
 * @return resource path of the first match, or {@code null} when the listing
 *         is {@code null} or nothing matches
 * @throws IOException propagated from the resource store / info lookup
 */
private String checkDupByContent(DictionaryInfo dictInfo, Dictionary<String> dict) throws IOException {
    ResourceStore store = getStore();
    NavigableSet<String> candidates = store.listResources(dictInfo.getResourceDir());
    if (candidates == null) {
        return null;
    }

    final int count = candidates.size();
    logger.info("{} existing dictionaries of the same column", count);
    if (count > 100) {
        logger.warn("Too many dictionaries under {}, dict count: {}", dictInfo.getResourceDir(), count);
    }

    for (String path : candidates) {
        DictionaryInfo candidate = getDictionaryInfo(path);
        if (candidate == null) {
            continue;
        }
        boolean reusableSuperset = config.isDictResuable() && candidate.getDictionaryObject().contains(dict);
        if (reusableSuperset || dict.equals(candidate.getDictionaryObject())) {
            return path;
        }
    }
    return null;
}
/**
 * Saves each dictionary of {@code dictionaryMap} through the dictionary
 * manager of the segment's cube config and records the resulting resource
 * path on {@code cubeSegment}.
 *
 * Each dictionary is saved with a table signature whose path is
 * {@code streaming_<startOffset>_<endOffset>}, whose size is
 * {@code endOffset - startOffset} and whose last-modified time is "now".
 *
 * @param cubeSegment   segment that receives the dictionary resource paths
 * @param dictionaryMap dictionaries to save, keyed by column
 * @param startOffset   start offset used in the signature
 * @param endOffset     end offset used in the signature
 * @return map from column to the dictionary object of the info returned by
 *         {@code trySaveNewDict}
 * @throws RuntimeException wrapping any {@link IOException} raised on save
 */
@SuppressWarnings("unchecked")
public static Map<TblColRef, Dictionary<String>> writeDictionary(CubeSegment cubeSegment, Map<TblColRef, Dictionary<String>> dictionaryMap, long startOffset, long endOffset) {
    Map<TblColRef, Dictionary<String>> savedByColumn = Maps.newHashMap();

    for (Map.Entry<TblColRef, Dictionary<String>> e : dictionaryMap.entrySet()) {
        final TblColRef column = e.getKey();
        final Dictionary<String> built = e.getValue();

        IReadableTable.TableSignature sig = new IReadableTable.TableSignature();
        sig.setLastModifiedTime(System.currentTimeMillis());
        sig.setPath(String.format(Locale.ROOT, "streaming_%s_%s", startOffset, endOffset));
        sig.setSize(endOffset - startOffset);

        DictionaryInfo template = new DictionaryInfo(column.getColumnDesc(), column.getDatatype(), sig);
        logger.info("writing dictionary for TblColRef:" + column.toString());

        DictionaryManager manager = DictionaryManager.getInstance(cubeSegment.getCubeDesc().getConfig());
        try {
            DictionaryInfo saved = manager.trySaveNewDict(built, template);
            cubeSegment.putDictResPath(column, saved.getResourcePath());
            savedByColumn.put(column, (Dictionary<String>) saved.getDictionaryObject());
        } catch (IOException ioe) {
            throw new RuntimeException("error save dictionary for column:" + column, ioe);
        }
    }

    return savedByColumn;
}
if (!dicts.get(0).getDictionaryObject().equals(dicts.get(i).getDictionaryObject())) { identicalSourceDicts = false; break;
if (largestDictInfo != null) { largestDictInfo = getDictionaryInfo(largestDictInfo.getResourcePath()); Dictionary<String> largestDictObject = largestDictInfo.getDictionaryObject(); if (largestDictObject.contains(newDict)) { logger.info("dictionary content " + newDict + ", is contained by dictionary at " + largestDictInfo.getResourcePath());
DictionaryInfo dictInfo = store.getResource(dictPath, DictionaryInfoSerializer.FULL_SERIALIZER); if ("org.apache.kylin.dict.AppendTrieDictionary".equals(dictInfo != null ? dictInfo.getDictionaryClass() : null)){ String dictObj = dictInfo.getDictionaryObject().toString(); String basedir = dictObj.substring(dictObj.indexOf("(") + 1, dictObj.indexOf(")") - 1); if (basedir.startsWith(KylinConfig.getInstanceFromEnv().getHdfsWorkingDirectory() + "/resources/GlobalDict")) {
// Pass the dictionary object carried by dictInfoHdfs, together with that info
// itself, to dictMgrHbase.trySaveNewDict; the DictionaryInfo it returns is
// kept in dicInfoHbase.
DictionaryInfo dicInfoHbase = dictMgrHbase.trySaveNewDict(dictInfoHdfs.getDictionaryObject(), dictInfoHdfs);
private void saveDictionaryInfo(CubeSegment cubeSeg, TblColRef col, DictionaryInfo dictInfo) throws IOException { if (dictInfo == null) return; // work on copy instead of cached objects CubeInstance cubeCopy = cubeSeg.getCubeInstance().latestCopyForWrite(); // get a latest copy CubeSegment segCopy = cubeCopy.getSegmentById(cubeSeg.getUuid()); Dictionary<?> dict = dictInfo.getDictionaryObject(); segCopy.putDictResPath(col, dictInfo.getResourcePath()); segCopy.getRowkeyStats().add(new Object[] { col.getIdentity(), dict.getSize(), dict.getSizeOfId() }); CubeUpdate update = new CubeUpdate(cubeCopy); update.setToUpdateSegs(segCopy); updateCube(update); }
// Copy the source dictionary object via copyToAnotherMeta(srcConfig, dstConfig),
// try to save that copy through dstDictMgr using dictSrc as the info, then set
// dictSrc's last-modified value to ts.
// NOTE(review): dictObj is declared with the raw Dictionary type.
Dictionary dictObj = dictSrc.getDictionaryObject().copyToAnotherMeta(srcConfig, dstConfig); DictionaryInfo dictSaved = dstDictMgr.trySaveNewDict(dictObj, dictSrc); dictSrc.setLastModified(ts);
// Copy the source dictionary object via copyToAnotherMeta(srcConfig, dstConfig),
// try to save that copy through dstDictMgr using dictSrc as the info, then set
// dictSrc's last-modified value to ts.
// NOTE(review): dictObj is declared with the raw Dictionary type.
Dictionary dictObj = dictSrc.getDictionaryObject().copyToAnotherMeta(srcConfig, dstConfig); DictionaryInfo dictSaved = dstDictMgr.trySaveNewDict(dictObj, dictSrc); dictSrc.setLastModified(ts);
/**
 * Returns the dictionary object of the info stored under
 * {@code resourcePath}.
 *
 * @param resourcePath path handed to {@code getDictionaryInfo}
 * @return the dictionary object, or {@code null} when no info exists
 * @throws IOException propagated from {@code getDictionaryInfo}
 */
public Dictionary<?> getDictionary(String resourcePath) throws IOException {
    DictionaryInfo info = getDictionaryInfo(resourcePath);
    if (info != null) {
        return info.getDictionaryObject();
    }
    return null;
}
/**
 * Serializes a dictionary info: the indented JSON representation is written
 * as a UTF string, followed by the dictionary object unless this serializer
 * is in info-only mode.
 *
 * @param obj the dictionary info to write
 * @param out stream receiving the data
 * @throws IOException if the stream cannot be written
 */
@Override
public void serialize(DictionaryInfo obj, DataOutputStream out) throws IOException {
    final String infoJson = JsonUtil.writeValueAsIndentString(obj);
    out.writeUTF(infoJson);

    final boolean includeDictionary = !infoOnly;
    if (includeDictionary) {
        obj.getDictionaryObject().write(out);
    }
}
/**
 * Merges several dictionaries into one built for {@code targetInfo}.
 *
 * Every value of every source dictionary (ids {@code getMinId()} through
 * {@code getMaxId()}, inclusive) is read as bytes, duplicates are dropped,
 * and the distinct values are handed to {@code buildDictionaryFromValueList}.
 *
 * @param targetInfo  info describing the dictionary to build
 * @param sourceDicts infos whose dictionary objects are merged
 * @return the dictionary produced by {@code buildDictionaryFromValueList}
 */
public static Dictionary mergeDictionaries(DictionaryInfo targetInfo, List<DictionaryInfo> sourceDicts) {
    // byte[] uses identity equals/hashCode, so a HashSet<byte[]> never removes
    // duplicates. ByteBuffer compares by content, which gives real de-duplication.
    HashSet<java.nio.ByteBuffer> dedup = new HashSet<java.nio.ByteBuffer>();

    for (DictionaryInfo info : sourceDicts) {
        Dictionary<?> dict = info.getDictionaryObject();
        int minkey = dict.getMinId();
        int maxkey = dict.getMaxId();
        // Scratch buffer sized by the dictionary's getSizeOfValue().
        byte[] buffer = new byte[dict.getSizeOfValue()];
        for (int i = minkey; i <= maxkey; ++i) {
            int size = dict.getValueBytesFromId(i, buffer, 0);
            // Copy out of the scratch buffer; the copy is owned by the wrapper.
            dedup.add(java.nio.ByteBuffer.wrap(Bytes.copy(buffer, 0, size)));
        }
    }

    List<byte[]> valueList = new ArrayList<byte[]>(dedup.size());
    for (java.nio.ByteBuffer value : dedup) {
        // Each wrapper backs exactly one private copy, so array() is the value itself.
        valueList.add(value.array());
    }

    return buildDictionaryFromValueList(targetInfo, valueList);
}
/**
 * Reads the value of every id in {@code [0, cardinality)} from the dictionary
 * of {@code info1} and asserts that the number of distinct values equals the
 * cardinality reported by the info.
 *
 * @param info1 dictionary info under test
 */
@SuppressWarnings("unchecked")
private void touchDictValues(DictionaryInfo info1) {
    final Dictionary<String> dictionary = (Dictionary<String>) info1.getDictionaryObject();
    final HashSet<String> distinctValues = new HashSet<String>();

    final int cardinality = info1.getCardinality();
    int id = 0;
    while (id < cardinality) {
        distinctValues.add(dictionary.getValueFromId(id));
        id++;
    }

    assertEquals(info1.getCardinality(), distinctValues.size());
}
} // closes the enclosing class (was on the same source line as the method)
// The dictionary object held by info1 is expected to report a size of 3.
assertEquals(3, info1.getDictionaryObject().getSize());
/**
 * Builds the dictionary for the same column twice and checks that the second
 * build resolves to the same dictionary as the first: equal uuids, the same
 * cached info instances, and the same dictionary object. Finally reads every
 * value of the dictionary via {@code touchDictValues}.
 *
 * Currently disabled through {@code @Ignore("hive not ready")}.
 */
@Test
@Ignore("hive not ready")
public void basic() throws Exception {
    CubeDesc cubeDesc = CubeDescManager.getInstance(getTestConfig()).getCubeDesc("test_kylin_cube_without_slr_desc");
    TblColRef col = cubeDesc.findColumnRef("DEFAULT.TEST_CATEGORY_GROUPINGS", "META_CATEG_NAME");

    DictionaryInfo info1 = dictMgr.buildDictionary(cubeDesc.getModel(), cubeDesc.getRowkey().getDictionary(col), col, null);
    System.out.println(JsonUtil.writeValueAsIndentString(info1));

    DictionaryInfo info2 = dictMgr.buildDictionary(cubeDesc.getModel(), cubeDesc.getRowkey().getDictionary(col), col, null);
    System.out.println(JsonUtil.writeValueAsIndentString(info2));

    // Compare uuids by value; reference comparison would fail for equal
    // uuids held in distinct instances.
    assertTrue(info1.getUuid().equals(info2.getUuid()));

    // The checks below are intentionally identity checks: the manager is
    // expected to hand back the very same cached instances.
    assertTrue(info1 == dictMgr.getDictionaryInfo(info1.getResourcePath()));
    assertTrue(info2 == dictMgr.getDictionaryInfo(info2.getResourcePath()));
    assertTrue(info1.getDictionaryObject() == info2.getDictionaryObject());

    touchDictValues(info1);
}