中一个或多个 hyperloglog
数据进行合并时,结果会以稠密的结构进行存储,占用内存 12k,与之相对的是,以稀疏结构存储的数据(在我的数据库中)平均仅占 200B。
因此我写了个 Lua 脚本,用来把以稠密结构存储的 hyperloglog 转换回稀疏结构:
在我的 MacBook Pro (2.4 GHz Intel Core i5) 上,这个脚本每秒可转换 182 个 hyperloglog。
--- Rewrite the dense Redis HyperLogLog stored at `key` in the sparse encoding.
--
-- Layout this function relies on:
--   * bytes 1-4   : the magic string "HYLL"
--   * byte  5     : encoding flag (1 means the value is already sparse)
--   * bytes 6-16  : remainder of the 16-byte header, copied through verbatim
--   * bytes 17-.. : 16384 registers of 6 bits each, packed LSB-first
--
-- Sparse output is a sequence of run-length opcodes:
--   * 00xxxxxx           : run of (x + 1) zero registers, 1..64
--   * 01xxxxxx yyyyyyyy  : run of ((x << 8 | y) + 1) zero registers, 1..16384
--   * 1vvvvvxx           : run of (x + 1) registers holding value (v + 1),
--                          value 1..32, run length 1..4
--
-- NOTE(review): the value is written back with a plain SET; if the key may
-- carry a TTL, confirm whether it has to be preserved by the caller.
--
-- @tparam string key name of the Redis key to convert in place
-- @treturn number  1 if the value was rewritten as sparse;
--                  0 if it is already sparse;
--                 -1 if the key is missing or is not a well-formed dense
--                    HyperLogLog;
--                 -2 if some register holds a value above 32, which the
--                    sparse encoding cannot represent (key left untouched)
local function hll_dense2sparse(key)
  local exec = redis.call
  local sub, byte, char = string.sub, string.byte, string.char
  local concat = table.concat
  local floor = math.floor

  local MAGIC = "HYLL"
  local HEADER_SIZE = 16
  local REGISTERS = 16384
  local REGISTER_BITS = 6
  local DENSE_SIZE = HEADER_SIZE + REGISTERS * REGISTER_BITS / 8 -- 12304
  local MAX_SPARSE_VALUE = 32
  local MAX_ZERO_RUN = 64     -- longest run the one-byte zero opcode holds
  local MAX_XZERO_RUN = 16384 -- longest run the two-byte zero opcode holds
  local MAX_VAL_RUN = 4       -- longest run the value opcode holds

  local dense = exec("GET", key)
  -- A missing key comes back from redis.call as `false`, not as a string;
  -- treat it like any other non-HyperLogLog value instead of erroring.
  if type(dense) ~= "string" then
    return -1
  end
  if sub(dense, 1, 4) ~= MAGIC then
    -- not a hll
    return -1
  end
  if byte(dense, 5) == 1 then
    -- already sparse
    return 0
  end
  if #dense ~= DENSE_SIZE then
    -- a dense hll is always exactly header + packed registers long
    return -1
  end

  -- Same header, with the encoding byte switched to "sparse".
  local out = { MAGIC, char(1), sub(dense, 6, HEADER_SIZE) }
  local n = #out

  local function emit(opcode)
    n = n + 1
    out[n] = opcode
  end

  -- Append the opcodes that encode `count` consecutive registers of `value`.
  local function flush_run(value, count)
    if value == 0 then
      while count >= MAX_XZERO_RUN do
        emit(char(127, 255))
        count = count - MAX_XZERO_RUN
      end
      if count > MAX_ZERO_RUN then
        local len = count - 1 -- opcodes store run length minus one
        emit(char(64 + floor(len / 256), len % 256))
      elseif count > 0 then
        emit(char(count - 1))
      end
    else
      local base = 128 + (value - 1) * 4
      while count >= MAX_VAL_RUN do
        emit(char(base + MAX_VAL_RUN - 1))
        count = count - MAX_VAL_RUN
      end
      if count > 0 then
        emit(char(base + count - 1))
      end
    end
  end

  local run_value, run_len = nil, 0
  for i = 0, REGISTERS - 1 do
    -- Locate the (up to two) bytes that the 6-bit register straddles.
    local bitpos = i * REGISTER_BITS
    local shift = bitpos % 8
    local idx = (bitpos - shift) / 8 + HEADER_SIZE + 1
    local lo, hi = byte(dense, idx, idx + 1)
    -- `hi` is nil only for the very last register, whose upper bits all
    -- live in the final byte; treat the missing byte as zero.
    local value = (floor(lo / 2 ^ shift) + (hi or 0) * 2 ^ (8 - shift)) % 64

    if value > MAX_SPARSE_VALUE then
      -- cannot translate to sparse representation
      return -2
    end

    if value == run_value then
      run_len = run_len + 1
    else
      if run_value then
        flush_run(run_value, run_len)
      end
      run_value, run_len = value, 1
    end
  end
  -- The loop only flushes when the value changes, so the final run is
  -- still pending here.
  flush_run(run_value, run_len)

  exec("SET", key, concat(out))
  return 1
end