玩转HBase RowKey设计及案例(code)，你会吗？

2018-07-20 本文已影响58人 ad4d39659223

中国HBase技术社区第二届MeetUp

在7月21号13点，北京线下召开

公众号回复“报名”，直接获取

现场 or 直播报名链接

Rowkey长度原则

Rowkey是一个二进制码流，很多开发者建议将Rowkey的长度设计在10~100个字节，我的建议是越短越好，不要超过16个字节。原因一：数据的持久化文件HFile中是按照KeyValue存储的，如果Rowkey过长，比如100个字节，1000万列数据光Rowkey就要占用100*1000万=10亿个字节，将近1G数据，这会极大影响HFile的存储效率；原因二：MemStore将缓存部分数据到内存，如果Rowkey字段过长，内存的有效利用率会降低，系统将无法缓存更多的数据，这会降低检索效率，因此Rowkey的字节长度越短越好；原因三：目前操作系统都是64位系统，内存8字节对齐，将Rowkey控制在16个字节（8字节的整数倍）可以利用操作系统的最佳特性。

Rowkey哈希原则

在集群的环境中，为了得到更好的并行性，我们希望有好的load balance，让每个节点提供的请求处理都是均等的。我们也希望region不要经常split，因为split会使server有一段时间的停顿。如何能做到呢？

随机散列与预分区。二者结合起来，是比较完美的，预分区一开始就预建好了一部分region，这些region都维护着自己的start-end keys，再配合上随机散列，写数据能均等地命中这些预建的region，就能解决上面的那些缺点，大大地提高了性能。

hash就是rowkey前面由一串随机字符串组成,随机字符串生成方式可以由SHA或者MD5等方式生成，只要region所管理的start-end keys范围比较随机，那么就可以解决写热点问题。

 1long currentId = 1L;
 2byte [] rowkey = Bytes.add(MD5Hash.getMD5AsHex(Bytes.toBytes(currentId)).substring(0, 8).getBytes(),
 3Bytes.toBytes(currentId));
HBaseAdmin.createTable(HTableDescriptor tableDescriptor, byte[][] splitkeys)可以指定预分区的splitKey，即指定region间的rowkey临界值。
 5public class HashChoreWoker implements SplitKeysCalculator{
 6//随机取机数目
 7private int baseRecord;
 8//rowkey生成器
 9private RowKeyGenerator rkGen;
10//取样时，由取样数目及region数相除所得的数量.
11private int splitKeysBase;
12//splitkeys个数
13private int splitKeysNumber;
14//由抽样计算出来的splitkeys结果
15private byte[][] splitKeys;
16public HashChoreWoker(int baseRecord, int prepareRegions) {
17this.baseRecord = baseRecord;
18//实例化rowkey生成器
19rkGen = new HashRowKeyGenerator();
20splitKeysNumber = prepareRegions - 1;
21splitKeysBase = baseRecord / prepareRegions;
22}
23public byte[][] calcSplitKeys() {
24splitKeys = new byte[splitKeysNumber][];
25//使用treeset保存抽样数据，已排序过
26TreeSet<byte[]> rows = new TreeSet<byte[]>(Bytes.BYTES_COMPARATOR);
27for (int i = 0; i < baseRecord; i++) {
28rows.add(rkGen.nextId());
29}
30int pointer = 0;
31Iterator<byte[]> rowKeyIter = rows.iterator();
32int index = 0;
33while (rowKeyIter.hasNext()) {
34byte[] tempRow = rowKeyIter.next();
35rowKeyIter.remove();
36if ((pointer != 0) && (pointer % splitKeysBase == 0)) {
37if (index < splitKeysNumber) {
38splitKeys[index] = tempRow;
39index ++;
40}
41}
42pointer ++;
43}
44rows.clear();
45rows = null;
46return splitKeys;
47}
48}

KeyGenerator及实现

 /**
  * Source of rowkeys. Each call to {@link #nextId()} yields one rowkey as raw bytes.
  */
 public interface RowKeyGenerator {
     /**
      * @return the next rowkey
      */
     byte[] nextId();
 }
 5//implements
 6public class HashRowKeyGenerator implements RowKeyGenerator {
 7private long currentId = 1;
 8private long currentTime = System.currentTimeMillis();
 9private Random random = new Random();
10public byte[] nextId() {
11try {
12currentTime += random.nextInt(1000);
13byte[] lowT = Bytes.copy(Bytes.toBytes(currentTime), 4, 4);
14byte[] lowU = Bytes.copy(Bytes.toBytes(currentId), 4, 4);
15return Bytes.add(MD5Hash.getMD5AsHex(Bytes.add(lowU, lowT)).substring(0, 8).getBytes(),
16Bytes.toBytes(currentId));
17} finally {
18currentId++;
19}
20}
21}
Unit Test
23@Test
24public void testHashAndCreateTable() throws Exception{
25HashChoreWoker worker = new HashChoreWoker(1000000,10);
26byte [][] splitKeys = worker.calcSplitKeys();
27HBaseAdmin admin = new HBaseAdmin(HBaseConfiguration.create());
28TableName tableName = TableName.valueOf("hash_split_table");
29if (admin.tableExists(tableName)) {
30try {
31admin.disableTable(tableName);
32} catch (Exception e) {
33}
34admin.deleteTable(tableName);
35}
36HTableDescriptor tableDesc = new HTableDescriptor(tableName);
37HColumnDescriptor columnDesc = new HColumnDescriptor(Bytes.toBytes("info"));
38columnDesc.setMaxVersions(1);
39tableDesc.addFamily(columnDesc);
40admin.createTable(tableDesc ,splitKeys);
41admin.close();
42}

partition顾名思义，就是分区，这种分区有点类似于mapreduce中的partitioner,将区域用长整数(Long)作为分区号，每个region管理着相应的区域数据，在rowKey生成时，将id取模后，然后拼上id整体作为rowKey.这个比较简单，不需要取样，splitKeys也非常简单，直接是分区号即可。

 1public class PartitionRowKeyManager implements RowKeyGenerator,
 2SplitKeysCalculator {
 3public static final int DEFAULT_PARTITION_AMOUNT = 20;
 4private long currentId = 1;
 5private int partition = DEFAULT_PARTITION_AMOUNT;
 6public void setPartition(int partition) {
 7this.partition = partition;
 8}
 9public byte[] nextId() {
10try {
11long partitionId = currentId % partition;
12return Bytes.add(Bytes.toBytes(partitionId),
13Bytes.toBytes(currentId));
14} finally {
15currentId++;
16}
17}
18public byte[][] calcSplitKeys() {
19byte[][] splitKeys = new byte[partition - 1][];
20for(int i = 1; i < partition ; i ++) {
21splitKeys[i-1] = Bytes.toBytes((long)i);
22}
23return splitKeys;
24}
25}

测试代码

 1@Test
 2public void testPartitionAndCreateTable() throws Exception{
 3PartitionRowKeyManager rkManager = new PartitionRowKeyManager();
 4//只预建10个分区
 5rkManager.setPartition(10);
 6byte [][] splitKeys = rkManager.calcSplitKeys();
 7HBaseAdmin admin = new HBaseAdmin(HBaseConfiguration.create());
 8TableName tableName = TableName.valueOf("partition_split_table");
 9if (admin.tableExists(tableName)) {
10try {
11admin.disableTable(tableName);
12} catch (Exception e) {
13}
14admin.deleteTable(tableName);
15}
16HTableDescriptor tableDesc = new HTableDescriptor(tableName);
17HColumnDescriptor columnDesc = new HColumnDescriptor(Bytes.toBytes("info"));
18columnDesc.setMaxVersions(1);
19tableDesc.addFamily(columnDesc);
20admin.createTable(tableDesc ,splitKeys);
21admin.close();
22}

我们在表中存储的是文件信息，每个文件有5个属性：文件id（long，全局唯一）、创建时间（long）、文件名（String）、分类名（String）、所有者（User）。

我们可以输入的查询条件：文件创建时间区间（比如从20120901到20120914期间创建的文件），文件名（“中国好声音”），分类（“综艺”），所有者（“浙江卫视”）。
假设当前我们一共有如下文件：

ID CreateTime Name Category UserID
1 20120902 中国好声音第1期 综艺 1
2 20120904 中国好声音第2期 综艺 1
3 20120906 中国好声音外卡赛 综艺 1
4 20120908 中国好声音第3期 综艺 1
5 20120910 中国好声音第4期 综艺 1
6 20120912 中国好声音选手采访 综艺花絮 2
7 20120914 中国好声音第5期 综艺 1
8 20120916 中国好声音录制花絮 综艺花絮 2
9 20120918 张玮独家专访 花絮 3
10 20120920 加多宝凉茶广告 综艺广告 4

这里UserID应该对应另一张User表，暂不列出。我们只需知道UserID的含义：

1代表浙江卫视； 2代表好声音剧组； 3代表XX微博； 4代表赞助商。调用查询接口的时候将上述5个条件同时输入：find(20120901, 20121001, "中国好声音", "综艺", "浙江卫视")。此时我们得到的记录应该有第1、2、3、4、5、7条。第6条由于不属于“浙江卫视”，不应该被选中。我们在设计RowKey时可以这样做：采用 UserID + CreateTime + FileID组成RowKey，这样既能满足多条件查询，又能有很快的查询速度。

需要注意以下几点：
（1）每条记录的RowKey，每个字段都需要填充到相同长度。假如预期我们最多有10万量级的用户，则userID应该统一填充至6位，如000001，000002…

（2）结尾添加全局唯一的FileID的用意也是使每个文件对应的记录全局唯一。避免当UserID与CreateTime相同时的两个不同文件记录相互覆盖。
按照这种RowKey存储上述文件记录，在HBase表中是下面的结构：
rowKey（userID 6 + time 8 + fileID 6） name category ….
Rowkey数据热点问题
要解决这个问题是非常容易的，只需要将所有的数据散列到全部的Region上即可。这是可以做到的，在rowkey前面加上一个非线性前缀，或者翻转rowkey，或者将rowkey hash化。

数据分散到不同的Region上存储，可以利用HBase的并行特点，可以利用MapReduce和spark计算框架并行处理数据。

下列代码是参考（在rowkey前面加上一个非线性前缀）

1import java.io.IOException;
  2import java.util.ArrayList;
  3import java.util.List;
  4import org.apache.hadoop.conf.Configuration;
  5import org.apache.hadoop.hbase.HBaseConfiguration;
  6import org.apache.hadoop.hbase.HColumnDescriptor;
  7import org.apache.hadoop.hbase.HTableDescriptor;
  8import org.apache.hadoop.hbase.KeyValue;
  9import org.apache.hadoop.hbase.MasterNotRunningException;
 10import org.apache.hadoop.hbase.TableName;
 11import org.apache.hadoop.hbase.ZooKeeperConnectionException;
 12import org.apache.hadoop.hbase.client.Get;
 13import org.apache.hadoop.hbase.client.HBaseAdmin;
 14import org.apache.hadoop.hbase.client.HTable;
 15import org.apache.hadoop.hbase.client.HTablePool;
 16import org.apache.hadoop.hbase.client.Put;
 17import org.apache.hadoop.hbase.client.Result;
 18import org.apache.hadoop.hbase.client.ResultScanner;
 19import org.apache.hadoop.hbase.client.Scan;
 20import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;
 21import org.apache.hadoop.hbase.filter.Filter;
 22import org.apache.hadoop.hbase.filter.FilterList;
 23import org.apache.hadoop.hbase.filter.PrefixFilter;
 24import org.apache.hadoop.hbase.filter.SingleColumnValueFilter;
 25import org.apache.hadoop.hbase.util.Bytes;
 26import org.slf4j.Logger;
 27import org.slf4j.LoggerFactory;
 28import com.kktest.hbase.HashChoreWoker;
 29import com.kktest.hbase.HashRowKeyGenerator;
 30import com.kktest.hbase.RowKeyGenerator;
 31import com.kktest.hbase.BitUtils;
 32/**
 33* hbase 客户端
 34*
 35*/
 36@SuppressWarnings("all")
 37public class HBaseClient {
 38private static Logger logger = LoggerFactory.getLogger(HBaseClient.class);
 39private static Configuration config;
 40static {
 41config = HBaseConfiguration.create();
 42config.set("hbase.zookeeper.quorum",
 43"192.168.1.100:2181,192.168.1.101:2181,192.168.1.103:2181");
 44}
 45/**
 46* 根据随机散列（hash）创建分区表
 47*
 48* @throws Exception
 49* hash_split_table
 50*/
 51public static void testHashAndCreateTable(String tableNameTmp,
 52String columnFamily) throws Exception { // 取随机散列 10 代表 10个分区
 53HashChoreWoker worker = new HashChoreWoker(1000000, 10);
 54byte[][] splitKeys = worker.calcSplitKeys();
 55HBaseAdmin admin = new HBaseAdmin(config);
 56TableName tableName = TableName.valueOf(tableNameTmp);
 57if (admin.tableExists(tableName)) {
 58try {
 59admin.disableTable(tableName);
 60} catch (Exception e) {
 61}
 62admin.deleteTable(tableName);
 63}
 64HTableDescriptor tableDesc = new HTableDescriptor(tableName);
 65HColumnDescriptor columnDesc = new HColumnDescriptor(
 66Bytes.toBytes(columnFamily));
 67columnDesc.setMaxVersions(1);
 68tableDesc.addFamily(columnDesc);
 69admin.createTable(tableDesc, splitKeys);
 70admin.close();
 71}
 72/**
 73* @Title: queryData
 74* @Description: 从HBase查询出数据
 75* @param tableName
 76* 表名
 77* @param rowkey
 78* rowkey
 79* @return 返回用户信息的list
 80* @throws Exception
 81*/
 82@SuppressWarnings("all")
 83public static ArrayList<String> queryData(String tableName, String rowkey)
 84throws Exception {
 85ArrayList<String> list = new ArrayList<String>();
 86logger.info("开始时间");
 87//在高并发的情况下，最好不要使用HTable/HTablePool，用asynchbase
 88HTable table = new HTable(config, tableName);
 89Get get = new Get(rowkey.getBytes()); // 根据rowkey查询，该操作会比较费时
 90Result r = table.get(get);
 91logger.info("结束时间");
 92KeyValue[] kv = r.raw();
 93for (int i = 0; i < kv.length; i++) {
 94// 循环每一列
 95String key = kv[i].getKeyString();
 96String value = kv[i].getValueArray().toString();
 97// 将查询到的结果写入List中
 98list.add(key + ":"+ value);
 99}// end of 遍历每一列
100return list;
101}
102/**
103* 增加表数据
104*
105* @param tableName
106* @param rowkey
107*/
108public static void insertData(String tableName, String rowkey) {
109HTable table = null;
110try {
111table = new HTable(config, tableName);
112// 一个PUT代表一行数据，再NEW一个PUT表示第二行数据,每行一个唯一的ROWKEY，此处rowkey为put构造方法中传入的值
113for (int i = 1; i < 100; i++) {
114byte[] result = getNumRowkey(rowkey,i);
115Put put = new Put(result);
116// 本行数据的第一列
117put.add(rowkey.getBytes(), "name".getBytes(),
118("aaa" + i).getBytes());
119// 本行数据的第三列
120put.add(rowkey.getBytes(), "age".getBytes(),
121("bbb" + i).getBytes());
122// 本行数据的第三列
123put.add(rowkey.getBytes(), "address".getBytes(),
124("ccc" + i).getBytes());
125table.put(put);
126}
127} catch (Exception e1) {
128e1.printStackTrace();
129}
130}
131//在旧的rowkey前加上hash值，产生新的rowkey
132private static byte[] getNewRowkey(String rowkey) {
133byte[] result = null;
134//hash值生成器
135RowKeyGenerator rkGen = new HashRowKeyGenerator();
136byte[] splitKeys = rkGen.nextId();
137byte[] rowkeytmp = rowkey.getBytes();
138//hash值字符串+旧rowkey=新rowkey
139result = new byte[splitKeys.length + rowkeytmp.length];
140System.arraycopy(splitKeys, 0, result, 0, splitKeys.length);
141System.arraycopy(rowkeytmp, 0, result, splitKeys.length,
142rowkeytmp.length);
143return result;
144}
145public static void main(String[] args) {
146RowKeyGenerator rkGen = new HashRowKeyGenerator();
147byte[] splitKeys = rkGen.nextId();
148System.out.println(splitKeys);
149}
150//与getNewRowkey类似
151private static byte[] getNumRowkey(String rowkey, int i) {
152byte[] result = null;
153RowKeyGenerator rkGen = new HashRowKeyGenerator();
154byte[] splitKeys = rkGen.nextId();
155byte[] rowkeytmp = rowkey.getBytes();
156byte[] intVal = BitUtils.getByteByInt(i);
157//hash值字符串+旧rowkey+参数i字符串=新rowkey
158result = new byte[splitKeys.length + rowkeytmp.length + intVal.length];
159System.arraycopy(splitKeys, 0, result, 0, splitKeys.length);
160System.arraycopy(rowkeytmp, 0, result, splitKeys.length,
161rowkeytmp.length);
162System.arraycopy(intVal, 0, result, splitKeys.length+rowkeytmp.length,
163intVal.length);
164return result;
165}
166/**
167* 删除表
168*
169* @param tableName
170*/
171public static void dropTable(String tableName) {
172try {
173HBaseAdmin admin = new HBaseAdmin(config);
174admin.disableTable(tableName);
175admin.deleteTable(tableName);
176} catch (MasterNotRunningException e) {
177e.printStackTrace();
178} catch (ZooKeeperConnectionException e) {
179e.printStackTrace();
180} catch (IOException e) {
181e.printStackTrace();
182}
183}
184/**
185* 查询所有
186*
187* @param tableName
188*/
189public static void QueryAll(String tableName) {
190HTable table = null;
191try {
192table = new HTable(config, tableName);
193ResultScanner rs = table.getScanner(new Scan());
194for (Result r : rs) {
195System.out.println("获得到rowkey:" + new String(r.getRow()));
196for (KeyValue keyValue : r.raw()) {
197System.out.println("列：" + new String(keyValue.getFamily())
198+ "====值:" + new String(keyValue.getValue()));
199}
200}
201} catch (IOException e) {
202e.printStackTrace();
203}
204}
205/**
206* 查询所有
207*
208* @param tableName
209*/
210public static void QueryByCondition1(String tableName) {
211HTable table = null;
212try {
213table = new HTable(config, tableName);
214Get scan = new Get("abcdef".getBytes());// 根据rowkey查询
215Result r = table.get(scan);
216System.out.println("获得到rowkey:" + new String(r.getRow()));
217for (KeyValue keyValue : r.raw()) {
218System.out.println("列：" + new String(keyValue.getFamily())
219+ "====值:" + new String(keyValue.getValue()));
220}
221} catch (IOException e) {
222e.printStackTrace();
223}
224}
225/**
226* 根据rowkwy前坠查询
227* @param tableName
228* @param rowkey
229*/
230public static void queryByRowKey(String tableName,String rowkey)
231{
232try {
233HTable table = new HTable(config, tableName);
234Scan scan = new Scan();
235scan.setFilter(new PrefixFilter(rowkey.getBytes()));
236ResultScanner rs = table.getScanner(scan);
237KeyValue[] kvs = null;
238for (Result tmp : rs)
239{
240kvs = tmp.raw();
241for (KeyValue kv : kvs)
242{
243System.out.print(kv.getRow()+" ");
244System.out.print(kv.getFamily()+" :");
245System.out.print(kv.getQualifier()+" ");
246System.out.print(kv.getTimestamp()+" ");
247System.out.println(kv.getValue());
248}
249}
250} catch (IOException e) {
251e.printStackTrace();
252}
253}
254/**
255* 查询所有
256*
257* @param tableName
258*/
259public static void QueryByCondition2(String tableName) {
260try {
261HTable table = new HTable(config, tableName);
262// 当列column1的值为aaa时进行查询
263Filter filter = new SingleColumnValueFilter(
264Bytes.toBytes("column1"), null, CompareOp.EQUAL,
265Bytes.toBytes("aaa"));
266Scan s = new Scan();
267s.setFilter(filter);
268ResultScanner rs = table.getScanner(s);
269for (Result r : rs) {
270System.out.println("获得到rowkey:" + new String(r.getRow()));
271for (KeyValue keyValue : r.raw()) {
272System.out.println("列：" + new String(keyValue.getFamily())
273+ "====值:" + new String(keyValue.getValue()));
274}
275}
276} catch (Exception e) {
277e.printStackTrace();
278}
279}
280/**
281* 查询所有
282*
283* @param tableName
284*/
285public static void QueryByCondition3(String tableName) {
286try {
287HTable table = new HTable(config, tableName);
288List<Filter> filters = new ArrayList<Filter>();
289Filter filter1 = new SingleColumnValueFilter(
290Bytes.toBytes("column1"), null, CompareOp.EQUAL,
291Bytes.toBytes("aaa"));
292filters.add(filter1);
293Filter filter2 = new SingleColumnValueFilter(
294Bytes.toBytes("column2"), null, CompareOp.EQUAL,
295Bytes.toBytes("bbb"));
296filters.add(filter2);
297Filter filter3 = new SingleColumnValueFilter(
298Bytes.toBytes("column3"), null, CompareOp.EQUAL,
299Bytes.toBytes("ccc"));
300filters.add(filter3);
301FilterList filterList1 = new FilterList(filters);
302Scan scan = new Scan();
303scan.setFilter(filterList1);
304ResultScanner rs = table.getScanner(scan);
305for (Result r : rs) {
306System.out.println("获得到rowkey:" + new String(r.getRow()));
307for (KeyValue keyValue : r.raw()) {
308System.out.println("列：" + new String(keyValue.getFamily())
309+ "====值:" + new String(keyValue.getValue()));
310}
311}
312rs.close();
313} catch (Exception e) {
314e.printStackTrace();
315}
316}
317}
HashChoreWoker：
319import java.util.Iterator;
320import java.util.TreeSet;
321import org.apache.hadoop.hbase.util.Bytes;
322public class HashChoreWoker{
323// 随机取机数目
324private int baseRecord;
325// rowkey生成器
326private RowKeyGenerator rkGen;
327// 取样时，由取样数目及region数相除所得的数量.
328private int splitKeysBase;
329// splitkeys个数
330private int splitKeysNumber;
331// 由抽样计算出来的splitkeys结果
332private byte[][] splitKeys;
333public HashChoreWoker(int baseRecord, int prepareRegions) {
334this.baseRecord = baseRecord;
335// 实例化rowkey生成器
336rkGen = new HashRowKeyGenerator();
337splitKeysNumber = prepareRegions - 1;
338splitKeysBase = baseRecord / prepareRegions;
339}
340public byte[][] calcSplitKeys() {
341splitKeys = new byte[splitKeysNumber][];
342// 使用treeset保存抽样数据，已排序过
343TreeSet<byte[]> rows = new TreeSet<byte[]>(Bytes.BYTES_COMPARATOR);
344for (int i = 0; i < baseRecord; i++) {
345rows.add(rkGen.nextId());
346}
347int pointer = 0;
348Iterator<byte[]> rowKeyIter = rows.iterator();
349int index = 0;
350while (rowKeyIter.hasNext()) {
351byte[] tempRow = rowKeyIter.next();
352rowKeyIter.remove();
353if ((pointer != 0) && (pointer % splitKeysBase == 0)) {
354if (index < splitKeysNumber) {
355splitKeys[index] = tempRow;
356index++;
357}
358}
359pointer++;
360}
361rows.clear();
362rows = null;
363return splitKeys;
364}
365}
HashRowKeyGenerator：
367import org.apache.hadoop.hbase.util.Bytes;
368import org.apache.hadoop.hbase.util.MD5Hash;
369import com.kktest.hbase.BitUtils;
370public class HashRowKeyGenerator implements RowKeyGenerator {
371private static long currentId = 1;
372private static long currentTime = System.currentTimeMillis();
373//private static Random random = new Random();
374public byte[] nextId()
375{
376try {
377currentTime = getRowKeyResult(Long.MAX_VALUE - currentTime);
378byte[] lowT = Bytes.copy(Bytes.toBytes(currentTime), 4, 4);
379byte[] lowU = Bytes.copy(Bytes.toBytes(currentId), 4, 4);
380byte[] result = Bytes.add(MD5Hash.getMD5AsHex(Bytes.add(lowT, lowU))
381.substring(0, 8).getBytes(), Bytes.toBytes(currentId));
382return result;
383} finally {
384currentId++;
385}
386}
387/**
388* getRowKeyResult
389* @param tmpData
390* @return
391*/
392public static long getRowKeyResult(long tmpData)
393{
394String str = String.valueOf(tmpData);
395StringBuffer sb = new StringBuffer();
396char[] charStr = str.toCharArray();
397for (int i = charStr.length -1 ; i > 0; i--)
398{
399sb.append(charStr[i]);
400}
401return Long.parseLong(sb.toString());
402}
403}

上述代码示例通过getNewRowkey和getNumRowkey产生新的rowkey，即在rowkey前添加hash值，解决了rowkey热点问题。
当然该示例还包括了put，get，scan等操作。

顺便提一下rowkey设计注意点：

避免rowkey热点，通过hash、翻转rowkey、组合rowkey等方法可以避免这个问题，尽量避免直接使用time作为rowkey。
充分利用rowkey有序的特点，key-value对在hbase中的存储，是按照key来进行排序的。
使用多个字段组合成rowkey。

玩转HBase RowKey设计及案例(code)，你会吗？

中国HBase技术社区第二届MeetUp

在7月21号13点，北京线下召开

Rowkey长度原则

Rowkey哈希原则

KeyGenerator及实现

测试代码

顺便提一下rowkey设计注意点：

更多技术交流，可关注微信交流群，微信公众号等：

猜你喜欢

热点阅读

玩转HBase RowKey设计及案例(code)，你会吗？

中国HBase技术社区第二届MeetUp

在7月21号13点，北京线下召开

Rowkey长度原则

Rowkey哈希原则

KeyGenerator及实现

测试代码

顺便提一下rowkey设计注意点：

更多技术交流，可关注微信交流群，微信公众号等：

猜你喜欢

热点阅读

在7月21号13点，北京线下召开