Hadoop Beginner Series -- Using HBase Filters (Master Them in One Post)
2020-04-15
微生活_小阿楠
Related posts in this series:
- Hadoop Beginner Series -- HBase fundamentals
- Hadoop Beginner Series -- Importing local data into HBase
- Hadoop Beginner Series -- Creating an HBase table with Java code
- Hadoop Beginner Series -- Using HBase Filters (Master Them in One Post)
1) The difference between BinaryComparator() and SubstringComparator()
Concepts in brief:
- BinaryComparator compares against the specified byte array byte by byte, in index order, using Bytes.compareTo(byte[]).
- SubstringComparator checks whether the supplied substring occurs anywhere in the cell value.
Summary: use BinaryComparator when you know the exact bytes you are matching (for example a column family or column qualifier); use SubstringComparator for fuzzy, contains-style matching (typically on cell values).
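To make the difference concrete, here is a minimal fragment (my own illustration, reusing the jobs table, the info column family, the job_name column and the CompareOp-style API from the full listing below):

// Exact match: the whole cell value must equal the given bytes.
Filter exactMatch = new SingleColumnValueFilter(
        "info".getBytes(), "job_name".getBytes(),
        CompareOp.EQUAL, new BinaryComparator("机器学习".getBytes()));

// Contains match: the cell value only has to contain the substring.
Filter containsMatch = new SingleColumnValueFilter(
        "info".getBytes(), "job_name".getBytes(),
        CompareOp.EQUAL, new SubstringComparator("机器学习"));

Scan scan = new Scan();
// A value such as "机器学习工程师" passes the substring filter,
// but would fail the BinaryComparator (exact-match) version.
scan.setFilter(containsMatch);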
Note: all of the code below has been verified by the author, and the exercises go from easy to hard. Almost every exercise combines two or more filters, i.e. multi-condition filtering.
Exercises: complete the following query tasks:
- (1) Query the job titles (job_name) of positions at companies in the "互联网" (Internet) industry;
- (2) Query the job details (job_info) of positions whose education requirement is "硕士" (master's degree);
- (3) Query the "机器学习" (machine learning) positions page by page, 2 rows per page, for 2 pages (not implemented in the class below; see the PageFilter sketch after the code listing);
- (4) Query the job details of positions in "北京" or "上海" with a salary of "10k-20k";
- (5) Query the job details of positions in "北京" whose company size is above "100人" (100 people);
2) Without further ado, here is the code:
package hbase_put_scan;
import java.io.IOException;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.NamespaceDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.filter.BinaryComparator;
import org.apache.hadoop.hbase.filter.ColumnPrefixFilter;
import org.apache.hadoop.hbase.filter.CompareFilter;
import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;
import org.apache.hadoop.hbase.filter.FamilyFilter;
import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.filter.FilterList;
import org.apache.hadoop.hbase.filter.PageFilter;
import org.apache.hadoop.hbase.filter.QualifierFilter;
import org.apache.hadoop.hbase.filter.SingleColumnValueFilter;
import org.apache.hadoop.hbase.filter.SubstringComparator;
import org.apache.hadoop.hbase.filter.ValueFilter;
import org.apache.hadoop.hbase.util.Bytes;
public class HbaseFilterTest {

    public static Configuration conf;
    public static Connection conn;

    static {
        // 1. Build the configuration (the property names must be spelled exactly)
        conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.property.clientPort", "2181");
        conf.set("hbase.zookeeper.quorum", "centos");
        conf.set("hbase.master", "centos:60000");
        try {
            // 2. Create the connection
            conn = ConnectionFactory.createConnection(conf);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
    // (1) Query the job titles of positions at companies in the "互联网" (Internet) industry.
    // tableName/family: the table and column family to scan;
    // company_industry: the qualifier whose value is matched; qualifier: the value to look for;
    // job_name: the qualifier whose cells are returned.
    public static void scanColumnFamily(String tableName, String family, String company_industry, String qualifier, String job_name) throws IOException {
        // 3. Get a Table instance for the given table name
        Table table = conn.getTable(TableName.valueOf(tableName));
        // 4. Build a Scan instance
        Scan scan = new Scan();
        // Single-column value filter: keep rows whose company_industry value contains the given string
        SingleColumnValueFilter singleColumnValueFilter = new SingleColumnValueFilter(
                family.getBytes(),
                company_industry.getBytes(),
                CompareOp.EQUAL,
                new SubstringComparator(qualifier)
        );
        // Qualifier filter: only return the job_name column
        Filter qualifierFilter = new QualifierFilter(CompareOp.EQUAL, new BinaryComparator(job_name.getBytes()));
        // AND the two filters together (FilterList defaults to MUST_PASS_ALL)
        FilterList filterList = new FilterList();
        filterList.addFilter(singleColumnValueFilter);
        filterList.addFilter(qualifierFilter);
        scan.setFilter(filterList);
        ResultScanner resultScanner = table.getScanner(scan);
        // 5. Iterate over the ResultScanner
        int a = 0;
        for (Result result : resultScanner) {
            //System.out.println(new String(result.getValue(family.getBytes(), job_name.getBytes())));
            // Iterate over the cells of each Result
            List<Cell> cells = result.listCells();
            for (Cell cell : cells) {
                a++;
                System.out.print("rowkey: " + new String(CellUtil.cloneRow(cell)));
                System.out.print(" family: " + new String(CellUtil.cloneFamily(cell)));
                System.out.print(" qualifier: " + new String(CellUtil.cloneQualifier(cell)));
                System.out.println(" value: " + new String(CellUtil.cloneValue(cell)));
            }
            System.out.println("=============================================" + a);
        }
        // 6. Release the resources we opened
        resultScanner.close();
        table.close();
        conn.close();
    }
    // (2) Query the job details of positions whose education requirement is "硕士".
    public static void job_info(String tableName, String family, String job_edu_require, String qualifier, String job_info) throws IOException {
        Table table = conn.getTable(TableName.valueOf(tableName));
        Scan scan = new Scan();
        // Single-column value filter: job_edu_require value must contain the given string
        SingleColumnValueFilter singleColumnValueFilter = new SingleColumnValueFilter(
                family.getBytes(),
                job_edu_require.getBytes(),
                CompareOp.EQUAL,
                new SubstringComparator(qualifier)
        );
        scan.setFilter(singleColumnValueFilter);
        ResultScanner resultScanner = table.getScanner(scan);
        int a = 0;
        for (Result result : resultScanner) {
            a++;
            // Approach 1: read a single column straight from the Result
            System.out.println("RowKey:" + new String(result.getRow()) + " qualifier=" + new String(result.getValue(family.getBytes(), job_info.getBytes())));
            System.out.println("========================");
            // Approach 2: iterate over the cells, as in exercise (1)
        }
        System.out.println(a);
    }
    // (4) Query the job details of positions in "北京" or "上海" with a salary of "10k-20k".
    public static void company_job_info() throws IOException {
        // 3. Get a Table instance for the "jobs" table
        Table table = conn.getTable(TableName.valueOf("jobs"));
        // 4. Build a Scan instance
        Scan scan = new Scan();
        // Single-column value filter: salary is "10k-20k"
        SingleColumnValueFilter singleColumnValueFilter_salary = new SingleColumnValueFilter("info".getBytes(), "job_salary".getBytes(), CompareOp.EQUAL, new SubstringComparator("10k-20k"));
        // Single-column value filter: location contains "北京"
        SingleColumnValueFilter singleColumnValueFilter1 = new SingleColumnValueFilter("info".getBytes(), "company_location".getBytes(), CompareOp.EQUAL, new SubstringComparator("北京"));
        // Single-column value filter: location contains "上海"
        SingleColumnValueFilter singleColumnValueFilter2 = new SingleColumnValueFilter("info".getBytes(), "company_location".getBytes(), CompareOp.EQUAL, new SubstringComparator("上海"));
        // Qualifier filters: only return the job_* columns plus company_location
        Filter qualifierFilter = new QualifierFilter(CompareOp.EQUAL, new SubstringComparator("job_"));
        Filter qualifierFilter_companylocation = new QualifierFilter(CompareOp.EQUAL, new BinaryComparator("company_location".getBytes()));
        // Location condition: "北京" OR "上海"
        FilterList filterList_location = new FilterList(FilterList.Operator.MUST_PASS_ONE);
        filterList_location.addFilter(singleColumnValueFilter1);
        filterList_location.addFilter(singleColumnValueFilter2);
        // Output columns: OR the qualifier filters (add any column you want returned here)
        FilterList filterList_column = new FilterList(FilterList.Operator.MUST_PASS_ONE);
        filterList_column.addFilter(qualifierFilter);
        filterList_column.addFilter(qualifierFilter_companylocation);
        // AND everything above together
        FilterList filterList = new FilterList();
        filterList.addFilter(singleColumnValueFilter_salary);
        filterList.addFilter(filterList_location);
        filterList.addFilter(filterList_column);
        scan.setFilter(filterList);
        ResultScanner resultScanner = table.getScanner(scan);
        // 5. Iterate over the ResultScanner
        for (Result result : resultScanner) {
            // Iterate over the cells of each Result
            List<Cell> cells = result.listCells();
            for (Cell cell : cells) {
                System.out.print("rowkey: " + new String(CellUtil.cloneRow(cell)));
                System.out.print(" family: " + new String(CellUtil.cloneFamily(cell)));
                System.out.print(" qualifier: " + new String(CellUtil.cloneQualifier(cell)));
                System.out.println(" value: " + new String(CellUtil.cloneValue(cell)));
            }
            System.out.println("=============================================");
        }
        // 6. Release the resources we opened
        resultScanner.close();
        table.close();
        conn.close();
    }
    // (5) Query the job details of positions in "北京" at companies with more than 100 people.
    public static void company_people_job_info() throws IOException {
        // 3. Get a Table instance for the "jobs" table
        Table table = conn.getTable(TableName.valueOf("jobs"));
        // 4. Build a Scan instance
        Scan scan = new Scan();
        // Single-column value filter: location contains "北京"
        SingleColumnValueFilter singleColumnValueFilter = new SingleColumnValueFilter(
                "info".getBytes(),
                "company_location".getBytes(),
                CompareOp.EQUAL,
                new SubstringComparator("北京")
        );
        // Single-column value filter: company size above 100 people
        // (approximated here by matching the substring "00" in company_people)
        SingleColumnValueFilter singleColumnValueFilter_people = new SingleColumnValueFilter("info".getBytes(), "company_people".getBytes(), CompareOp.EQUAL, new SubstringComparator("00"));
        // AND the two filters together
        FilterList filterList = new FilterList();
        filterList.addFilter(singleColumnValueFilter);
        filterList.addFilter(singleColumnValueFilter_people);
        scan.setFilter(filterList);
        ResultScanner resultScanner = table.getScanner(scan);
        // 5. Iterate over the ResultScanner
        int a = 0;
        for (Result result : resultScanner) {
            // Iterate over the cells of each Result
            List<Cell> cells = result.listCells();
            for (Cell cell : cells) {
                a++;
                System.out.print("rowkey: " + new String(CellUtil.cloneRow(cell)));
                System.out.print(" family: " + new String(CellUtil.cloneFamily(cell)));
                System.out.print(" qualifier: " + new String(CellUtil.cloneQualifier(cell)));
                System.out.println(" value: " + new String(CellUtil.cloneValue(cell)));
            }
            System.out.println("=============================================" + a);
        }
        // 6. Release the resources we opened
        resultScanner.close();
        table.close();
        conn.close();
    }
    public static void main(String[] args) throws IOException {
        // (1) Query the job titles of companies in the "互联网" industry
        //scanColumnFamily("jobs", "info", "company_industry", "互联网", "job_name");
        // (2) Query the job details of positions requiring "硕士" (two approaches)
        //job_info("jobs", "info", "job_edu_require", "硕士", "job_info");
        //scanColumnFamily("jobs", "info", "job_edu_require", "硕士", "job_info");
        // (4) Query the job details of positions in "北京" or "上海" with a salary of "10k-20k"
        company_job_info();
        // (5) Query the job details of positions in "北京" at companies with more than 100 people
        //company_people_job_info();
    }
}
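Exercise (3), paging through the "机器学习" positions two rows per page for two pages, is not implemented in the class above. Below is a minimal sketch of one way to do it with the already-imported PageFilter; it is my own illustration (a hypothetical helper method that could be added to HbaseFilterTest), assuming the same jobs/info schema and that job_name values contain the substring "机器学习". Since PageFilter only limits the row count per region server, the sketch also enforces the page size on the client and restarts each new page just after the last row key of the previous one.

    // (3) Page through the "机器学习" positions, two rows per page, for two pages.
    // Hypothetical helper, not part of the original post's class.
    public static void pageMachineLearningJobs() throws IOException {
        Table table = conn.getTable(TableName.valueOf("jobs"));
        byte[] lastRow = null;
        for (int page = 1; page <= 2; page++) {
            Scan scan = new Scan();
            // AND a substring match on job_name with a 2-row PageFilter
            FilterList filterList = new FilterList(FilterList.Operator.MUST_PASS_ALL);
            filterList.addFilter(new SingleColumnValueFilter(
                    "info".getBytes(), "job_name".getBytes(),
                    CompareOp.EQUAL, new SubstringComparator("机器学习")));
            filterList.addFilter(new PageFilter(2));
            scan.setFilter(filterList);
            if (lastRow != null) {
                // Resume just after the last row key of the previous page
                scan.setStartRow(Bytes.add(lastRow, new byte[]{0}));
            }
            ResultScanner scanner = table.getScanner(scan);
            System.out.println("---------- page " + page + " ----------");
            int rows = 0;
            for (Result result : scanner) {
                lastRow = result.getRow();
                for (Cell cell : result.listCells()) {
                    System.out.print("rowkey: " + new String(CellUtil.cloneRow(cell)));
                    System.out.print(" qualifier: " + new String(CellUtil.cloneQualifier(cell)));
                    System.out.println(" value: " + new String(CellUtil.cloneValue(cell)));
                }
                if (++rows >= 2) {  // enforce the page size on the client as well
                    break;
                }
            }
            scanner.close();
        }
        table.close();
    }

It would be called from main() like the other methods (with the other calls commented out, since each of them closes the shared connection when it finishes).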
