您的位置:首页 > 编程语言 > Java开发

布尔运算--java位图搜索实现

2016-06-11 18:39 316 查看

前言

布尔运算是伟大的布尔发明的代数运算,只有简单的逻辑与、或、非。一开始人们并没有发现它有什么用,后来它对计算机的影响却非常大,从基础设施到搜索引擎无处不在。

场景

身为码农,在日常工作中,我也遇到了涉及它的需求。场景是这样的:我们的后台服务有一个复杂的配置,涉及到对用户多个维度的匹配。因为配置变化不会很频繁,每次都查询数据库显然不划算;数据量也不算多,不到万的级别(毕竟是人工配置的)。

这样很自然地想到把配置缓存到服务器的内存中。但是总不能蛮力地一条条匹配吧,那样太啰嗦,效率也会很低,而且逻辑判断会有点复杂。配置主要有四个维度:机型、渠道、国家、版本,每个维度都是一个数组,有的值是["ALL"],意思是全部匹配。

引言

于是想到了布尔运算,比较简单,每个值都对应一长串数字位,有多少条数据,每个维度的每个值就是最多多少位。

  如总共10000条配置,则“中国”对应的位向量最多10000位,对于“中国”这个值,第100位是1就表示第100条配置包含这个"中国"维度值。

  查询国家是“中国”的就是从map中取“中国”对应的位向量和"ALL"对应的位向量做 或 运算。Java自带了大整数的实现:BigInteger;可以给构造方法传递一个代表二进制位的byte数组,byte数组的长度显然是: (配置条数/8)+1,BigInteger内部还会做些处理,主要是去除左边连续的0;

马上开始

由于我们是用的MongoDB数据库,操作是用的spring-data对mongoDB的封装。有Criteria和Query的API,这也是spring一贯的风格。

 于是,实现了一个通用的位图查询,和Criteria的API相似,这样代码改动可以很小(把import spring的包换成自己的就差不多完事了),不啰嗦了,上代码:

  

   先上测试case(例子有点low,将就看吧):

import java.util.Arrays;
import java.util.List;
import java.util.concurrent.atomic.AtomicInteger;

import org.junit.Assert;
import org.junit.Test;

import bitmapSearch.BitmapSearcher;
import bitmapSearch.Criteria;

/**
 * Example test for {@link BitmapSearcher}: indexes a handful of {@code Car} beans and runs a
 * combined AND/OR query plus an IN query against them.
 */
public class BitmapSearchTest {

    @Test
    public void testQuery() {
        List<Car> cars = Arrays.asList(new Car(), //
                new Car("大众").setToAreas("ALL"), //
                new Car("耗子车", 3).setToAreas("某山区"), //
                new Car("东方红", 6).setToAreas("中国"));
        // Ids come from a JVM-wide counter, so look the expected id up instead of hard-coding it;
        // otherwise the test only passes when these are the first four cars ever created.
        final Car expected = cars.get(3);

        BitmapSearcher searcher = new BitmapSearcher(cars, new BitmapSearcher.IndexCreator<Car, String>() {
            @Override
            public String[] indexKeys() {
                return new String[] { "id", "brand", "legs", "toAreas", "desc" };
            }

            @Override
            public Object[] fieldValue(Car bean, String indexKey) {
                if ("id".equals(indexKey)) {
                    return new Object[] { bean.id };
                } else if ("brand".equals(indexKey)) {
                    return new Object[] { bean.getBrand() };
                } else if ("legs".equals(indexKey)) {
                    return new Object[] { bean.getLegs() };
                } else if ("toAreas".equals(indexKey)) {
                    // May be null for cars without target areas; the searcher skips null arrays.
                    return bean.getToAreas();
                } else if ("desc".equals(indexKey)) {
                    return new Object[] { bean.getDesc() };
                }
                return null;
            }
        });

        // legs == 6 AND (id == <expected id> OR desc == "MadeInChina")
        Criteria sixLegged = Criteria.where("legs").is(6)//
                .andOperator(new Criteria().orOperator(//
                        Criteria.where("id").is(expected.id), //
                        Criteria.where("desc").is("MadeInChina")));
        Car rs1 = searcher.findOne(sixLegged, null);
        Assert.assertNotNull(rs1);
        Assert.assertEquals("东方红", rs1.brand);

        // toAreas contains "中国" or "ALL"
        List<Car> rs2 = searcher.find(Criteria.where("toAreas").in("中国", "ALL"));
        Assert.assertNotNull(rs2);
        Assert.assertEquals(2, rs2.size());
    }

    /** Simple bean used as search target; each instance receives the next id from a shared counter. */
    private static class Car {
        /** Shared id source for all cars created in this JVM. */
        private static final AtomicInteger ID_GEN = new AtomicInteger();

        final int id;
        String brand = "QQ";
        int legs = 4;
        String[] toAreas;
        String desc = "";

        public Car() {
            super();
            id = ID_GEN.incrementAndGet();
        }

        public Car(String brand) {
            this();
            this.brand = brand;
        }

        public Car(String brand, int legs) {
            this();
            this.brand = brand;
            this.legs = legs;
        }

        public String getBrand() {
            return brand;
        }

        public void setBrand(String brand) {
            this.brand = brand;
        }

        public int getLegs() {
            return legs;
        }

        public void setLegs(int legs) {
            this.legs = legs;
        }

        /** Fluent setter so the target areas can be supplied inline when building the fixture list. */
        public Car setToAreas(String... toAreas) {
            this.toAreas = toAreas;
            return this;
        }

        public String[] getToAreas() {
            return toAreas;
        }

        public String getDesc() {
            return desc;
        }

        public void setDesc(String desc) {
            this.desc = desc;
        }

    }
}





其中,抽象出一个内部接口IndexCreator,把构造索引以及如何获取索引字段值的工作交给用户:

/**
*
*/
package bitmapSearch;

import java.math.BigInteger;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * Generic bitmap search utility.
 * <p>
 * For every index key, each distinct field value is mapped to a {@link BigInteger} bitmap in which
 * bit {@code i} is set when the bean at position {@code i} of the indexed list carries that value.
 * A {@link Criteria} query is answered by combining those bitmaps and translating the set bits of
 * the result back into beans.
 *
 * @author HouKangxi
 */
public final class BitmapSearcher {
    /**
     * Snapshot of the indexed beans, read-only. Bit positions in every bitmap refer to positions
     * in this list.
     */
    @SuppressWarnings("rawtypes")
    private final List beansList;
    /**
     * Search index: index key -> (field value -> bitmap of bean positions). Never null; empty when
     * no beans were supplied.
     */
    private final Map<Object, Map<Object, BigInteger>> indexMap;
    /**
     * Callback that names the index keys and extracts field values from a bean.
     */
    @SuppressWarnings("rawtypes")
    private final IndexCreator indexCreator;

    /**
     * Strategy supplied by the caller that decides which indexes exist and how their values are
     * read from a bean.
     *
     * @param <T>
     *            type of the indexed beans
     * @param <INDEX>
     *            type of the index keys
     */
    public static interface IndexCreator<T, INDEX> {
        /**
         * Returns the keys of all indexes to build.
         *
         * @return the index keys
         */
        INDEX[] indexKeys();

        /**
         * Returns the value(s) a bean has for the given index.
         *
         * @param bean
         *            an element of the indexed list
         * @param indexKey
         *            one of the keys returned by {@link #indexKeys()}
         * @return the field values; a {@code null} array or {@code null} elements are skipped by
         *         the searcher
         */
        Object[] fieldValue(T bean, INDEX indexKey);
    }

    /**
     * Builds the searcher and all of its indexes.
     *
     * @param objList
     *            beans to index; {@code null} or empty yields a searcher that never finds anything.
     *            The list is copied, so later changes by the caller cannot desynchronise the
     *            bitmaps from the beans they point at.
     * @param ic
     *            index creator; dereferenced (and therefore required) when {@code objList} is
     *            non-empty
     */
    public <T, INDEX> BitmapSearcher(List<T> objList, IndexCreator<T, INDEX> ic) {
        indexCreator = ic;
        if (objList != null && !objList.isEmpty()) {
            beansList = Collections.unmodifiableList(new ArrayList<T>(objList));
            indexMap = createAllIndex();
        } else {
            beansList = Collections.emptyList();
            indexMap = Collections.emptyMap();
        }
    }

    /**
     * Looks up the value-to-bitmap table of one index.
     *
     * @param key
     *            index key
     * @return the table, or {@code null} when the key is not indexed or no bean has a value for it
     */
    Map<Object, BigInteger> getBitmap(Object key) {
        return indexMap.get(key);
    }

    /**
     * Finds a single bean.
     *
     * @param <T>
     *            bean type expected by the caller
     * @param criteria
     *            query conditions
     * @param sorter
     *            optional ordering; when given, the smallest matching bean according to it is
     *            returned, otherwise the match with the lowest list position
     * @return the selected bean, or {@code null} when nothing matches
     */
    public <T> T findOne(Criteria criteria, Comparator<T> sorter) {
        List<T> matches = this.<T>find(criteria);
        if (matches == null || matches.isEmpty()) {
            return null;
        }
        if (sorter == null) {
            return matches.get(0);
        }
        // Only the first element of the ordering is needed, so a full sort is unnecessary.
        return Collections.min(matches, sorter);
    }

    /**
     * Finds all beans matching the criteria.
     *
     * @param <T>
     *            bean type expected by the caller
     * @param criteria
     *            query conditions
     * @return the matching beans in list order, or {@code null} when nothing matches
     */
    @SuppressWarnings("unchecked")
    public <T> List<T> find(Criteria criteria) {
        if (beansList.isEmpty()) {
            return null;
        }
        BigInteger bits = criteria.proc(this, null);
        if (bits == null) {
            return null;
        }
        // Scan only the valid positions. The result may be negative (BigInteger.not() is used by
        // the NOT criteria), i.e. conceptually has infinitely many high bits set; repeatedly
        // clearing the lowest set bit would then never terminate. testBit uses two's-complement
        // semantics, so it answers correctly for negative values as well.
        final int size = beansList.size();
        ArrayList<T> matches = new ArrayList<T>();
        for (int position = 0; position < size; position++) {
            if (!bits.testBit(position)) {
                continue;
            }
            Object bean = beansList.get(position);
            // Null beans are never indexed, so they can only show up through a negated bitmap.
            if (bean != null) {
                matches.add((T) bean);
            }
        }
        return matches.isEmpty() ? null : matches;
    }

    /**
     * Builds the bitmaps for every index key over the whole bean list.
     *
     * @return index key -> (field value -> bitmap)
     */
    private Map<Object, Map<Object, BigInteger>> createAllIndex() {
        Object[] keyNames = indexCreator.indexKeys();
        Map<Object, Map<Object, byte[]>> rawIndex = new HashMap<Object, Map<Object, byte[]>>();
        for (Object keyName : keyNames) {
            rawIndex.put(keyName, new HashMap<Object, byte[]>());
        }
        final int total = beansList.size();
        int position = 0;
        for (Object bean : beansList) {
            createIndex(bean, rawIndex, total, position, keyNames);
            position++;
        }
        return bytes2BigInteger(rawIndex);
    }

    /**
     * Records one bean in the raw (byte array) bitmaps of every index.
     *
     * @param o
     *            the bean; {@code null} beans are ignored
     * @param t_Index
     *            raw index being filled
     * @param SUM
     *            total number of beans, which fixes the byte array length
     * @param index
     *            position of the bean in the list
     * @param indexes
     *            all index keys
     */
    private void createIndex(Object o, Map<Object, Map<Object, byte[]>> t_Index, final int SUM, final int index,
            Object[] indexes) {
        if (o == null) {
            return;
        }
        // Big-endian layout as expected by BigInteger: the last byte holds positions 0..7.
        // One extra byte (or at least one spare high bit) is always left above the highest
        // position, so the most significant bit is never set.
        final int bytesLen = (SUM >> 3) + 1;
        final int byteIndex = bytesLen - 1 - (index >> 3);
        final int mask = 1 << (index % 8);

        for (Object key : indexes) {
            @SuppressWarnings("unchecked")
            Object[] fieldValues = indexCreator.fieldValue(o, key);
            if (fieldValues == null) {
                continue;
            }
            Map<Object, byte[]> valueBits = t_Index.get(key);
            for (Object fieldValue : fieldValues) {
                if (fieldValue == null) {
                    continue;
                }
                byte[] bits = valueBits.get(fieldValue);
                if (bits == null) {
                    bits = new byte[bytesLen];
                    valueBits.put(fieldValue, bits);
                }
                bits[byteIndex] |= mask;
            }
        }
    }

    /**
     * Converts the raw byte-array bitmaps into {@link BigInteger} bitmaps.
     * <p>
     * Index keys for which no bean produced a value are left out of the result.
     *
     * @param t_Index
     *            raw index: index key -> (field value -> big-endian bit array)
     * @return index key -> (field value -> bitmap)
     */
    private static Map<Object, Map<Object, BigInteger>> bytes2BigInteger(Map<Object, Map<Object, byte[]>> t_Index) {
        Map<Object, Map<Object, BigInteger>> bigInts = new HashMap<Object, Map<Object, BigInteger>>(t_Index.size());
        for (Map.Entry<Object, Map<Object, byte[]>> entry : t_Index.entrySet()) {
            Map<Object, byte[]> raw = entry.getValue();
            if (raw == null || raw.isEmpty()) {
                continue;
            }
            // Build a fresh, correctly typed map instead of overwriting the byte[] values in place.
            Map<Object, BigInteger> bitmaps = new HashMap<Object, BigInteger>(raw.size());
            for (Map.Entry<Object, byte[]> valueEntry : raw.entrySet()) {
                // Sign-magnitude constructor: the bytes are a plain bit pattern, never negative.
                bitmaps.put(valueEntry.getKey(), new BigInteger(1, valueEntry.getValue()));
            }
            bigInts.put(entry.getKey(), bitmaps);
        }
        return bigInts;
    }

}


下面是Criteria的一些实现类:

/**
*
*/
package bitmapSearch;

import java.math.BigInteger;
import java.util.LinkedList;
import java.util.List;

/**
 * Generic query constraint with a fluent, chainable API.
 * <p>
 * A criteria owns a chain of sub-criteria; evaluating it feeds the result of each chain element
 * into the next one.
 *
 * @author houkangxi
 *
 */
public class Criteria {
    /** Index key this criteria refers to; may be null for pure containers. */
    protected Object key;

    /** Sub-criteria evaluated in order by {@link #proc}. */
    protected List<Criteria> chain;
    /**
     * Criteria that value conditions ({@code is}/{@code ne}/{@code in}) are attached to; starts as
     * this instance and is replaced by whatever was last added to this instance's own chain.
     */
    private Criteria prev = this;

    /** Creates a criteria without a key and with an empty chain. */
    public Criteria() {
        chain = new LinkedList<Criteria>();
    }

    /** Creates a criteria for the given index key with an empty chain. */
    public Criteria(Object key) {
        this();
        this.key = key;
    }

    /**
     * Creates a criteria whose chain is left unset so that the subclass can assign it.
     *
     * @param noInitChain
     *            unused marker that only selects this constructor
     */
    Criteria(int noInitChain) {
    }

    /** Starts a new criteria on the given index key. */
    public static Criteria where(Object key) {
        return new Criteria(key);
    }

    /** Appends to this criteria's chain and makes the appended element the new attachment point. */
    private Criteria addToChain(Criteria c) {
        prev = c;
        chain.add(c);
        return this;
    }

    /** Hands a value condition to the current attachment point and keeps the fluent chain going. */
    private Criteria attachCondition(Criteria condition) {
        prev.addToChain(condition);
        return this;
    }

    /** Adds an "equals" condition on the key of the current attachment point. */
    public Criteria is(Object val) {
        return attachCondition(new CriteriaOpIs(prev.key, val));
    }

    /** Adds a "not equals" condition on the key of the current attachment point. */
    public Criteria ne(Object val) {
        return attachCondition(new CriteriaOpNot(prev.key, val));
    }

    /** Adds an "is one of" condition on the key of the current attachment point. */
    public Criteria in(Object... val) {
        return attachCondition(new CriteriaOpIn(prev.key, val));
    }

    /** Opens an AND group for the given key; following value conditions attach to that group. */
    public Criteria and(String key) {
        return addToChain(new CriteriaOpAnd(key));
    }

    /** Opens an OR group for the given key; following value conditions attach to that group. */
    public Criteria or(String key) {
        return addToChain(new CriteriaOpOr(key));
    }

    /** Appends an AND group made of the given criteria. */
    public Criteria andOperator(Criteria... o) {
        return addToChain(new CriteriaOpAnd(o));
    }

    /** Appends an OR group made of the given criteria. */
    public Criteria orOperator(Criteria... o) {
        return addToChain(new CriteriaOpOr(o));
    }

    /**
     * Evaluates the chain from first to last element.
     *
     * @param sea
     *            searcher providing the bitmaps
     * @param prev
     *            result handed in by the caller; passed to the first chain element
     * @return the result of the last chain element, {@code prev} when the chain is empty, or
     *         {@code null} when there is no chain
     */
    BigInteger proc(BitmapSearcher sea, BigInteger prev) {
        if (chain == null) {
            return null;
        }
        BigInteger running = prev;
        for (Criteria step : chain) {
            running = step.proc(sea, running);
        }
        return running;
    }

    @Override
    public String toString() {
        StringBuilder text = new StringBuilder(getClass().getSimpleName());
        text.append("@key=").append(key);
        return text.toString();
    }
}

/**
*
*/
package bitmapSearch;

import java.math.BigInteger;
import java.util.Arrays;

/**
 * Base class for composite criteria (AND / OR) that combine the results of several children with
 * a binary operator.
 *
 * @author houkangxi
 *
 */
abstract class CriteriaChain extends Criteria {

    /** Creates an initially empty group bound to an index key; conditions are attached later. */
    CriteriaChain(String key) {
        super(key);
    }

    /** Creates a group over the given child criteria. */
    CriteriaChain(Criteria[] list) {
        super(0);
        this.chain = Arrays.asList(list);
    }

    /**
     * Combines two partial results.
     *
     * @param o1
     *            left operand, may be {@code null}
     * @param o2
     *            right operand, may be {@code null}
     * @return the combined bitmap, or {@code null}
     */
    protected abstract BigInteger op(BigInteger o1, BigInteger o2);

    /**
     * Evaluates every child on its own, folds the results with {@link #op}, and finally combines
     * the folded value with {@code prev}.
     * <p>
     * Children are evaluated independently (they do not receive the running result): feeding the
     * running result into a composite child would make that child fold it in a second time, e.g.
     * {@code A and (A or X)} instead of {@code A and X}. The fold also does not stop at a
     * {@code null} partial result, because whether {@code null} ends the computation is up to
     * {@link #op} (for OR a {@code null} operand is simply ignored).
     *
     * @param sea
     *            searcher providing the bitmaps
     * @param prev
     *            result of the preceding chain element, may be {@code null}
     * @return {@code op(prev, folded children)}, or {@code null} when this group has no children
     */
    @Override
    protected final BigInteger proc(BitmapSearcher sea, BigInteger prev) {
        if (chain == null || chain.isEmpty()) {
            return null;
        }
        BigInteger combined = null;
        boolean first = true;
        for (Criteria child : chain) {
            BigInteger childBits = child.proc(sea, null);
            combined = first ? childBits : op(combined, childBits);
            first = false;
        }
        return op(prev, combined);
    }

}

/**
*
*/
package bitmapSearch;

import java.math.BigInteger;

/**
 * AND query: intersects bitmaps.
 *
 * @author houkangxi
 *
 */
class CriteriaOpAnd extends CriteriaChain {

    CriteriaOpAnd(String key) {
        super(key);
    }

    CriteriaOpAnd(Criteria[] list) {
        super(list);
    }

    /**
     * Intersects the two operands.
     *
     * @return {@code o1 & o2}, or {@code null} as soon as either operand is {@code null}
     */
    @Override
    protected BigInteger op(BigInteger o1, BigInteger o2) {
        return (o1 != null && o2 != null) ? o1.and(o2) : null;
    }
}
/**
*
*/
package bitmapSearch;

import java.math.BigInteger;
import java.util.Map;

/**
 * IN query: matches beans whose indexed field has any of the given values.
 *
 * @author houkangxi
 *
 */
class CriteriaOpIn extends Criteria {
    /** Candidate values; any one of them is enough for a match. */
    Object[] colls;

    CriteriaOpIn(Object key, Object[] colls) {
        super(key);
        this.colls = colls;
    }

    /**
     * Unions the bitmaps of all candidate values that are present in the index.
     *
     * @param sea
     *            searcher providing the bitmaps
     * @param prev
     *            ignored
     * @return the union, or {@code null} when the key has no bitmaps, no candidates were given,
     *         or none of the candidates is indexed
     */
    @Override
    protected BigInteger proc(BitmapSearcher sea, BigInteger prev) {
        final Map<Object, BigInteger> valueBitmaps = sea.getBitmap(key);
        if (colls == null || valueBitmaps == null) {
            return null;
        }
        BigInteger union = null;
        for (Object candidate : colls) {
            BigInteger candidateBits = valueBitmaps.get(candidate);
            if (candidateBits == null) {
                continue;
            }
            union = (union == null) ? candidateBits : union.or(candidateBits);
        }
        return union;
    }
}
/**
*
*/
package bitmapSearch;

import java.math.BigInteger;
import java.util.Map;

/**
 * IS query: matches beans whose indexed field equals a value.
 *
 * @author houkangxi
 *
 */
class CriteriaOpIs extends Criteria {
    /** Value the indexed field must have. */
    private Object ov;

    CriteriaOpIs(Object k, Object ov) {
        super(k);
        this.ov = ov;
    }

    /**
     * Looks up the bitmap stored for the expected value.
     *
     * @param sea
     *            searcher providing the bitmaps
     * @param prev
     *            ignored
     * @return the bitmap, or {@code null} when the key has no bitmaps or the value is not indexed
     */
    @Override
    protected BigInteger proc(BitmapSearcher sea, BigInteger prev) {
        final Map<Object, BigInteger> valueBitmaps = sea.getBitmap(key);
        return valueBitmaps == null ? null : valueBitmaps.get(ov);
    }
}
/**
*
*/
package bitmapSearch;

import java.math.BigInteger;
import java.util.Map;

/**
 * NOT query: complements the bitmap of a value.
 *
 * @author houkangxi
 *
 */
class CriteriaOpNot extends Criteria {
    /** Value the indexed field must not have. */
    private Object ov;

    CriteriaOpNot(Object k, Object ov) {
        super(k);
        this.ov = ov;
    }

    /**
     * Returns the bitwise complement of the bitmap stored for the value.
     * <p>
     * NOTE(review): {@link BigInteger#not()} of a non-negative value is negative, so the result
     * has no upper bound on its set bits; consumers presumably have to restrict it to valid
     * positions - confirm. Also note that a value without a bitmap yields {@code null} rather
     * than "everything".
     *
     * @param sea
     *            searcher providing the bitmaps
     * @param prev
     *            ignored
     * @return the complemented bitmap, or {@code null} when the key has no bitmaps or the value is
     *         not indexed
     */
    @Override
    protected BigInteger proc(BitmapSearcher sea, BigInteger prev) {
        final Map<Object, BigInteger> valueBitmaps = sea.getBitmap(key);
        final BigInteger excluded = (valueBitmaps == null) ? null : valueBitmaps.get(ov);
        return excluded == null ? null : excluded.not();
    }
}
/**
*
*/
package bitmapSearch;

import java.math.BigInteger;

/**
 * OR query: unions bitmaps.
 *
 * @author houkangxi
 *
 */
class CriteriaOpOr extends CriteriaChain {

    CriteriaOpOr(String k) {
        super(k);
    }

    CriteriaOpOr(Criteria[] list) {
        super(list);
    }

    /**
     * Unions the two operands, treating a {@code null} operand as absent.
     *
     * @return {@code o1 | o2} when both are present, the non-null operand when only one is, and
     *         {@code null} when both are {@code null}
     */
    @Override
    protected BigInteger op(BigInteger o1, BigInteger o2) {
        if (o1 != null && o2 != null) {
            return o1.or(o2);
        }
        return o2 == null ? o1 : o2;
    }
}


                                            
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息