MapReduce处理数据(用户使用过的产品)
2014-09-10 09:32
162 查看
数据格式如下:
/* 0 */
{
"_id" : 1,
"logId" : "078EF19407AC4798882A7588B358B517",
"logType" : 0,
"username" : "0_{2001",
"userIdentityIds" : -1,
"productId" : 5,
"version" : "11.0.4",
"versionVal" : NumberLong("110000000004"),
"lockNumber" : "91487400000009bc",
"isLegalCopy" : 1,
"provinceId" : -2,
"cityId" : -2,
"loginOutIP" : "192.168.2.26",
"loginInIP" : "192.168.2.26",
"loginTime" : new Date("21/8/2014 09:36:38"),
"logoutTime" : new Date("21/8/2014 09:36:38"),
"addTime" : new Date("21/8/2014 09:36:38"),
"hardwareCodes" : ["dcc952a3855e0ca72e57c0632e0cd45e"],
"enterpriseId" : 0
}
/* 1 */
{
"_id" : 2,
"logId" : "4cb2ca79-743b-4791-8afe-cb5441745240",
"logType" : 1,
"username" : "0_elaine_yong",
"userIdentityIds" : -1,
"productId" : 3,
"version" : "21.2.0",
"versionVal" : NumberLong("210000200000"),
"lockNumber" : "91487400000009bc",
"isLegalCopy" : 1,
"provinceId" : -2,
"cityId" : -2,
"loginOutIP" : "192.168.2.22",
"loginInIP" : "192.168.2.22",
"loginTime" : new Date("21/8/2014 09:37:05"),
"logoutTime" : new Date("21/8/2014 09:37:05"),
"addTime" : new Date("21/8/2014 09:37:05"),
"hardwareCodes" : ["9290dc7e739cacad3460497fc0db1945"],
"enterpriseId" : 0
}
/* 2 */
{
"_id" : 3,
"logId" : "4cb2ca79-743b-4791-8afe-cb5441745240",
"logType" : 1,
"username" : "0_elaine_yong",
"userIdentityIds" : -1,
"productId" : 3,
"version" : "21.2.0",
"versionVal" : NumberLong("210000200000"),
"lockNumber" : "91487400000009bc",
"isLegalCopy" : 1,
"provinceId" : -2,
"cityId" : -2,
"loginOutIP" : "192.168.2.22",
"loginInIP" : "192.168.2.22",
"loginTime" : new Date("21/8/2014 09:37:17"),
"logoutTime" : new Date("21/8/2014 09:37:17"),
"addTime" : new Date("21/8/2014 09:37:17"),
"hardwareCodes" : ["9290dc7e739cacad3460497fc0db1945"],
"enterpriseId" : 0
}
/* 3 */
{
"_id" : 4,
"logId" : "4cb2ca79-743b-4791-8afe-cb5441745240",
"logType" : 1,
"username" : "0_elaine_yong",
"userIdentityIds" : -1,
"productId" : 5,
"version" : "21.2.0",
"versionVal" : NumberLong("210000200000"),
"lockNumber" : "91487400000009bc",
"isLegalCopy" : 1,
"provinceId" : -2,
"cityId" : -2,
"loginOutIP" : "192.168.2.22",
"loginInIP" : "192.168.2.22",
"loginTime" : new Date("21/8/2014 09:37:29"),
"logoutTime" : new Date("21/8/2014 09:37:29"),
"addTime" : new Date("21/8/2014 09:37:29"),
"hardwareCodes" : ["9290dc7e739cacad3460497fc0db1945"],
"enterpriseId" : 0
}
/* 4 */
{
"_id" : 5,
"logId" : "078EF19407AC4798882A7588B358B517",
"logType" : 0,
"username" : "0_{2001",
"userIdentityIds" : -1,
"productId" : 2,
"version" : "11.0.4",
"versionVal" : NumberLong("110000000004"),
"lockNumber" : "91487400000009bc",
"isLegalCopy" : 1,
"provinceId" : -2,
"cityId" : -2,
"loginOutIP" : "192.168.2.26",
"loginInIP" : "192.168.2.26",
"loginTime" : new Date("21/8/2014 09:40:27"),
"logoutTime" : new Date("21/8/2014 09:40:27"),
"addTime" : new Date("21/8/2014 09:40:27"),
"hardwareCodes" : ["dcc952a3855e0ca72e57c0632e0cd45e"],
"enterpriseId" : 0
}
/* 5 */
{
"_id" : 6,
"logId" : "078EF19407AC4798882A7588B358B517",
"logType" : 0,
"username" : "0_{2001",
"userIdentityIds" : -1,
"productId" : 5,
"version" : "11.0.4",
"versionVal" : NumberLong("110000000004"),
"lockNumber" : "91487400000009bc",
"isLegalCopy" : 1,
"provinceId" : -2,
"cityId" : -2,
"loginOutIP" : "192.168.2.26",
"loginInIP" : "192.168.2.26",
"loginTime" : new Date("21/8/2014 09:41:41"),
"logoutTime" : new Date("21/8/2014 09:41:41"),
"addTime" : new Date("21/8/2014 09:41:41"),
"hardwareCodes" : ["dcc952a3855e0ca72e57c0632e0cd45e"],
"enterpriseId" : 0
}
/* 6 */
{
"_id" : 7,
"logId" : "078EF19407AC4798882A7588B358B517",
"logType" : 0,
"username" : "0_{2001",
"userIdentityIds" : -1,
"productId" : 11,
"version" : "11.0.4",
"versionVal" : NumberLong("110000000004"),
"lockNumber" : "91487400000009bc",
"isLegalCopy" : 1,
"provinceId" : -2,
"cityId" : -2,
"loginOutIP" : "192.168.2.26",
"loginInIP" : "192.168.2.26",
"loginTime" : new Date("21/8/2014 09:41:52"),
"logoutTime" : new Date("21/8/2014 09:41:52"),
"addTime" : new Date("21/8/2014 09:41:52"),
"hardwareCodes" : ["dcc952a3855e0ca72e57c0632e0cd45e"],
"enterpriseId" : 0
}
统计出每个用户使用过的产品,一个用户只能有一条数据,产品使用数组存储
要求增量处理:比如第一次执行时处理到第3条数据,那么查询条件是 _id>0 并且 _id<4;
下次处理从第4条开始,查询条件是 _id>3 并且 _id<=当前最大ID
// Builds one document per username listing the distinct productIds found in
// that user's testlogin records, and writes it to "testloginMapReduce".
db.testlogin.mapReduce(
    // Map: emit a single-element product list for each record, keyed by username.
    function() {
        emit(
            this.username,
            {"username": this.username, "productIds": [this.productId]}
        );
    },
    // Reduce: union the product lists received for one username.
    // Returns the same shape the mapper emits, so it can be applied again to
    // its own output (required by out.reduce below).
    function(key, values) {
        // Returns true when pid is already present in pids.
        var containsProductId = function(pid, pids) {
            for (var k = 0; k < pids.length; k++) {
                // Loose equality is retained from the original comparison.
                if (pids[k] == pid) {
                    return true;
                }
            }
            return false;
        };
        // The mapper emits username as the key, so key is the username.
        var reduced = {username: key, productIds: []};
        // Indexed loops instead of for...in, which enumerates property names
        // rather than array elements.
        for (var i = 0; i < values.length; i++) {
            // Tolerate a value without a productIds array, as for...in did.
            var productIds = values[i].productIds || [];
            for (var j = 0; j < productIds.length; j++) {
                if (!containsProductId(productIds[j], reduced.productIds)) {
                    reduced.productIds.push(productIds[j]);
                }
            }
        }
        return reduced;
    },
    {
        // "reduce" output mode: results are merged with documents that already
        // exist in testloginMapReduce by re-applying the reduce function.
        out: {"reduce": "testloginMapReduce"},
        // Only a lower bound is given here, so every record with _id > 0 is
        // processed. For an incremental run, raise $gt to the last processed
        // _id and add an upper bound for the slice being handled.
        query: {_id: {$gt: 0}}
    }
);
统计结果:
{
"_id" : "0_elaine_yong",
"value" : {
"username" : "0_elaine_yong",
"productIds" : [5.0, 3.0]
}
}
/* 1 */
{
"_id" : "0_{2001",
"value" : {
"username" : "0_{2001",
"productIds" : [2.0, 5.0, 11.0]
}
}
/* 0 */
{
"_id" : 1,
"logId" : "078EF19407AC4798882A7588B358B517",
"logType" : 0,
"username" : "0_{2001",
"userIdentityIds" : -1,
"productId" : 5,
"version" : "11.0.4",
"versionVal" : NumberLong("110000000004"),
"lockNumber" : "91487400000009bc",
"isLegalCopy" : 1,
"provinceId" : -2,
"cityId" : -2,
"loginOutIP" : "192.168.2.26",
"loginInIP" : "192.168.2.26",
"loginTime" : new Date("21/8/2014 09:36:38"),
"logoutTime" : new Date("21/8/2014 09:36:38"),
"addTime" : new Date("21/8/2014 09:36:38"),
"hardwareCodes" : ["dcc952a3855e0ca72e57c0632e0cd45e"],
"enterpriseId" : 0
}
/* 1 */
{
"_id" : 2,
"logId" : "4cb2ca79-743b-4791-8afe-cb5441745240",
"logType" : 1,
"username" : "0_elaine_yong",
"userIdentityIds" : -1,
"productId" : 3,
"version" : "21.2.0",
"versionVal" : NumberLong("210000200000"),
"lockNumber" : "91487400000009bc",
"isLegalCopy" : 1,
"provinceId" : -2,
"cityId" : -2,
"loginOutIP" : "192.168.2.22",
"loginInIP" : "192.168.2.22",
"loginTime" : new Date("21/8/2014 09:37:05"),
"logoutTime" : new Date("21/8/2014 09:37:05"),
"addTime" : new Date("21/8/2014 09:37:05"),
"hardwareCodes" : ["9290dc7e739cacad3460497fc0db1945"],
"enterpriseId" : 0
}
/* 2 */
{
"_id" : 3,
"logId" : "4cb2ca79-743b-4791-8afe-cb5441745240",
"logType" : 1,
"username" : "0_elaine_yong",
"userIdentityIds" : -1,
"productId" : 3,
"version" : "21.2.0",
"versionVal" : NumberLong("210000200000"),
"lockNumber" : "91487400000009bc",
"isLegalCopy" : 1,
"provinceId" : -2,
"cityId" : -2,
"loginOutIP" : "192.168.2.22",
"loginInIP" : "192.168.2.22",
"loginTime" : new Date("21/8/2014 09:37:17"),
"logoutTime" : new Date("21/8/2014 09:37:17"),
"addTime" : new Date("21/8/2014 09:37:17"),
"hardwareCodes" : ["9290dc7e739cacad3460497fc0db1945"],
"enterpriseId" : 0
}
/* 3 */
{
"_id" : 4,
"logId" : "4cb2ca79-743b-4791-8afe-cb5441745240",
"logType" : 1,
"username" : "0_elaine_yong",
"userIdentityIds" : -1,
"productId" : 5,
"version" : "21.2.0",
"versionVal" : NumberLong("210000200000"),
"lockNumber" : "91487400000009bc",
"isLegalCopy" : 1,
"provinceId" : -2,
"cityId" : -2,
"loginOutIP" : "192.168.2.22",
"loginInIP" : "192.168.2.22",
"loginTime" : new Date("21/8/2014 09:37:29"),
"logoutTime" : new Date("21/8/2014 09:37:29"),
"addTime" : new Date("21/8/2014 09:37:29"),
"hardwareCodes" : ["9290dc7e739cacad3460497fc0db1945"],
"enterpriseId" : 0
}
/* 4 */
{
"_id" : 5,
"logId" : "078EF19407AC4798882A7588B358B517",
"logType" : 0,
"username" : "0_{2001",
"userIdentityIds" : -1,
"productId" : 2,
"version" : "11.0.4",
"versionVal" : NumberLong("110000000004"),
"lockNumber" : "91487400000009bc",
"isLegalCopy" : 1,
"provinceId" : -2,
"cityId" : -2,
"loginOutIP" : "192.168.2.26",
"loginInIP" : "192.168.2.26",
"loginTime" : new Date("21/8/2014 09:40:27"),
"logoutTime" : new Date("21/8/2014 09:40:27"),
"addTime" : new Date("21/8/2014 09:40:27"),
"hardwareCodes" : ["dcc952a3855e0ca72e57c0632e0cd45e"],
"enterpriseId" : 0
}
/* 5 */
{
"_id" : 6,
"logId" : "078EF19407AC4798882A7588B358B517",
"logType" : 0,
"username" : "0_{2001",
"userIdentityIds" : -1,
"productId" : 5,
"version" : "11.0.4",
"versionVal" : NumberLong("110000000004"),
"lockNumber" : "91487400000009bc",
"isLegalCopy" : 1,
"provinceId" : -2,
"cityId" : -2,
"loginOutIP" : "192.168.2.26",
"loginInIP" : "192.168.2.26",
"loginTime" : new Date("21/8/2014 09:41:41"),
"logoutTime" : new Date("21/8/2014 09:41:41"),
"addTime" : new Date("21/8/2014 09:41:41"),
"hardwareCodes" : ["dcc952a3855e0ca72e57c0632e0cd45e"],
"enterpriseId" : 0
}
/* 6 */
{
"_id" : 7,
"logId" : "078EF19407AC4798882A7588B358B517",
"logType" : 0,
"username" : "0_{2001",
"userIdentityIds" : -1,
"productId" : 11,
"version" : "11.0.4",
"versionVal" : NumberLong("110000000004"),
"lockNumber" : "91487400000009bc",
"isLegalCopy" : 1,
"provinceId" : -2,
"cityId" : -2,
"loginOutIP" : "192.168.2.26",
"loginInIP" : "192.168.2.26",
"loginTime" : new Date("21/8/2014 09:41:52"),
"logoutTime" : new Date("21/8/2014 09:41:52"),
"addTime" : new Date("21/8/2014 09:41:52"),
"hardwareCodes" : ["dcc952a3855e0ca72e57c0632e0cd45e"],
"enterpriseId" : 0
}
统计出每个用户使用过的产品,一个用户只能有一条数据,产品使用数组存储
要求增量处理:比如第一次执行时处理到第3条数据,那么查询条件是 _id>0 并且 _id<4;
下次处理从第4条开始,查询条件是 _id>3 并且 _id<=当前最大ID
// Builds one document per username listing the distinct productIds found in
// that user's testlogin records, and writes it to "testloginMapReduce".
db.testlogin.mapReduce(
    // Map: emit a single-element product list for each record, keyed by username.
    function() {
        emit(
            this.username,
            {"username": this.username, "productIds": [this.productId]}
        );
    },
    // Reduce: union the product lists received for one username.
    // Returns the same shape the mapper emits, so it can be applied again to
    // its own output (required by out.reduce below).
    function(key, values) {
        // Returns true when pid is already present in pids.
        var containsProductId = function(pid, pids) {
            for (var k = 0; k < pids.length; k++) {
                // Loose equality is retained from the original comparison.
                if (pids[k] == pid) {
                    return true;
                }
            }
            return false;
        };
        // The mapper emits username as the key, so key is the username.
        var reduced = {username: key, productIds: []};
        // Indexed loops instead of for...in, which enumerates property names
        // rather than array elements.
        for (var i = 0; i < values.length; i++) {
            // Tolerate a value without a productIds array, as for...in did.
            var productIds = values[i].productIds || [];
            for (var j = 0; j < productIds.length; j++) {
                if (!containsProductId(productIds[j], reduced.productIds)) {
                    reduced.productIds.push(productIds[j]);
                }
            }
        }
        return reduced;
    },
    {
        // "reduce" output mode: results are merged with documents that already
        // exist in testloginMapReduce by re-applying the reduce function.
        out: {"reduce": "testloginMapReduce"},
        // Only a lower bound is given here, so every record with _id > 0 is
        // processed. For an incremental run, raise $gt to the last processed
        // _id and add an upper bound for the slice being handled.
        query: {_id: {$gt: 0}}
    }
);
统计结果:
{
"_id" : "0_elaine_yong",
"value" : {
"username" : "0_elaine_yong",
"productIds" : [5.0, 3.0]
}
}
/* 1 */
{
"_id" : "0_{2001",
"value" : {
"username" : "0_{2001",
"productIds" : [2.0, 5.0, 11.0]
}
}
相关文章推荐
- MapReduce处理数据(用户每周登录次数)
- 用mapreduce处理用户使用流量统计
- 使用Hadoop的MapReduce与HDFS处理数据
- SpringMVC(27):json数据的传递处理的示例(实现功能:使用jQuery框架的ajax()方法实现用户信息查看)
- 使用Hadoop的MapReduce与HDFS处理数据
- 使用MapReduce处理Hbase数据
- 字符串处理是许多程序中非常重要的一部分,它们可以用于文本显示,数据表示,查找键和很多目的.在Unix下,用户可以使用正则表达式的强健功能实现这些 目的,从Java1.4起,Java核心API就引入了java.util.regex程序包,它是一种有价值的基础
- 大数据采集、清洗、处理:使用MapReduce进行离线数据分析完整案例
- 使用MapReduce处理Hbase数据
- 使用OWC时处理有间断的数据的方法
- ORACLE用户常用数据字典的查询使用方法
- SQL Server 系统表使用-查询指定数据库中用户表及其列、数据类、长度
- 使用事务与锁,实现一个用户取过的数据不被其他用户取到
- 使用事务与锁,实现一个用户取过的数据不被其他用户取到
- 使用 EL、JSTL 处理表单数据(转载)
- 使用Hibernate处理数据
- 一步一步建网-4-Serv-U-1-使用MsSQL05管理用户数据
- 使用ADO.NET 和C# 处理BLOB 数据
- 使用数据2分处理的通用分页存储过程 前半部分与后半部分数据访问时间相同
- COM组件中使用用户自定义数据类型