您的位置：首页 > 大数据

大数据应用之HBase数据插入性能优化之多线程并行插入测试案例

2013-10-10 23:01 671 查看

一、引言：

　　上篇文章提到了HBase插入性能优化涉及到的五个参数，并从参数配置的角度给大家提供了一个性能测试环境的实验代码。根据网友的反馈，基于单线程模式实现的数据插入性能毕竟有限。通过个人实测，在我的虚拟机环境下，单线程插入数据的速度约为4w条/s。集群指标是：CPU双核1.83GHz，虚拟机512M内存，集群部署为单点模式。本文给出了基于多线程并发模式的测试代码案例和实测结果，希望能给大家一些启示：

二、源程序：

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.HTableInterface;
import org.apache.hadoop.hbase.client.HTablePool;
import org.apache.hadoop.hbase.client.Put;

/**
 * Insert benchmark for HBase: writes random-valued rows into {@link #tableName}
 * either from a single thread or from several threads in parallel, and prints
 * the elapsed wall-clock time.
 *
 * <p>Writes are buffered on the client (auto-flush off) and sent with the
 * write-ahead log disabled, exactly as in the original experiment. Every
 * generated row is actually submitted to the table and flushed before the
 * timer stops, so the reported time covers the full insert.
 */
public class HBaseImportEx {
    static Configuration hbaseConfig = null;
    public static HTablePool pool = null;
    public static String tableName = "T_TEST_1";

    /** Number of rows written by one call to an insert method. */
    private static final int ROWS_PER_RUN = 10000;

    /** Number of Puts collected locally before they are handed to the table. */
    private static final int BATCH_SIZE = 10000;

    /** Client-side write buffer size passed to the table (24 MiB). */
    private static final long WRITE_BUFFER_BYTES = 24L * 1024 * 1024;

    /** Explicit charset so row keys do not depend on the platform default. */
    private static final java.nio.charset.Charset UTF8 =
            java.nio.charset.Charset.forName("UTF-8");

    static {
        Configuration HBASE_CONFIG = new Configuration();
        HBASE_CONFIG.set("hbase.master", "192.168.230.133:60000");
        HBASE_CONFIG.set("hbase.zookeeper.quorum", "192.168.230.133");
        HBASE_CONFIG.set("hbase.zookeeper.property.clientPort", "2181");
        hbaseConfig = HBaseConfiguration.create(HBASE_CONFIG);

        pool = new HTablePool(hbaseConfig, 1000);
    }

    /**
     * Single-threaded insert test: writes {@code ROWS_PER_RUN} rows with
     * 350-byte random values and prints the elapsed time.
     *
     * @throws IOException if obtaining the table or writing to it fails
     */
    public static void SingleThreadInsert() throws IOException {
        System.out.println("---------开始SingleThreadInsert测试----------");
        long start = System.currentTimeMillis();

        int count = ROWS_PER_RUN;
        insertRows("row ", count, 350);

        long stop = System.currentTimeMillis();
        System.out.println("插入数据：" + count + "共耗时：" + (stop - start) * 1.0 / 1000 + "s");
        System.out.println("---------结束SingleThreadInsert测试----------");
    }

    /**
     * Insert routine executed by each worker thread in the multi-threaded
     * test: writes {@code ROWS_PER_RUN} rows with 256-byte random values and
     * prints the elapsed time together with the current thread id.
     *
     * <p>The thread id is part of every row key so that concurrent workers
     * write distinct rows instead of overwriting each other's data.
     *
     * @throws IOException if obtaining the table or writing to it fails
     */
    public static void InsertProcess() throws IOException {
        long start = System.currentTimeMillis();

        int count = ROWS_PER_RUN;
        long threadId = Thread.currentThread().getId();
        insertRows("row " + threadId + "-", count, 256);

        long stop = System.currentTimeMillis();
        System.out.println("线程:" + threadId + "插入数据：" + count
                + "共耗时：" + (stop - start) * 1.0 / 1000 + "s");
    }

    /**
     * Multi-threaded insert test: starts ten {@link ImportThread} workers,
     * waits for all of them and prints the total elapsed time.
     *
     * @throws InterruptedException if the calling thread is interrupted while
     *         waiting for a worker to finish
     */
    public static void MultThreadInsert() throws InterruptedException {
        System.out.println("---------开始MultThreadInsert测试----------");
        long start = System.currentTimeMillis();

        int threadNumber = 10;
        Thread[] threads = new Thread[threadNumber];
        for (int i = 0; i < threads.length; i++) {
            threads[i] = new ImportThread();
            threads[i].start();
        }
        for (int j = 0; j < threads.length; j++) {
            threads[j].join();
        }

        long stop = System.currentTimeMillis();
        System.out.println("MultThreadInsert：" + threadNumber * ROWS_PER_RUN
                + "共耗时：" + (stop - start) * 1.0 / 1000 + "s");
        System.out.println("---------结束MultThreadInsert测试----------");
    }

    /**
     * Entry point; runs the multi-threaded insert test.
     *
     * @param args unused
     */
    public static void main(String[] args) throws Exception {
        //SingleThreadInsert();
        MultThreadInsert();
    }

    /**
     * Writes {@code count} rows keyed {@code rowKeyPrefix + i} into column
     * family {@code f1}, each holding {@code valueSize} random bytes.
     *
     * <p>Puts are collected into batches of {@code BATCH_SIZE}; a trailing
     * partial batch is submitted after the loop so no generated row is left
     * behind.
     */
    private static void insertRows(String rowKeyPrefix, int count, int valueSize)
            throws IOException {
        HTable table = (HTable) pool.getTable(tableName);
        try {
            table.setAutoFlush(false);
            table.setWriteBufferSize(WRITE_BUFFER_BYTES);

            byte[] family = "f1".getBytes(UTF8);
            byte[] buffer = new byte[valueSize];
            Random rand = new Random();
            List<Put> batch = new ArrayList<Put>(Math.min(count, BATCH_SIZE));

            for (int i = 0; i < count; i++) {
                Put put = new Put((rowKeyPrefix + i).getBytes(UTF8));
                rand.nextBytes(buffer);
                put.add(family, null, buffer);
                // Skip the write-ahead log: faster, but unflushed data is lost on a crash.
                put.setWriteToWAL(false);
                batch.add(put);
                if (batch.size() >= BATCH_SIZE) {
                    submit(table, batch);
                }
            }
            if (!batch.isEmpty()) {
                submit(table, batch);
            }
        } finally {
            // NOTE(review): assumes the HTablePool API where a borrowed table is
            // handed back with putTable(); on client versions where the pooled
            // table's close() does this instead, switch to table.close() -- confirm
            // against the HBase client version in use.
            pool.putTable(table);
        }
    }

    /** Sends one batch of Puts to the table, flushes it and empties the batch. */
    private static void submit(HTable table, List<Put> batch) throws IOException {
        table.put(batch);
        table.flushCommits();
        batch.clear();
    }

    /** Worker thread for the multi-threaded test; runs {@link #InsertProcess()} once. */
    public static class ImportThread extends Thread {
        /** Placeholder kept for compatibility; intentionally does nothing. */
        public void HandleThread() {
        }

        @Override
        public void run() {
            try {
                InsertProcess();
            } catch (IOException e) {
                e.printStackTrace();
            }
            // No explicit System.gc() here: forcing a collection as each worker
            // finishes can disturb the timings of the workers still running.
        }
    }

}

三、说明

1.线程数的设置需要根据本集群的硬件参数，通过实际测试得出。否则在线程过多的情况下，整体性能反而会下降（总耗时反而增加）。

2.单笔提交数对性能的影响非常明显，需要在自己的环境下找到最理想的数值，这个数值与单条记录的字节数相关。

四、测试结果

---------开始MultThreadInsert测试----------

线程:8插入数据：10000共耗时：1.328s
线程:16插入数据：10000共耗时：1.562s
线程:11插入数据：10000共耗时：1.562s
线程:10插入数据：10000共耗时：1.812s
线程:13插入数据：10000共耗时：2.0s
线程:17插入数据：10000共耗时：2.14s
线程:14插入数据：10000共耗时：2.265s
线程:9插入数据：10000共耗时：2.468s
线程:15插入数据：10000共耗时：2.562s
线程:12插入数据：10000共耗时：2.671s
MultThreadInsert：100000共耗时：2.703s
---------结束MultThreadInsert测试----------

备注：该技术专题讨论正在群Hadoop高级交流群：293503507同步直播中，敬请关注。

内容来自用户分享和网络整理，不保证内容的准确性，如有侵权内容，可联系管理员处理

标签：

相关文章推荐

新的分享

章节导航