solr dataimport 数据导入源码分析 补充
2012-09-10 03:19
513 查看
上部分的代码还可以进一步优化,主要是构建Collection<SolrInputDocument> 集合,分批次提交,优化新增索引速度
其实分页方式也是分批次提交的,不过这种方式更优雅
参考如下代码
import java.io.IOException;
import java.net.MalformedURLException;
import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.sql.SQLException;
import java.sql.Types;
import java.util.ArrayList;
import java.util.Collection;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.CommonsHttpSolrServer;
import org.apache.solr.common.SolrInputDocument;
public class Test
{
    /** Number of documents buffered before each batch is sent to Solr. */
    private static final int fetchSize = 1000;

    /** Base URL of the target Solr core. */
    private static final String url = "http://localhost:8983/solr/core1/";

    /**
     * Client for the Solr core. Instance (not static) so concurrent Test
     * instances do not overwrite each other's server reference.
     */
    private final CommonsHttpSolrServer solrCore;

    public Test() throws MalformedURLException
    {
        solrCore = new CommonsHttpSolrServer(url);
    }

    /**
     * Takes an SQL ResultSet and adds the documents to Solr in batches of
     * {@code fetchSize}.
     *
     * @param rs a ResultSet from the database, positioned before its first row
     * @return the number of documents added to Solr
     * @throws SQLException if reading the ResultSet fails
     * @throws SolrServerException if Solr rejects a batch
     * @throws IOException on a communication failure with Solr
     */
    public long addResultSet(ResultSet rs) throws SQLException,
            SolrServerException, IOException
    {
        long count = 0;
        int innerCount = 0;
        Collection<SolrInputDocument> docs = new ArrayList<SolrInputDocument>();
        ResultSetMetaData rsm = rs.getMetaData();
        int numColumns = rsm.getColumnCount();
        // JDBC numbers columns starting at 1, so index 0 of this array is
        // deliberately unused.
        String[] colNames = new String[numColumns + 1];
        for (int i = 1; i <= numColumns; i++)
        {
            colNames[i] = rsm.getColumnName(i);
            // To handle a column manually, set its colNames entry to null here
            // (compare names with equals(), not ==) and add the field yourself
            // inside the row loop below.
        }
        while (rs.next())
        {
            count++;
            innerCount++;
            SolrInputDocument doc = new SolrInputDocument();
            // Manual field assignments for columns nulled out above go here,
            // e.g. doc.addField("solr_db_id", rs.getLong("db_id"));
            for (int j = 1; j <= numColumns; j++)
            {
                if (colNames[j] == null)
                {
                    continue; // column is handled manually
                }
                Object f = readColumn(rs, rsm.getColumnType(j), j);
                // Skip SQL NULLs: without this the primitive getters would
                // index bogus 0 / 0.0 / false values.
                if (f != null)
                {
                    doc.addField(colNames[j], f);
                }
            }
            docs.add(doc);
            // When the buffer reaches fetchSize, index the batch and reset
            // the inner counter.
            if (innerCount == fetchSize)
            {
                solrCore.add(docs);
                docs.clear();
                innerCount = 0;
            }
        }
        // Flush the final partial batch, if any.
        if (innerCount != 0)
        {
            solrCore.add(docs);
        }
        return count;
    }

    /**
     * Reads column {@code j} of the current row using the accessor that
     * matches its declared SQL type.
     *
     * @param rs the ResultSet positioned on the current row
     * @param sqlType the column's {@link Types} constant
     * @param j the 1-based column index
     * @return the column value, or null if the column was SQL NULL
     * @throws SQLException if the column cannot be read
     */
    private static Object readColumn(ResultSet rs, int sqlType, int j)
            throws SQLException
    {
        Object f;
        switch (sqlType)
        {
            case Types.BIGINT:
                f = rs.getLong(j);
                break;
            case Types.INTEGER:
                f = rs.getInt(j);
                break;
            case Types.DATE:
                f = rs.getDate(j);
                break;
            case Types.FLOAT:
                f = rs.getFloat(j);
                break;
            case Types.DOUBLE:
                f = rs.getDouble(j);
                break;
            case Types.TIME:
                // BUG FIX: was rs.getDate(j), which discards the time-of-day
                // component for TIME columns.
                f = rs.getTime(j);
                break;
            case Types.TIMESTAMP:
                // Map timestamps directly instead of falling back to getString.
                f = rs.getTimestamp(j);
                break;
            case Types.BOOLEAN:
                f = rs.getBoolean(j);
                break;
            default:
                f = rs.getString(j);
                break;
        }
        // wasNull() reports on the last getter call; primitive getters return
        // 0/false for SQL NULL, so this is the only reliable NULL check.
        return rs.wasNull() ? null : f;
    }
}
其实分页方式也是分批次提交的,不过这种方式更优雅
参考如下代码
import java.io.IOException;
import java.net.MalformedURLException;
import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.sql.SQLException;
import java.sql.Types;
import java.util.ArrayList;
import java.util.Collection;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.CommonsHttpSolrServer;
import org.apache.solr.common.SolrInputDocument;
public class Test
{
    /** Number of documents buffered before each batch is sent to Solr. */
    private static int fetchSize = 1000;

    /** Base URL of the target Solr core. */
    private static String url = "http://localhost:8983/solr/core1/";

    /** Client for the Solr core, created by the constructor. */
    private static CommonsHttpSolrServer solrCore;

    public Test() throws MalformedURLException
    {
        solrCore = new CommonsHttpSolrServer(url);
    }

    /**
     * Takes an SQL ResultSet and adds the documents to Solr in batches of
     * {@code fetchSize}.
     *
     * @param rs a ResultSet from the database, positioned before its first row
     * @return the number of documents added to Solr
     * @throws SQLException if reading the ResultSet fails
     * @throws SolrServerException if Solr rejects a batch
     * @throws IOException on a communication failure with Solr
     */
    public long addResultSet(ResultSet rs) throws SQLException,
            SolrServerException, IOException
    {
        long count = 0;
        int innerCount = 0;
        Collection<SolrInputDocument> docs = new ArrayList<SolrInputDocument>();
        ResultSetMetaData rsm = rs.getMetaData();
        int numColumns = rsm.getColumnCount();
        // JDBC numbers columns starting at 1, so index 0 of this array is
        // deliberately unused.
        String[] colNames = new String[numColumns + 1];
        for (int i = 1; i < (numColumns + 1); i++)
        {
            colNames[i] = rsm.getColumnName(i);
            // To handle a column manually, set its colNames entry to null here
            // (compare names with equals(), not ==); the loop below then skips
            // that database column.
        }
        while (rs.next())
        {
            count++;
            innerCount++;
            SolrInputDocument doc = new SolrInputDocument();
            // Manual field assignments for columns nulled out above go here,
            // e.g. doc.addField("solr_db_id", rs.getLong("db_id"));
            for (int j = 1; j < (numColumns + 1); j++)
            {
                if (colNames[j] != null)
                {
                    Object f;
                    // Pick the typed accessor matching the declared SQL type;
                    // anything unrecognized falls back to getString.
                    switch (rsm.getColumnType(j))
                    {
                        case Types.BIGINT:
                            f = rs.getLong(j);
                            break;
                        case Types.INTEGER:
                            f = rs.getInt(j);
                            break;
                        case Types.DATE:
                            f = rs.getDate(j);
                            break;
                        case Types.FLOAT:
                            f = rs.getFloat(j);
                            break;
                        case Types.DOUBLE:
                            f = rs.getDouble(j);
                            break;
                        case Types.TIME:
                            // BUG FIX: was rs.getDate(j), which discards the
                            // time-of-day component for TIME columns.
                            f = rs.getTime(j);
                            break;
                        case Types.BOOLEAN:
                            f = rs.getBoolean(j);
                            break;
                        default:
                            f = rs.getString(j);
                            break;
                    }
                    // Skip SQL NULLs: primitive getters return 0/false for
                    // NULL, which would otherwise be indexed as real values.
                    if (!rs.wasNull())
                    {
                        doc.addField(colNames[j], f);
                    }
                }
            }
            docs.add(doc);
            // When the buffer reaches fetchSize, index the batch and reset
            // the inner counter.
            if (innerCount == fetchSize)
            {
                solrCore.add(docs);
                docs.clear();
                innerCount = 0;
            }
        }
        // Flush the final partial batch, if any.
        if (innerCount != 0)
        {
            solrCore.add(docs);
        }
        return count;
    }
}
相关文章推荐
- solr dataimport 数据导入源码分析(十)总结
- solr dataimport 数据导入源码分析(四)
- solr dataimport 数据导入源码分析(十三)
- solr dataimport 数据导入源码分析(五)
- solr dataimport 数据导入源码分析(十四)
- solr dataimport 数据导入源码分析(六)
- solr dataimport 数据导入源码分析(一)
- solr dataimport 数据导入源码分析(十二)
- solr dataimport 数据导入源码分析(二)
- solr dataimport 数据导入源码分析(三)
- solr dataimport 数据导入源码分析(七)
- solr dataimport 数据导入源码分析(八)
- solr dataimport 数据导入源码分析(九)
- Solr1.4.0源码分析(一) 解决DataImportHandler从数据库导入大量数据而内存溢出的问题
- Solr学习笔记之3、Solr dataimport - 从SQLServer导入数据建立索引
- Solr4:数据导入(dataimport)时,不符合Solr日期类型要求的字段的处理
- solr3.6数据导入DataImport实现
- 20161216-solr cloud 集群数据导入(dataimport)笔记
- Solr-5.3.1 dataimport 导入mysql数据
- 如何从Excel中把数据导入到SharePoint List(Import Excel data to SharePoint List)