您的位置:首页 > 编程语言 > Java开发

Match_ToTerm.java

2015-11-05 16:13 267 查看
Match_ToTerm.java:将 match 文件中的商品 ID 全部替换成对应的分词结果。分词之间用空格隔开,可相互替代的商品之间仍用逗号隔开,不同搭配之间用分号(;)隔开,输出文件为 ToTerms2.txt。

MatchDeleteLine.txt:将 dim_fashion_matches.txt 中的第一列删除,输出文件为 ToTerms3.txt。

package test;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.Closeable;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.io.Writer;
import java.math.BigInteger;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import redis.clients.jedis.Jedis;

/**
 * Rewrites a match-set file so that every item id is replaced by that item's terms.
 *
 * <p>The program works in two passes:
 * <ol>
 *   <li>{@link #getStrings(int)} reads the items file and stores, for every line, a serialized
 *       map with the keys {@code item_id}, {@code cat_id} and {@code terms} in Redis under the
 *       item id.</li>
 *   <li>{@link #main(String[])} reads the match-set file, looks every item id up in Redis and
 *       appends one converted line per input line to the output file.</li>
 * </ol>
 *
 * <p>Output format of one line: commas inside an item's terms are replaced by spaces, the items
 * of one group are separated by {@code ,} and every group is terminated by {@code ;}.
 */
public class Match_ToTerm {

    private static final String REDIS_HOST = "10.20.100.5";
    private static final int REDIS_PORT = 6379;

    /** Charset used for the input files, the output file and the Redis keys. */
    private static final String CHARSET = "UTF-8";

    private static final String ITEMS_FILE =
            "/public/home/dsj/Public/zfy/resources/dim_items.txt";
    private static final String MATCHSETS_FILE =
            "/public/home/dsj/Public/zfy/resources/dim_fashion_matchsets.txt";
    private static final String OUTPUT_FILE =
            "/public/home/dsj/Public/zfy/resources/ToTerms2.txt";

    /**
     * Loads the items file into Redis.
     *
     * <p>Each line is split on a single space; the first three columns are stored as
     * {@code item_id}, {@code cat_id} and {@code terms} in a map that is serialized and written
     * to Redis under the item id. Lines with fewer than three columns are reported on stderr
     * and skipped. I/O failures are printed and end the load; they are not rethrown.
     *
     * @param count kept only for source compatibility; Java passes it by value, so the caller
     *              never saw the line counter the previous implementation incremented
     */
    public static void getStrings(int count) {
        BufferedReader reader = null;
        Jedis jedis = null;
        try {
            reader = openReader(ITEMS_FILE);
            jedis = new Jedis(REDIS_HOST, REDIS_PORT);

            String line;
            while ((line = reader.readLine()) != null) {
                String[] columns = line.split(" ");
                if (columns.length < 3) {
                    System.err.println("Skipping malformed item line: " + line);
                    continue;
                }

                Map<String, Object> item = new HashMap<String, Object>();
                item.put("item_id", columns[0]);
                item.put("cat_id", columns[1]);
                item.put("terms", columns[2]);

                byte[] payload = writeObject(item);
                if (payload == null) {
                    // writeObject already printed the cause; never store a null value.
                    continue;
                }
                // One write per item is enough; the same SET used to be issued three times.
                jedis.set(columns[0].getBytes(CHARSET), payload);
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeAndReport(reader);
            if (jedis != null) {
                jedis.close();
            }
        }
    }

    /**
     * Serializes an object with Java serialization.
     *
     * @param obj the object to serialize
     * @return the serialized bytes, or {@code null} if serialization failed (the cause is
     *         printed to stderr)
     */
    private static byte[] writeObject(Object obj) {
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        ObjectOutputStream out = null;
        try {
            out = new ObjectOutputStream(buffer);
            out.writeObject(obj);
            // Flush explicitly instead of relying on the stream's internal buffering.
            out.flush();
            return buffer.toByteArray();
        } catch (IOException e) {
            e.printStackTrace();
            return null;
        } finally {
            closeAndReport(out);
        }
    }

    /**
     * Deserializes an object written by {@link #writeObject(Object)}.
     *
     * <p>NOTE(review): this is native Java deserialization of bytes fetched from Redis. That is
     * only safe as long as this program is the sole writer of those keys.
     *
     * @param array the serialized bytes; may be {@code null}
     * @return the deserialized object, or {@code null} if {@code array} is {@code null} or
     *         cannot be deserialized (the cause is printed to stderr)
     */
    private static Object readObject(byte[] array) {
        if (array == null) {
            return null;
        }
        ObjectInputStream in = null;
        try {
            in = new ObjectInputStream(new ByteArrayInputStream(array));
            return in.readObject();
        } catch (IOException e) {
            e.printStackTrace();
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
        } finally {
            closeAndReport(in);
        }
        return null;
    }

    /**
     * Appends text to a file, creating the file if necessary.
     *
     * <p>The text is encoded as UTF-8, matching the encoding the input files are read with.
     * I/O failures are printed to stderr and not rethrown.
     *
     * @param fileName path of the file to append to
     * @param content  text to append
     */
    public static void appendMethod(String fileName, String content) {
        Writer writer = null;
        try {
            // The second FileOutputStream argument selects append mode.
            writer = new OutputStreamWriter(new FileOutputStream(fileName, true), CHARSET);
            writer.write(content);
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeAndReport(writer);
        }
    }

    /**
     * Loads the items into Redis, then converts the match-set file and appends the result to
     * the output file.
     *
     * @param args unused
     */
    public static void main(String args[]) {
        getStrings(0);

        BufferedReader reader = null;
        Writer out = null;
        Jedis jedis = null;
        try {
            reader = openReader(MATCHSETS_FILE);
            // Open the output once in append mode instead of reopening it for every fragment.
            out = new BufferedWriter(
                    new OutputStreamWriter(new FileOutputStream(OUTPUT_FILE, true), CHARSET));
            jedis = new Jedis(REDIS_HOST, REDIS_PORT);

            String line;
            while ((line = reader.readLine()) != null) {
                out.write(toTermLine(jedis, line));
                out.write("\n");
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeAndReport(out);
            closeAndReport(reader);
            if (jedis != null) {
                jedis.close();
            }
        }
    }

    /**
     * Converts one match-set line into its term representation (without the line break).
     *
     * <p>The second space-separated column is split into groups on {@code ;} and each group
     * into item ids on {@code ,}. A line without a second column is reported on stderr and
     * yields an empty string, so output lines stay aligned with input lines.
     */
    private static String toTermLine(Jedis jedis, String line) throws IOException {
        String[] columns = line.split(" ");
        if (columns.length < 2) {
            System.err.println("Match line has no item list: " + line);
            return "";
        }

        StringBuilder converted = new StringBuilder();
        for (String group : columns[1].split(";")) {
            // Split on the separator itself rather than guessing from the group's length.
            String[] itemIds = group.split(",");
            for (int i = 0; i < itemIds.length; i++) {
                if (i > 0) {
                    converted.append(',');
                }
                converted.append(lookupTerms(jedis, itemIds[i]).replace(',', ' '));
            }
            // Terminate every group with ';' so that a multi-item group no longer runs into
            // the following group.
            converted.append(';');
        }
        return converted.toString();
    }

    /**
     * Returns the {@code terms} value stored in Redis for an item id, or an empty string (with
     * a message on stderr) when the item is unknown or its stored value is unusable.
     */
    private static String lookupTerms(Jedis jedis, String itemId) throws IOException {
        Object stored = readObject(jedis.get(itemId.getBytes(CHARSET)));
        if (stored instanceof Map) {
            Object terms = ((Map<?, ?>) stored).get("terms");
            if (terms instanceof String) {
                return (String) terms;
            }
        }
        System.err.println("No terms found for item id: " + itemId);
        return "";
    }

    /** Opens a file as a buffered UTF-8 reader, closing the stream again if wrapping fails. */
    private static BufferedReader openReader(String path) throws IOException {
        FileInputStream in = new FileInputStream(path);
        try {
            return new BufferedReader(new InputStreamReader(in, CHARSET));
        } catch (UnsupportedEncodingException e) {
            in.close();
            throw e;
        }
    }

    /** Closes a resource if it is non-null, printing (not rethrowing) any failure. */
    private static void closeAndReport(Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: