Match_ToTerm.java
2015-11-05 16:13
267 查看
Match_ToTerm.java:将 match 里的商品都转换成对应的分词结果。同一商品的分词之间用空格隔开;可相互替代的商品之间仍用逗号(,)隔开,搭配之间用分号(;)隔开。输出文件为 ToTerms2.txt。
MatchDeleteLine.txt,输出文件ToTerms3.txt,将dim_fashion_matches.txt中第一列删除。
MatchDeleteLine.txt,输出文件ToTerms3.txt,将dim_fashion_matches.txt中第一列删除。
package test;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.Closeable;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.io.Writer;
import java.math.BigInteger;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import redis.clients.jedis.Jedis;

/**
 * Converts the item ids in {@code dim_fashion_matchsets.txt} into the "terms" string of each
 * item and appends the result to {@code ToTerms2.txt}.
 *
 * <p>The run has two phases:
 * <ol>
 *   <li>{@link #getStrings(int)} loads every line of {@code dim_items.txt} into Redis, keyed by
 *       item id, as a Java-serialized {@code HashMap} with the keys {@code item_id},
 *       {@code cat_id} and {@code terms}.</li>
 *   <li>{@link #main(String[])} reads the match-set file, looks each item id up in Redis and
 *       writes its terms (commas replaced by spaces) followed by a separator.</li>
 * </ol>
 */
public class Match_ToTerm {

    private static final String REDIS_HOST = "10.20.100.5";
    private static final int REDIS_PORT = 6379;

    private static final String ITEMS_FILE =
            "/public/home/dsj/Public/zfy/resources/dim_items.txt";
    private static final String MATCH_SETS_FILE =
            "/public/home/dsj/Public/zfy/resources/dim_fashion_matchsets.txt";
    private static final String OUTPUT_FILE =
            "/public/home/dsj/Public/zfy/resources/ToTerms2.txt";

    /** Charset used for the input files, the output file and the Redis keys. */
    private static final Charset UTF_8 = Charset.forName("UTF-8");

    /**
     * Loads every item of {@code dim_items.txt} into Redis.
     *
     * <p>Each line is split on a single space into item id, category id and terms. Lines with
     * fewer than three fields are reported on stderr and skipped. An I/O failure is reported
     * with a stack trace and ends the load; the reader and the Redis connection are always
     * released.
     *
     * @param count unused; kept only so existing callers keep compiling (an {@code int} is
     *              passed by value, so the per-line increment the method used to perform was
     *              never visible to the caller)
     */
    public static void getStrings(int count) {
        Jedis jedis = new Jedis(REDIS_HOST, REDIS_PORT);
        BufferedReader reader = null;
        try {
            reader = openUtf8Reader(ITEMS_FILE);
            String line;
            while ((line = reader.readLine()) != null) {
                String[] fields = line.split(" ");
                if (fields.length < 3) {
                    System.err.println("Skipping malformed item line: " + line);
                    continue;
                }
                Map<String, Object> item = new HashMap<String, Object>();
                item.put("item_id", fields[0]);
                item.put("cat_id", fields[1]);
                item.put("terms", fields[2]);
                jedis.set(fields[0].getBytes(UTF_8), writeObject(item));
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeResource(reader);
            jedis.close();
        }
    }

    /**
     * Serializes {@code obj} with Java object serialization.
     *
     * @param obj the object to serialize
     * @return the serialized bytes
     * @throws IOException if the object cannot be serialized
     */
    private static byte[] writeObject(Object obj) throws IOException {
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        ObjectOutputStream out = new ObjectOutputStream(buffer);
        try {
            out.writeObject(obj);
        } finally {
            // Closing flushes the stream's internal buffer into 'buffer'.
            out.close();
        }
        return buffer.toByteArray();
    }

    /**
     * Deserializes an object previously produced by {@link #writeObject(Object)}.
     *
     * <p>NOTE(review): this is Java-native deserialization of bytes fetched from Redis. It is
     * only safe while that Redis instance holds nothing but data written by this program.
     *
     * @param array the serialized bytes; may be {@code null}
     * @return the deserialized object, or {@code null} if {@code array} is {@code null} or
     *         cannot be deserialized (the failure is reported with a stack trace)
     */
    private static Object readObject(byte[] array) {
        if (array == null) {
            return null;
        }
        ObjectInputStream in = null;
        try {
            in = new ObjectInputStream(new ByteArrayInputStream(array));
            return in.readObject();
        } catch (IOException e) {
            e.printStackTrace();
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
        } finally {
            closeResource(in);
        }
        return null;
    }

    /**
     * Appends {@code content} to the file {@code fileName}, creating the file if needed.
     *
     * <p>The text is written as UTF-8. Failures are reported with a stack trace and otherwise
     * ignored.
     *
     * @param fileName path of the file to append to
     * @param content  text to append
     */
    public static void appendMethod(String fileName, String content) {
        Writer writer = null;
        try {
            // The second FileOutputStream argument 'true' selects append mode.
            writer = new OutputStreamWriter(new FileOutputStream(fileName, true), UTF_8);
            writer.write(content);
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeResource(writer);
        }
    }

    /**
     * Loads the items into Redis, then converts every line of the match-set file and appends
     * the converted line to the output file.
     *
     * <p>Each input line is split on a single space; the second field is the item list. Lines
     * without a second field are reported on stderr and skipped.
     *
     * @param args ignored
     */
    public static void main(String args[]) {
        Jedis jedis = new Jedis(REDIS_HOST, REDIS_PORT);
        int count = 0;
        getStrings(count);

        BufferedReader reader = null;
        Writer out = null;
        try {
            reader = openUtf8Reader(MATCH_SETS_FILE);
            // One buffered writer for the whole run instead of reopening the file for
            // every fragment; append mode keeps any existing content.
            out = new BufferedWriter(
                    new OutputStreamWriter(new FileOutputStream(OUTPUT_FILE, true), UTF_8));
            String line;
            while ((line = reader.readLine()) != null) {
                String[] columns = line.split(" ");
                if (columns.length < 2) {
                    System.err.println("Skipping malformed match-set line: " + line);
                    continue;
                }
                out.write(convertItemList(jedis, columns[1]));
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeResource(out);
            closeResource(reader);
            jedis.close();
        }
    }

    /**
     * Converts one item list into one output line, newline included.
     *
     * <p>The list is split on {@code ';'}. An element longer than 7 characters is further
     * split on {@code ','} and each item's terms are followed by {@code ','}; any other
     * element is treated as a single item id whose terms are followed by {@code ';'}.
     *
     * <p>NOTE(review): the length test presumably tells comma-separated groups apart from
     * single ids; it would misclassify a single id longer than 7 characters. Also, a
     * multi-item group ends with {@code ','} rather than {@code ';'}. Both are kept as-is so
     * the output format does not change; confirm against the consumer of the output file.
     */
    private static String convertItemList(Jedis jedis, String itemList) {
        StringBuilder converted = new StringBuilder();
        String[] groups = itemList.split(";");
        for (int g = 0; g < groups.length; g++) {
            String group = groups[g];
            if (group.length() > 7) {
                String[] itemIds = group.split(",");
                for (int i = 0; i < itemIds.length; i++) {
                    converted.append(termsFor(jedis, itemIds[i])).append(',');
                }
            } else {
                converted.append(termsFor(jedis, group)).append(';');
            }
        }
        converted.append('\n');
        return converted.toString();
    }

    /**
     * Returns the terms stored for {@code itemId} with every comma replaced by a space.
     *
     * <p>If the item is not in Redis, or its stored value has no string under {@code "terms"},
     * a warning is printed to stderr and the empty string is returned, so the caller still
     * emits the separator for that position.
     */
    private static String termsFor(Jedis jedis, String itemId) {
        Object stored = readObject(jedis.get(itemId.getBytes(UTF_8)));
        if (stored instanceof Map) {
            Object terms = ((Map<?, ?>) stored).get("terms");
            if (terms instanceof String) {
                return ((String) terms).replace(',', ' ');
            }
        }
        System.err.println("No terms found for item id: " + itemId);
        return "";
    }

    /** Opens {@code path} as a buffered UTF-8 text reader. */
    private static BufferedReader openUtf8Reader(String path) throws FileNotFoundException {
        return new BufferedReader(new InputStreamReader(new FileInputStream(path), UTF_8));
    }

    /** Closes {@code resource} if it is non-null, reporting (not propagating) any failure. */
    private static void closeResource(Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
相关文章推荐
- Spring mvc interceptor配置拦截器,没有登录跳到登录页
- Java 常见的几种排序算法-插入、选择、冒泡、快排、堆排等
- java动态代理的实现
- Java输出数组的内容
- Eclipse上面安装egit插件,以及上传项目到开源社区Git
- Eclipse生成jar文件
- java 多线程 实现窗口买票功能
- 【解决】SSH项目问题(7)java.lang.NoSuchMethodError: antlr.collections.AST.getLine()I
- java的动态代理机制详解
- java算法----判断字符串是否为数值型字符串
- 用xfire实现基于java的webservice接口调用
- RxJava学习资源
- leetcode 166:Fraction to Recurring Decimal java实现(细致分析)
- 怎样用MyEclipse编写调试JSP网页
- java中的多线程
- Java中Process和Runtime()使用,以及调用cmd命令阻塞在process.waitfor( )的问题解决
- mark Java NIO
- Spring拦截器中通过request获取到该请求对应Controller中的method对象
- java.net.UnknownHostException 异常处理
- java常用库学习(3)