您的位置:首页 > 编程语言 > Java开发

ForeResult.java

2015-11-05 16:18 411 查看
ForeResult.java(输出:Result2.txt):对 ExampleToTerms2.txt 中的每条记录,在产品库中匹配出最相似的 200 条商品。注意:下方代码实际读取的是 di.txt,并将结果写入 Result4.txt。

package test;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;

/**
 * Batch job that ranks catalog items by term overlap with each query row and
 * appends the ids of the best matches to a result file.
 *
 * <p>Each query row is a comma-separated list of terms. Each catalog line is
 * split on single spaces; lines with more than two fields contribute an item
 * whose id is the first field and whose comma-separated terms are the third
 * field. The similarity of an item to a query is the number of shared terms
 * divided by the number of query terms.
 */
public class ForeResult {

    /** Number of best-matching item ids written per query row. */
    private static final int TOP_N = 200;

    /** Query rows, one comma-separated term list per line. */
    private static final String QUERY_FILE = "/public/home/dsj/Public/sundujing/fpgrowth/di.txt";

    /** Item catalog that every query row is matched against. */
    private static final String CATALOG_FILE = "/public/home/dsj/Public/sundujing/fpgrowth/dim_items.txt";

    /** Output file; one line of comma-terminated item ids is appended per query row. */
    private static final String RESULT_FILE = "/public/home/dsj/Public/sundujing/fpgrowth/Result4.txt";

    /** Item ids and their raw (unsplit) term lists, kept in catalog-file order. */
    private static final class Catalog {
        final int[] ids;
        final String[] termLists;

        Catalog(int[] ids, String[] termLists) {
            this.ids = ids;
            this.termLists = termLists;
        }
    }

    /**
     * Moves the largest values of {@code a} to the front in descending order,
     * applying the same swaps to {@code b}.
     *
     * <p>Despite the name this is a partial selection sort: only the first
     * {@value #TOP_N} positions (or all of them, for shorter arrays) are
     * guaranteed to be in order; the rest of the array is left in whatever
     * order the swaps produced. Both arrays are modified in place.
     *
     * @param a scores to order; modified in place
     * @param b values paired index-by-index with {@code a}; must be at least as
     *          long as {@code a}
     * @return the same array instance as {@code a}
     */
    public static double[] bubbleSort(double[] a, int[] b) {
        int sortedPrefix = Math.min(TOP_N, a.length);
        for (int i = 0; i < sortedPrefix; i++) {
            for (int j = i + 1; j < a.length; j++) {
                // Strict comparison: equal scores are never swapped.
                if (a[i] < a[j]) {
                    double score = a[j];
                    a[j] = a[i];
                    a[i] = score;
                    int paired = b[j];
                    b[j] = b[i];
                    b[i] = paired;
                }
            }
        }
        return a;
    }

    /**
     * Counts the pairs {@code (x, y)} with {@code x} taken from {@code s2},
     * {@code y} taken from {@code s1} and {@code x.equals(y)}.
     *
     * <p>Duplicates are counted once per pair, so the result can exceed the
     * length of either array.
     *
     * @param s1 first array of terms
     * @param s2 second array of terms
     * @return number of equal pairs
     */
    public static int count(String[] s1, String[] s2) {
        int matches = 0;
        for (String candidate : s2) {
            for (String term : s1) {
                if (candidate.equals(term)) {
                    matches++;
                }
            }
        }
        return matches;
    }

    /**
     * Returns the elements the two arrays have in common, in sorted order.
     *
     * <p>Duplicates are matched one-to-one: an element appearing twice in one
     * array and once in the other is returned once. The input arrays are not
     * modified; sorting is done on copies.
     *
     * @param strArr1 first array of terms, may be {@code null}
     * @param strArr2 second array of terms, may be {@code null}
     * @return the common elements, or {@code null} if either argument is
     *         {@code null}
     */
    public static List<String> getAllSameElement2(String[] strArr1, String[] strArr2) {
        if (strArr1 == null || strArr2 == null) {
            return null;
        }
        String[] left = strArr1.clone();
        String[] right = strArr2.clone();
        Arrays.sort(left);
        Arrays.sort(right);
        return intersectSorted(left, right);
    }

    /** Merge-style intersection of two arrays that are already sorted in natural order. */
    private static List<String> intersectSorted(String[] left, String[] right) {
        List<String> common = new ArrayList<String>();
        int i = 0;
        int j = 0;
        while (i < left.length && j < right.length) {
            int order = left[i].compareTo(right[j]);
            if (order == 0) {
                common.add(left[i]);
                i++;
                j++;
            } else if (order < 0) {
                i++;
            } else {
                j++;
            }
        }
        return common;
    }

    /**
     * Appends {@code content} to the named file, creating the file if needed.
     *
     * <p>I/O failures are reported on standard error and otherwise ignored.
     *
     * @param fileName path of the file to append to
     * @param content  text to append
     */
    public static void appendMethod(String fileName, String content) {
        // The second constructor argument selects append mode; try-with-resources
        // closes the writer even when write() fails.
        try (FileWriter writer = new FileWriter(fileName, true)) {
            writer.write(content);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Reads all query rows from the query file as UTF-8 text.
     *
     * @return one element per line read, in file order; empty if the file
     *         cannot be opened, and truncated to the lines read so far if
     *         reading fails part-way (the failure is reported on standard error)
     */
    public static String[] getStrings() {
        List<String> lines = new ArrayList<String>();
        try (BufferedReader reader = openUtf8(QUERY_FILE)) {
            String line;
            while ((line = reader.readLine()) != null) {
                lines.add(line);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        return lines.toArray(new String[0]);
    }

    /** Opens {@code path} for buffered reading, decoding the bytes as UTF-8. */
    private static BufferedReader openUtf8(String path) throws IOException {
        return new BufferedReader(new InputStreamReader(new FileInputStream(path), "UTF-8"));
    }

    /**
     * Loads every usable catalog line: lines that split on a single space into
     * more than two fields. The first field is parsed as the item id and the
     * third field is kept as the item's raw term list.
     *
     * @throws IOException           if the file cannot be opened or read
     * @throws NumberFormatException if an id field is not a valid integer
     */
    private static Catalog loadCatalog(String path) throws IOException {
        List<Integer> ids = new ArrayList<Integer>();
        List<String> termLists = new ArrayList<String>();
        try (BufferedReader reader = openUtf8(path)) {
            String line;
            while ((line = reader.readLine()) != null) {
                String[] fields = line.split(" ");
                if (fields.length > 2) {
                    ids.add(Integer.parseInt(fields[0]));
                    termLists.add(fields[2]);
                }
            }
        }
        int[] idArray = new int[ids.size()];
        for (int k = 0; k < idArray.length; k++) {
            idArray[k] = ids.get(k);
        }
        return new Catalog(idArray, termLists.toArray(new String[0]));
    }

    /**
     * Fraction of query terms that also occur among the item's terms.
     *
     * @param sortedQueryTerms query terms, already sorted in natural order
     * @param itemTerms        item terms; sorted in place by this method
     * @return shared-term count divided by the query term count, or 0 when the
     *         query has no terms (avoids a NaN from 0/0)
     */
    private static double similarity(String[] sortedQueryTerms, String[] itemTerms) {
        if (sortedQueryTerms.length == 0) {
            return 0.0;
        }
        Arrays.sort(itemTerms);
        int shared = intersectSorted(sortedQueryTerms, itemTerms).size();
        return (double) shared / sortedQueryTerms.length;
    }

    /**
     * Builds the output line for one query row: the ids of the best-matching
     * catalog items, each followed by a comma, terminated by a newline.
     */
    private static String rankRow(String query, Catalog catalog) {
        String[] queryTerms = query.split(",");
        Arrays.sort(queryTerms);

        int itemCount = catalog.ids.length;
        // bubbleSort permutes the id array, so rank a copy and keep the catalog
        // ids aligned with their term lists for the next query.
        int[] rankedIds = catalog.ids.clone();
        double[] scores = new double[itemCount];
        for (int j = 0; j < itemCount; j++) {
            scores[j] = similarity(queryTerms, catalog.termLists[j].split(","));
        }
        bubbleSort(scores, rankedIds);

        // Never emit more ids than the catalog actually holds.
        int emitted = Math.min(TOP_N, itemCount);
        StringBuilder row = new StringBuilder();
        for (int k = 0; k < emitted; k++) {
            row.append(rankedIds[k]).append(',');
        }
        return row.append('\n').toString();
    }

    /**
     * Ranks the catalog against every query row and appends one result line
     * per row to the result file.
     *
     * <p>The catalog is read once up front. If it cannot be read or contains a
     * non-numeric id, the failure is reported on standard error and nothing is
     * written.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String[] queries = getStrings();
        if (queries.length == 0) {
            return;
        }

        Catalog catalog;
        try {
            catalog = loadCatalog(CATALOG_FILE);
        } catch (IOException | NumberFormatException e) {
            e.printStackTrace();
            return;
        }

        for (String query : queries) {
            appendMethod(RESULT_FILE, rankRow(query, catalog));
        }
    }

}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: