您的位置：首页 > 理论基础 > 数据结构算法

1、一组N个数，确定其中第k个最大值

2017-11-28 15:25 1341 查看

一组N个数，确定其中第k个最大值

《数据结构与算法分析（C语言描述）》一书已购多日，没有时间看，今天重拾，发现确是一本不可多得的好书，从第一章开始看起，决定将书中的每个问题均码一遍，为了兼顾Python和Java的学习，本博客采用三种编程语言同时编写，直到笔者对Python和Java已经很熟练为止。

在本书的第1章引论的1.1节中（P1），作者给出了这样的一个问题：

设有一组N个数而要确定其中第k个最大者

作者给出了两种思考的方法：

1、将这N个数读进一个数组中，再通过简单的算法，比如冒泡排序算法，以递减规律排序，然后返回位置k上的数字

2、将前k个数读入数组并递减排序，然后将剩下的元素逐个读入，当新元素被读到时，如果它小于数组中的第k个元素，则忽略，否则就将其放到数组中正确的位置上，同时将数组中的一个元素挤出数组。当算法终止，位于第k个位置的元素返回

首先我们用下面Python代码生成一个存放在F盘的txt文档，保存了10000个随机浮点数。然后我们取第5000个最大的数。

# -*- coding:utf-8 -*-
# 采用Python3.6编写
import random
import datetime
import time

# Number of random values written to the data file.
dataCount = 10000


def genDataBase1(fileName, dataCount):
    """Write ``dataCount`` random floats to ``fileName``, one per line.

    Each value is ``random.random() * dataCount`` formatted with ``%f``
    (six digits after the decimal point). An existing file is overwritten.

    Args:
        fileName: Path of the text file to create.
        dataCount: How many lines to write; also the scale factor applied
            to each random value.
    """
    # The context manager closes the file even if a write fails.
    with open(fileName, 'w') as outp:
        for _ in range(dataCount):
            value = random.random() * dataCount
            outp.write("%f\n" % value)

if __name__ == "__main__":
    # Seed from system entropy so every run produces a different data set.
    random.seed()
    start = time.time()
    # Raw string: the Windows path contains backslashes that must not be
    # interpreted as escape sequences. The resulting path is unchanged.
    genDataBase1(r'F:\Data Structures and Algorithm Analysis in C\random_float.txt', dataCount)
    end = time.time()
    print('use time:', (end - start))
    print('Ok')

1、以下是C++代码：

结论是利用快速排序比冒泡排序速度快很多。

C++是在release下生成解决方案，然后在命令行下运行exe程序得到的时间。

C++代码（利用库函数sort快速排序）（在VS2015中编译通过）

时间0.019s，答案为4987.73。

#include <iostream>
#include<string>
#include<fstream>
#include<vector>
#include <sstream>
#include <time.h>
#include<algorithm>
using namespace std;
// Reads one float per line from the data file, sorts all values in ascending
// order with the library sort, and prints the k-th largest value together
// with the elapsed CPU time (file reading is included in the timing).
// Returns 1 if the file cannot be opened or holds fewer than k values.
int main()
{
    // Input text file with one floating-point number per line.
    std::string filename = "F:\\Data Structures and Algorithm Analysis in C\\random_float.txt";
    std::ifstream infile(filename.c_str());
    if (!infile)
    {
        std::cerr << "cannot open " << filename << std::endl;
        return 1;
    }
    std::string temp;
    float value;
    std::vector<float> list;
    clock_t start_time = clock(); // start of the timed section
    const size_t k = 5000;        // rank to report: the k-th largest value

    while (std::getline(infile, temp))
    {
        // Convert the line to a float through a string stream; lines that
        // do not parse as a number are skipped.
        std::stringstream stream(temp);
        if (stream >> value)
        {
            list.push_back(value);
        }
    }
    if (k == 0 || list.size() < k)
    {
        std::cerr << "need at least " << k << " values, got " << list.size() << std::endl;
        return 1;
    }

    std::sort(list.begin(), list.end()); // ascending order
    // In ascending order the k-th largest element sits at index size - k.
    value = list[list.size() - k];

    clock_t end_time = clock();
    // CLOCKS_PER_SEC is the number of clock ticks per second.
    std::cout << "运行时间：" << static_cast<double>(end_time - start_time) / CLOCKS_PER_SEC << "s" << std::endl; // elapsed time
    std::cout << "答案为：" << value << std::endl; // the k-th largest value
    // Keep the console window open until a key is pressed; cin.get() is used
    // because it only needs <iostream>, which this file already includes.
    std::cin.get();
    return 0;
}

C++代码（利用冒泡排序）（在VS2015中编译通过）

时间0.147s，答案为4987.73。

#include <iostream>
#include<string>
#include<fstream>
#include<vector>
#include <sstream>
#include <time.h>
using namespace std;
// Reads one float per line from the data file, sorts all values in ascending
// order with a hand-written bubble sort, and prints the k-th largest value
// together with the elapsed CPU time (file reading is included in the timing).
// Returns 1 if the file cannot be opened or holds fewer than k values.
int main()
{
    // Input text file with one floating-point number per line.
    std::string filename = "F:\\Data Structures and Algorithm Analysis in C\\random_float.txt";
    std::ifstream infile(filename.c_str());
    if (!infile)
    {
        std::cerr << "cannot open " << filename << std::endl;
        return 1;
    }
    std::string temp;
    float value;
    std::vector<float> list;
    clock_t start_time = clock(); // start of the timed section
    const size_t k = 5000;        // rank to report: the k-th largest value

    while (std::getline(infile, temp))
    {
        // Convert the line to a float through a string stream; lines that
        // do not parse as a number are skipped.
        std::stringstream stream(temp);
        if (stream >> value)
        {
            list.push_back(value);
        }
    }

    // Bounds come from the number of values actually read, not from an
    // assumed file length.
    const size_t count = list.size();
    if (k == 0 || count < k)
    {
        std::cerr << "need at least " << k << " values, got " << count << std::endl;
        return 1;
    }

    // Bubble sort, ascending: after each outer pass the largest remaining
    // element has moved to the end of the unsorted prefix.
    for (size_t pass = 0; pass + 1 < count; pass++)
    {
        for (size_t i = 0; i + 1 < count - pass; i++)
        {
            if (list[i] > list[i + 1])
            {
                value = list[i + 1];
                list[i + 1] = list[i];
                list[i] = value;
            }
        }
    }
    // In ascending order the k-th largest element sits at index count - k.
    value = list[count - k];

    clock_t end_time = clock();
    // CLOCKS_PER_SEC is the number of clock ticks per second.
    std::cout << "运行时间：" << static_cast<double>(end_time - start_time) / CLOCKS_PER_SEC<< "s" << std::endl; // elapsed time
    std::cout << "答案为：" << value << std::endl; // the k-th largest value
    // Keep the console window open until a key is pressed; cin.get() is used
    // because it only needs <iostream>, which this file already includes.
    std::cin.get();
    return 0;
}

C++代码（库函数sort快速排序+挤出数组法）（在VS2015中编译通过）

时间0.171s，答案为4987.73。

#include <iostream>
#include<string>
#include<fstream>
#include<vector>
#include <sstream>
#include <time.h>
#include <algorithm>
using namespace std;

// "Squeeze-out" method with the library sort: keeps only the k largest values
// seen so far in an ascending array. The first k values are read and sorted;
// every later value that is larger than the current minimum (index 0)
// replaces it and the array is re-sorted. At the end index 0 holds the k-th
// largest value of the whole file.
// Returns 1 if the file cannot be opened or holds fewer than k values.
int main()
{
    // Input text file with one floating-point number per line.
    std::string filename = "F:\\Data Structures and Algorithm Analysis in C\\random_float.txt";
    std::ifstream infile(filename.c_str());
    if (!infile)
    {
        std::cerr << "cannot open " << filename << std::endl;
        return 1;
    }
    std::string temp;
    float value;
    std::vector<float> list;
    clock_t start_time = clock(); // start of the timed section
    const size_t k = 5000;        // rank to report: the k-th largest value

    // Phase 1: fill the array with the first k parsable values.
    while (list.size() < k && std::getline(infile, temp))
    {
        // Convert the line to a float through a string stream; lines that
        // do not parse as a number are skipped.
        std::stringstream stream(temp);
        if (stream >> value)
        {
            list.push_back(value);
        }
    }
    if (k == 0 || list.size() < k)
    {
        std::cerr << "need at least " << k << " values, got " << list.size() << std::endl;
        return 1;
    }

    std::sort(list.begin(), list.end()); // ascending: list[0] is the smallest kept value

    // Phase 2: feed the remaining values into the array.
    while (std::getline(infile, temp))
    {
        std::stringstream stream(temp);
        if (!(stream >> value))
        {
            continue;
        }
        if (value > list[0])
        {
            // The new value pushes out the current minimum; re-sort to
            // restore ascending order.
            list[0] = value;
            std::sort(list.begin(), list.end());
        }
    }

    // The smallest of the k largest values is the k-th largest overall.
    value = list[0];

    clock_t end_time = clock();
    // CLOCKS_PER_SEC is the number of clock ticks per second.
    std::cout << "运行时间：" << static_cast<double>(end_time - start_time) / CLOCKS_PER_SEC<< "s" << std::endl; // elapsed time
    std::cout << "答案为：" << value << std::endl; // the k-th largest value
    // Keep the console window open until a key is pressed; cin.get() is used
    // because it only needs <iostream>, which this file already includes.
    std::cin.get();
    return 0;
}

C++代码（冒泡排序+挤出数组法）（在VS2015中编译通过）

时间0.061s，答案为4987.73。

#include <iostream>
#include<string>
#include<fstream>
#include<vector>
#include <sstream>
#include <time.h>
using namespace std;

// Restores ascending order after the first element of an otherwise sorted
// array has been replaced by a new value.
//
// Performs one bubble pass from the front: each adjacent pair that is out of
// order is swapped, which carries the new front element up to its position.
// The vector is taken by reference so the caller's array is modified in
// place. The pass covers the whole vector, whatever its length; vectors with
// fewer than two elements are left untouched.
void update(std::vector<float> &list)
{
    float value;
    // "index + 1 < size" keeps the comparison valid for an empty vector,
    // where "size - 1" would wrap around for an unsigned type.
    for (size_t index = 0; index + 1 < list.size(); index++)
    {
        if (list[index] > list[index + 1])
        {
            value = list[index + 1];
            list[index + 1] = list[index];
            list[index] = value;
        }
    }
}

// "Squeeze-out" method with bubble sort: keeps only the k largest values seen
// so far in an ascending array. The first k values are read and bubble
// sorted; every later value that is larger than the current minimum
// (index 0) replaces it and update() is called to restore the order. At the
// end index 0 holds the k-th largest value of the whole file.
// Returns 1 if the file cannot be opened or holds fewer than k values.
int main()
{
    // Input text file with one floating-point number per line.
    std::string filename = "F:\\Data Structures and Algorithm Analysis in C\\random_float.txt";
    std::ifstream infile(filename.c_str());
    if (!infile)
    {
        std::cerr << "cannot open " << filename << std::endl;
        return 1;
    }
    std::string temp;
    float value;
    std::vector<float> list;
    clock_t start_time = clock(); // start of the timed section
    const size_t k = 5000;        // rank to report: the k-th largest value

    // Phase 1: fill the array with the first k parsable values.
    while (list.size() < k && std::getline(infile, temp))
    {
        // Convert the line to a float through a string stream; lines that
        // do not parse as a number are skipped.
        std::stringstream stream(temp);
        if (stream >> value)
        {
            list.push_back(value);
        }
    }
    // Stop here when the file is too short: the sort below and update()
    // both rely on the array holding exactly k elements.
    if (k == 0 || list.size() < k)
    {
        std::cerr << "need at least " << k << " values, got " << list.size() << std::endl;
        return 1;
    }

    // Bubble sort of the first k values, ascending.
    for (size_t pass = 0; pass + 1 < k; pass++)
    {
        for (size_t i = 0; i + 1 < k - pass; i++)
        {
            if (list[i] > list[i + 1])
            {
                value = list[i + 1];
                list[i + 1] = list[i];
                list[i] = value;
            }
        }
    }

    // Phase 2: feed the remaining values into the array.
    while (std::getline(infile, temp))
    {
        std::stringstream stream(temp);
        if (!(stream >> value))
        {
            continue;
        }
        if (value > list[0])
        {
            // The new value pushes out the current minimum; update()
            // reorders the array through its reference parameter.
            list[0] = value;
            update(list);
        }
    }

    // The smallest of the k largest values is the k-th largest overall.
    value = list[0];

    clock_t end_time = clock();
    // CLOCKS_PER_SEC is the number of clock ticks per second.
    std::cout << "运行时间：" << static_cast<double>(end_time - start_time) / CLOCKS_PER_SEC<< "s" << std::endl; // elapsed time
    std::cout << "答案为：" << value << std::endl; // the k-th largest value
    // Keep the console window open until a key is pressed; cin.get() is used
    // because it only needs <iostream>, which this file already includes.
    std::cin.get();
    return 0;
}

2、以下是Python3.6代码：

Python代码1：不得不说，Python计算的速度、精确度都远比其他语言高，而且代码量很少。

时间0.004999876022338867s，答案为4987.7267。

#-*- coding:utf-8-*-
import string
import time
def kth_largest_in_file(path, k):
    """Return the k-th largest number stored in a text file.

    The file is expected to hold one floating-point number per line; blank
    lines are ignored. All values are loaded and sorted in ascending order,
    so the k-th largest one is found at index ``len(data) - k``.

    Args:
        path: Path of the text file to read.
        k: 1-based rank counted from the largest value.

    Returns:
        The k-th largest value as a float.

    Raises:
        ValueError: If ``k`` is not between 1 and the number of values read,
            or if a non-blank line is not a valid float.
    """
    # The context manager closes the file as soon as reading is done.
    with open(path) as source:
        data = [float(line) for line in source if line.strip()]
    if not 1 <= k <= len(data):
        raise ValueError("k must be between 1 and %d, got %r" % (len(data), k))
    data.sort()
    return data[len(data) - k]


if __name__ == "__main__":
    start = time.time()
    # Raw string: the Windows path contains backslashes that must not be
    # interpreted as escape sequences. The resulting path is unchanged.
    answer = kth_largest_in_file(
        r"F:\Data Structures and Algorithm Analysis in C\random_float.txt", 5000)
    end = time.time()
    print('use time:', (end - start))
    print(answer)

Python代码2：注意sort_full先用data[:]复制列表再排序，不会修改传入的原列表（Python传递的是对象引用，并不会自动复制列表），因此必须接收函数的返回值！

时间6.343999862670898s，答案为4987.7267。

#-*- coding:utf-8-*-
import string
import time

# Full bubble sort
def sort_full(data):
    """Return a new list with the items of ``data`` in ascending order.

    Runs a complete bubble sort (every pass is executed, with no early exit)
    on a copy of the input, so the list passed in is left unchanged. Works
    for any length, including empty and single-element lists.

    Args:
        data: List of mutually comparable items.

    Returns:
        A new, sorted list.
    """
    output = data[:]  # slice copy: the caller's list is not modified
    last = len(output) - 1
    for i in range(last):
        # After pass i the largest i + 1 items are already at the end.
        for j in range(last - i):
            if output[j] > output[j + 1]:
                output[j], output[j + 1] = output[j + 1], output[j]
    return output

if __name__ == "__main__":
    k = 5000  # rank to report: the k-th largest value
    start = time.time()
    # Raw string: the Windows path contains backslashes that must not be
    # interpreted as escape sequences. The context manager closes the file
    # once all lines have been read; blank lines are ignored.
    with open(r"F:\Data Structures and Algorithm Analysis in C\random_float.txt") as source:
        data = [float(line) for line in source if line.strip()]
    # sort_full returns a sorted copy, so the result has to be rebound.
    data = sort_full(data)
    end = time.time()
    print('use time:', (end - start))
    # In ascending order the k-th largest value sits at index len(data) - k.
    print(data[len(data) - k])

3、以下是Java代码：

先占坑

内容来自用户分享和网络整理，不保证内容的准确性，如有侵权内容，可联系管理员处理

标签： python java 数据结构 c++语言

相关文章推荐

新的分享

章节导航