您的位置：首页 > 其它

快速排序的优化

2012-11-04 17:49 197 查看

今天，我们组这周小会讲了讲算法导论上的快速排序优化问题；和大家分享，有什么问题，希望大家指正，谢谢！！！

1. 快速排序定义：

通过一趟排序将待排序数组分割成独立的两部分，其中一部分记录的关键字均比另一部分记录的关键字小，则可分别对这两部分记录继续进行排序，以达到整个序列有序。

它是采用分治思想，进行排序；每次选取一个枢轴，确定枢轴的位置，将原数组分成两部分，进行递归排序；

2. 普通排序算法

以下是 严蔚敏 课本上的数据结构代码的实现：

#include "stdafx.h"
#include <time.h>
#include <iostream>
#include <algorithm>
using namespace std;

// Number of elements in the array that _tmain allocates, fills with random values and sorts.
const int LEN = 1000000; 

// Partitions arr[low..high] (both bounds inclusive) around the pivot value
// taken from arr[low], using the "hole filling" scheme: the pivot is lifted
// out of the array, and elements are copied into the vacated slot alternately
// from the right end and from the left end until the two cursors meet.
//
// On return every element left of the returned index is <= the pivot and
// every element right of it is >= the pivot.
//
// Returns the final index of the pivot.
int  Partition(int* arr, int low, int high)
{
	const int pivot = arr[low];
	int left = low;
	int right = high;

	while ( left < right )
	{
		// Move the right cursor inwards past values that may stay on the right.
		for ( ; left < right && !(arr[right] < pivot); --right )
		{
		}
		arr[left] = arr[right];

		// Move the left cursor inwards past values that may stay on the left.
		for ( ; left < right && !(pivot < arr[left]); ++left )
		{
		}
		arr[right] = arr[left];
	}

	// The cursors have met on the hole: that is where the pivot belongs.
	arr[left] = pivot;
	return left;
}

// Recursive quicksort of arr[low..high] into ascending order.
//   arr  - pointer to the first element of the array
//   low  - smallest index of the range to sort
//   high - largest index of the range to sort (inclusive)
void QuickSort(int* arr, int low, int high)
{
	// A range with fewer than two elements needs no work.
	if ( high - low <= 0 )
	{
		return;
	}

	// Place one pivot, then sort what lies on either side of it.
	const int pivotloc = Partition(arr, low, high);
	QuickSort(arr, low, pivotloc-1);
	QuickSort(arr, pivotloc+1, high);
}

// Program entry point: allocates an array of LEN ints, fills it with
// pseudo-random values (rand() reduced modulo 1000000), sorts it with
// QuickSort and prints the sorted values to standard output on one line,
// separated by spaces.
// The command-line arguments are not used. Always returns 0.
int _tmain(int argc, _TCHAR* argv[])
{
	int* arr = new int[LEN];
	// Seed once, from the current time, so each run sorts different data.
	srand((unsigned)time(NULL));
	for (int i = 0; i < LEN; i++)
	{
		arr[i] = rand()%1000000;
	}

	QuickSort(arr,0,LEN-1);

	// Print the result.
	cout<<"排序后的数组:"<<endl;
	for ( int i = 0; i< LEN; i++)
	{
		cout<<arr[i]<<" ";
	}
	cout<<endl;

	// Release the buffer allocated above; it was previously leaked.
	delete[] arr;

	return 0;
}

上面的实现总是把low位置的元素作为枢轴，但这样的选择不一定是最优的；

原因是：当数组已经有序时，每次划分都极不均衡（一侧为空），排序的时间复杂度退化为 $O(n^2)$；而平均情况下为 $O(n\log n)$。

3. 对枢轴选取的优化策略：

可分为

1、随机选择枢轴

原理：在待排序区间内随机选择一个下标作为枢轴，并将该元素与low下标处的元素交换；

//////////////////////////////////////////////////////////////////////////
//随机产生的枢轴

// Exchanges the two integers stored at positions index1 and index2 of arr.
// When both positions already hold the same value nothing is written.
inline void Swap(int* arr, int index1, int index2)
{
	const int first = arr[index1];
	const int second = arr[index2];

	if ( first != second )
	{
		arr[index1] = second;
		arr[index2] = first;
	}
}

// Partitions arr[low..high] (both bounds inclusive) around a pivot drawn
// pseudo-randomly from the whole range, and returns the pivot's final index.
// On return every element left of that index is <= the pivot and every
// element right of it is >= the pivot.
//
// The generator is NOT seeded here: call srand() once at program start.
// Reseeding from time() on every call would make all calls within the same
// second draw the same value and adds needless cost per partition.
//
// NOTE: rand() never exceeds RAND_MAX, so for ranges longer than RAND_MAX+1
// elements the pivot can only come from the leading part of the range.
//
// A range with fewer than two elements is left untouched and low is returned.
int Partition_RandPiv(int* arr, int low, int high)
{
	if ( high <= low )
	{
		return low;
	}

	// high-low+1 is the element count, so every index in [low, high] can be drawn.
	int index = rand()%(high-low+1) + low;

	// Lift the pivot out and park the old arr[low] in the pivot's former slot.
	// arr[low] now acts as the "hole" that the loop below fills first; this is
	// equivalent to swapping arr[index] with arr[low] and then reading arr[low].
	int temp = arr[index];
	arr[index] = arr[low];

	while(low < high)
	{
		// Scan from the right for a value smaller than the pivot ...
		while ( low < high && arr[high] >= temp )
		{
			high--;
		}
		arr[low] = arr[high];	// ... and drop it into the hole on the left.

		// Scan from the left for a value larger than the pivot ...
		while ( low < high && arr[low] <= temp )
		{
			low++;
		}
		arr[high] = arr[low];	// ... and drop it into the hole on the right.
	}

	// Both cursors have met on the hole: that is where the pivot belongs.
	arr[low] = temp;
	return low;
}

// Recursive quicksort of arr[low..high] (inclusive bounds) in which each
// partitioning step is delegated to Partition_RandPiv.
void QuickSort_RandPiv(int* arr, int low, int high)
{
	// A range with fewer than two elements needs no work.
	if ( high - low <= 0 )
	{
		return;
	}

	const int pivotloc = Partition_RandPiv(arr, low, high);
	QuickSort_RandPiv(arr, low, pivotloc-1);
	QuickSort_RandPiv(arr, pivotloc+1, high);
}

2、三数取中

原理：将下标为左、中、右的元素值进行比较，取中值作为枢轴值，进行本次排序

//////////////////////////////////////////////////////////////////////////
//
//3数取中
//
// Returns whichever of the three indices n1, n2, n3 refers to the median of
// the values arr[n1], arr[n2] and arr[n3].
//
// The previous version compared arr[n1] against the larger of arr[n1]/arr[n2],
// a test that can never succeed, so it returned n3 even when arr[n3] was the
// smallest of the three. The comparison now checks arr[n3] against the
// smaller of the first two values.
int MidNumOf3(int* arr, int n1, int n2, int n3)
{
	// Order the first two candidates: temp1 indexes the smaller value,
	// temp2 the larger one.
	int temp1, temp2;
	if ( arr[n1] < arr[n2] )
	{
		temp1 = n1;
		temp2 = n2;
	}
	else
	{
		temp1 = n2;
		temp2 = n1;
	}

	if ( arr[n3] > arr[temp2] )
	{
		// n3 is the largest, so the larger of the first two is the median.
		return temp2;
	}
	else if ( arr[n3] < arr[temp1] )
	{
		// n3 is the smallest, so the smaller of the first two is the median.
		return temp1;
	}
	else
	{
		// arr[temp1] <= arr[n3] <= arr[temp2]: n3 itself is the median.
		return n3;
	}
}

// Partitions arr[low..high] (inclusive bounds) after first moving to arr[low]
// the element whose index MidNumOf3 selects from the first, middle and last
// positions of the range. That element is then used as the pivot for a
// hole-filling partition.
//
// Returns the final index of the pivot.
int Partition_Mid_3(int* arr, int low, int high)
{
	const int middle = low + ((high-low)>>1);
	const int chosen = MidNumOf3(arr, low, middle, high);
	Swap(arr, chosen, low);

	const int pivot = arr[low];
	int left = low;
	int right = high;

	while ( left < right )
	{
		// Move the right cursor inwards past values that may stay on the right.
		for ( ; left < right && !(arr[right] < pivot); --right )
		{
		}
		arr[left] = arr[right];

		// Move the left cursor inwards past values that may stay on the left.
		for ( ; left < right && !(pivot < arr[left]); ++left )
		{
		}
		arr[right] = arr[left];
	}

	// The cursors have met on the hole: that is where the pivot belongs.
	arr[left] = pivot;
	return left;
}

// Recursive quicksort of arr[low..high] (inclusive bounds) in which each
// partitioning step is delegated to Partition_Mid_3.
void QuickSort_Mid_3(int* arr, int low, int high)
{
	// A range with fewer than two elements needs no work.
	if ( high - low <= 0 )
	{
		return;
	}

	const int pivotloc = Partition_Mid_3(arr, low, high);
	QuickSort_Mid_3(arr, low, pivotloc-1);
	QuickSort_Mid_3(arr, pivotloc+1, high);
}

3、一次划分，将与枢轴相同的数字聚集到一起，减少递归次数

例如：对数组{6,4,6,7,1,6,7,6,8,6}进行排序;

对第一次划分：6作为枢轴，划分后得到：{1,4,6,6,6,6,6,7,8,7};

然后再对{1,4}，{7,8,7}递归进行排序。

原理：这样既可以减少划分次数，且降低了递归次数。

// Partitions arr[low..high] (inclusive bounds) around the pivot value arr[low]
// and moves elements equal to the pivot next to the pivot's final position.
//
// While scanning, elements equal to the pivot are collected into two
// contiguous runs: [start1..end1] on the right side and [start2..end2] on the
// left side. Once the pivot has been placed, elements of those runs are
// swapped with the positions directly adjacent to the pivot.
//
// Parameters:
//   arr       - array to partition (modified in place)
//   low, high - first and last index of the range; they are passed by value
//               and reused as the two scan cursors
//   lowEnd    - output: value of nMove after the left gather step (pivot
//               position - 1 when no equal element was recorded on the left);
//               QuickSort_Gather uses it as the last index of the left part
//   highStart - output: value of nMove after the right gather step (pivot
//               position + 1 when no equal element was recorded on the right);
//               QuickSort_Gather uses it as the first index of the right part
void Partition_Gather(int* arr, int low, int high, int& lowEnd, int& highStart)
{

	// flag1 / falg2 stay true until the first pivot-equal element has been
	// seen by the right-hand / left-hand scan respectively.
	bool flag1 = true, falg2 = true;
	// Bounds of the run of pivot-equal elements on the right (-1 = none seen).
	int start1=-1, end1=-1;
	// Bounds of the run of pivot-equal elements on the left (-1 = none seen).
	int start2=-1, end2=-1;
	int nMove=-1;
	int temp = arr[low];	// pivot value; arr[low] becomes the first hole
	while(low < high)
	{
		// Scan from the right until an element smaller than the pivot is found.
		while ( low < high )
		{
			if ( arr[high] > temp )
			{
				high--;
			}
			else if ( arr[high] == temp )
			{
				if ( flag1 )
				{
					// First equal element: it starts the right-hand run.
					end1 = high;
					start1 = end1;
					flag1 = false;
				}
				else
				{
					// Grow the run one slot to the left and swap the equal
					// element into that slot.
					start1--;
					Swap(arr, high, start1);	// swap the two elements
				}
				high--;
			}
			else
			{
				break;
			}
		}
		arr[low] = arr[high];	// fill the hole on the left

		// Scan from the left until an element larger than the pivot is found.
		while ( low < high )
		{
			if ( arr[low] < temp )
			{
				low++;
			}
			else if ( arr[low] == temp )
			{
				if ( falg2 )
				{
					// First equal element: it starts the left-hand run.
					start2 = low;
					end2 = low;
					falg2 = false;
				}
				else
				{
					// Grow the run one slot to the right and swap the equal
					// element into that slot.
					end2++;
					Swap(arr, low, end2);
				}
				low++;
			}
			else
			{
				break;
			}
		}
		arr[high] = arr[low];	// fill the hole on the right
	}

	// The cursors have met: low is the pivot's final position.
	arr[low] = temp;	

	// Gather the elements equal to the pivot next to it.
	// Left side: swap positions low-1, low-2, ... with start2, start2+1, ...
	nMove = low - 1;
	if ( start2 != -1 )
	{
		nMove = low -1;
		while ( nMove >= end2 && start2 <= end2 )
		{
			Swap(arr, nMove, start2);
			start2++;
			nMove--;
		}
	}
	lowEnd = nMove;

	// Right side: swap positions low+1, low+2, ... with end1, end1-1, ...
	nMove = low + 1;
	if ( start1 != -1 )
	{
		nMove = low +1;
		while ( nMove <= start1 && start1 <= end1 )
		{
			Swap(arr, nMove, end1);
			end1--;
			nMove++;
		}
	}
	highStart = nMove;

}

// Recursive quicksort of arr[low..high] (inclusive bounds) built on
// Partition_Gather: after each partition only the two sub-ranges reported
// through Partition_Gather's output parameters are sorted further.
// Does nothing when low is negative or the range has fewer than two elements.
void QuickSort_Gather(int* arr, int low, int high)
{
	if ( low < 0 || high - low <= 0 )
	{
		return;
	}

	int leftEnd = -1;	// last index of the left sub-range
	int rightStart = -1;	// first index of the right sub-range
	Partition_Gather(arr, low, high, leftEnd, rightStart);

	QuickSort_Gather(arr, low, leftEnd);
	QuickSort_Gather(arr, rightStart, high);
}

4、实验时间比较

我们将上述几种，以及固定枢轴和STL中sort排序进行了时间统计，看看他们的效果如何：

我们对随机生成的10000000个整数进行了排序，效果如下：

时间比较

排序方法	时间
固定枢轴（为low）	1770ms
随机枢轴	2560ms
3数取中	1924ms
聚集相等元素	1312ms
STL中sort方法	843ms

由于数据是随机产生的一千万个整数，数字中肯定会有大量重复的数字，而sort排序速度之快，是因为它集成了几种优化策略，将3数取中等几种优化放到一起，接下来就说一下，其他的优化策略。

4. 对内存栈空间的优化

如果数组很长，递归的层数就会很深；而递归调用使用的是系统的栈空间，有可能导致栈溢出。

因此，我们得考虑尽量减少使用栈的大小；

原理：使用各种方法，降低递归次数；

方法有：

1、与插入排序结合使用；

原理：当待排序的数据量较小时，插排的效率是较高的。因此，当将要排序的子序列为几个(8或10等均可)元素时，直接用插排；最终减少了递归的深度和次数；

2、循环代替递归；

原理：我们在递归处理当前待排序的两个子序列时，我们可以规定，仅第一个子序列使用递归处理，第二个子序列我们使用while循环去处理；如下所示：

// Quicksort of arr[low..high] (inclusive bounds) that recurses only into the
// left sub-range and handles the right sub-range by looping.
//
// The condition must be a loop, not a single `if`: after the left part has
// been sorted, `low` is advanced past the pivot and the remaining right part
// is partitioned again in the next iteration. With a plain `if` the right
// part would never be sorted.
void QuickSort(int* arr, int low, int high)
{
	int pivotloc = -1;
	while ( high - low > 0 )
	{
		pivotloc = Partition(arr, low, high);
		QuickSort(arr, low, pivotloc-1);	// left part: recursive call
		low = pivotloc + 1;			// right part: next loop iteration
	}
}

这时，大家可能会想到对上述做法的进一步优化：每次对较长的待排子序列使用循环处理，而对较短的子序列进行递归处理；由于递归处理的子序列长度不超过当前序列的一半，递归深度最多为 $O(\log n)$；

代码如下描述：

// Quicksort of arr[low..high] (inclusive bounds) that recurses into the
// shorter of the two sub-ranges and continues looping on the longer one.
//
// The condition must be a loop, not a single `if`: the side that is not
// handed to the recursive call is kept in [low, high] and partitioned again
// in the next iteration. With a plain `if` that side would never be sorted.
void QuickSort(int* arr, int low, int high)
{
	int pivotloc = -1;
	while ( high - low > 0 )
	{
		pivotloc = Partition(arr, low, high);
		if ( high - pivotloc > pivotloc - low )
		{
			// Right side is longer: recurse on the left, loop on the right.
			QuickSort(arr, low, pivotloc-1);
			low = pivotloc + 1;
		}
		else
		{
			// Left side is at least as long: recurse on the right, loop on the left.
			QuickSort(arr, pivotloc + 1, high);
			high = pivotloc -1;
		}
		
	}
}

3、采用上文枢轴优化策略中的第3种方法：每次划分时将与枢轴相等的元素聚集在一起；

这里将不再详述。参考上面的内容。

4、使用并行或多线程处理子序列

利用并行计算来并行处理这些重复的子问题，使用Erlang可以非常方便非常清晰地利用这种方法解决问题。由于Erlang是一种声明式、函数式编程语言，所以要表述基本的快速排序算法非常方便：其中F是一个用于比较的函数。

%% qsort(F, List): quicksort of List using the two-argument predicate F.
%% An empty list sorts to an empty list. Otherwise the head H is the pivot:
%% the elements X of the tail for which F(X, H) is true are sorted and placed
%% before H, and all remaining elements are sorted and placed after H.
qsort(_, []) -> [];
qsort(F, [H|T]) ->
    qsort(F, [X || X <- T, F(X, H)])
    ++ [H] ++
    qsort(F, [X || X <- T, not F(X, H)]).

%% qsort(L): sorts L by calling qsort/2 with the predicate fun(X, Y) -> X < Y end.
qsort(L) -> qsort(fun(X, Y) -> X < Y end, L).

具体细节请参考 http://shiningray.cn/parallel-quick-sort-in-erlang.html （本人目前还没有完全弄明白，这里不再展开）。

5. 参考文献

《算法导论》；

内容来自用户分享和网络整理，不保证内容的准确性，如有侵权内容，可联系管理员处理

标签：

相关文章推荐

新的分享

章节导航