pthread 并行程序中unsigned效率问题
2011-10-13 19:12
302 查看
下面是利用pthread并行求PI的代码。当我运行串行程序时,迭代8亿次用时12秒多;在双核的笔记本上创建8个线程运行下面的程序,竟然用时15秒多(而不是想象中串行时间的一半),不可思议啊。这个问题困扰了我一天多,先看程序吧:
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <string.h>
#include <sys/time.h>
#include <pthread.h>
// Capacity of the pthread_t array declared in main.
#define MAX_N_THREADS 20
// Total number of integration steps across all threads.
static long num_steps=800000000;
// Width of one integration step; initialized from num_steps, so it must be
// declared after it. NOTE(review): this initializer is not a constant
// expression, which C++ accepts at file scope but C does not.
double step = 1.0 / (double)num_steps;
// Final result, written by main as step * g_sum.
double pi;
// Number of steps handled by each thread; assigned in main before the
// threads are created and read by work.
unsigned long length = 0;
// Running total of the per-thread partial sums; work adds to it while
// holding mtSum.
double g_sum = 0;
// Mutex locked by work around its update of g_sum.
pthread_mutex_t mtSum;
/*
 * Thread entry point: sums the midpoint-rule terms 4 / (1 + x^2) over this
 * thread's slice of the step range and adds the partial sum to g_sum.
 *
 * p is not a real address: it carries the zero-based thread index by value.
 * The slice covered is [index * length, index * length + length).
 * Prints timing information to stdout and always returns NULL.
 */
void* work(void* p)
{
    struct timeval tstart, tend;
    gettimeofday(&tstart, NULL);

    // Convert through long: a direct pointer-to-int cast truncates (and is
    // rejected by C++ compilers) where pointers are wider than int.
    int ithread = (int)(long)p;
    unsigned long start = (unsigned long)ithread * length;
    unsigned long end = length + start;

    // Other threads may be updating g_sum already, so read it under the lock.
    pthread_mutex_lock(&mtSum);
    double g_seen = g_sum;
    pthread_mutex_unlock(&mtSum);
    printf("thread %d start, start = %lu, end = %lu, g_sum = %lf\n", ithread, start, end, g_seen);

    // The index is deliberately kept unsigned long: this loop is the one whose
    // running time the surrounding article compares against a signed version.
    unsigned long i = start;
    double x = 0.0, sum = 0.0;
    for ( ; i < end; i++) {
        x = (i + 0.5) * step;
        sum = sum + 4.0 / (1.0 + x * x);
    }

    gettimeofday(&tend, NULL);
    double tcost = tend.tv_sec - tstart.tv_sec + (double)(tend.tv_usec - tstart.tv_usec) / 1000000.0;
    printf("thread %d calculate end, cost %10.6f seconds\n", ithread, tcost);

    // Publish the partial sum and snapshot the new total while still locked,
    // so the value printed below is not read concurrently with a writer.
    pthread_mutex_lock(&mtSum);
    g_sum += sum;
    g_seen = g_sum;
    pthread_mutex_unlock(&mtSum);

    gettimeofday(&tend, NULL);
    tcost = tend.tv_sec - tstart.tv_sec + (double)(tend.tv_usec - tstart.tv_usec) / 1000000.0;
    printf("thread %d exit, sum = %lf, g_sum = %lf, cost %10.6f seconds\n", ithread, sum, g_seen, tcost);
    return NULL;
}
int main(int argc, char* argv[])
{
double x, sum = 0.0;
timeval start, end;
double tcost;
unsigned int nthreads;
for (int i = 1; i < argc; i++) {
char *ts = strstr(argv[i], "-p=");
if (ts == NULL)
continue;
sscanf(ts,"-p=%d", &nthreads);
}
printf("nthreads = %d\n", nthreads);
gettimeofday(&start, NULL);
pthread_t threads[MAX_N_THREADS];
pthread_attr_t attr;
pthread_attr_init(&attr);
pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
pthread_mutex_init(&mtSum, NULL);
length = (unsigned long)ceil(num_steps / nthreads);
for (int i = 0; i < nthreads; i++){
int rc = pthread_create(&threads[i], &attr, work, (void*)i);
//int rc = pthread_create(&threads[i], NULL, work, (void*)i);
if (rc) {
printf("ERROR; return code from pthread_create(%d) is %d\n",i, rc);
exit(-1);
}
}
void *status;
for (int i = 0; i < nthreads; i++)
{
//pthread_join(threads[i], &status);
pthread_join(threads[i], NULL);
}
pi = step * g_sum;
gettimeofday(&end, NULL);
tcost = end.tv_sec - start.tv_sec + (double)(end.tv_usec-start.tv_usec)/1000000.0;
printf("Pi = %12.9f, cost %10.6f seconds\n", pi, tcost);
pthread_mutex_destroy(&mtSum);
//pthread_exit(NULL);
return 0;
}
问题出现在work线程中的循环,当起始、终止点start和end类型均为unsigned long时,程序运行时间为15秒多,改为long后,程序运行时间变为了7秒多(与预测值相似)。这是为什么呢?暂时依然无解。
通过vimdiff 对比二者的汇编代码,发现一共有三处不同。左边为unsigned版本,右边为signed版本。
who can tell me why?
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <string.h>
#include <sys/time.h>
#include <pthread.h>
// Capacity of the pthread_t array declared in main.
#define MAX_N_THREADS 20
// Total number of integration steps across all threads.
static long num_steps=800000000;
// Width of one integration step; initialized from num_steps, so it must be
// declared after it. NOTE(review): this initializer is not a constant
// expression, which C++ accepts at file scope but C does not.
double step = 1.0 / (double)num_steps;
// Final result, written by main as step * g_sum.
double pi;
// Number of steps handled by each thread; assigned in main before the
// threads are created and read by work.
unsigned long length = 0;
// Running total of the per-thread partial sums; work adds to it while
// holding mtSum.
double g_sum = 0;
// Mutex locked by work around its update of g_sum.
pthread_mutex_t mtSum;
/*
 * Thread entry point: sums the midpoint-rule terms 4 / (1 + x^2) over this
 * thread's slice of the step range and adds the partial sum to g_sum.
 *
 * p is not a real address: it carries the zero-based thread index by value.
 * The slice covered is [index * length, index * length + length).
 * Prints timing information to stdout and always returns NULL.
 */
void* work(void* p)
{
    struct timeval tstart, tend;
    gettimeofday(&tstart, NULL);

    // Convert through long: a direct pointer-to-int cast truncates (and is
    // rejected by C++ compilers) where pointers are wider than int.
    int ithread = (int)(long)p;
    unsigned long start = (unsigned long)ithread * length;
    unsigned long end = length + start;

    // Other threads may be updating g_sum already, so read it under the lock.
    pthread_mutex_lock(&mtSum);
    double g_seen = g_sum;
    pthread_mutex_unlock(&mtSum);
    printf("thread %d start, start = %lu, end = %lu, g_sum = %lf\n", ithread, start, end, g_seen);

    // The index is deliberately kept unsigned long: this loop is the one whose
    // running time the surrounding article compares against a signed version.
    unsigned long i = start;
    double x = 0.0, sum = 0.0;
    for ( ; i < end; i++) {
        x = (i + 0.5) * step;
        sum = sum + 4.0 / (1.0 + x * x);
    }

    gettimeofday(&tend, NULL);
    double tcost = tend.tv_sec - tstart.tv_sec + (double)(tend.tv_usec - tstart.tv_usec) / 1000000.0;
    printf("thread %d calculate end, cost %10.6f seconds\n", ithread, tcost);

    // Publish the partial sum and snapshot the new total while still locked,
    // so the value printed below is not read concurrently with a writer.
    pthread_mutex_lock(&mtSum);
    g_sum += sum;
    g_seen = g_sum;
    pthread_mutex_unlock(&mtSum);

    gettimeofday(&tend, NULL);
    tcost = tend.tv_sec - tstart.tv_sec + (double)(tend.tv_usec - tstart.tv_usec) / 1000000.0;
    printf("thread %d exit, sum = %lf, g_sum = %lf, cost %10.6f seconds\n", ithread, sum, g_seen, tcost);
    return NULL;
}
int main(int argc, char* argv[])
{
double x, sum = 0.0;
timeval start, end;
double tcost;
unsigned int nthreads;
for (int i = 1; i < argc; i++) {
char *ts = strstr(argv[i], "-p=");
if (ts == NULL)
continue;
sscanf(ts,"-p=%d", &nthreads);
}
printf("nthreads = %d\n", nthreads);
gettimeofday(&start, NULL);
pthread_t threads[MAX_N_THREADS];
pthread_attr_t attr;
pthread_attr_init(&attr);
pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
pthread_mutex_init(&mtSum, NULL);
length = (unsigned long)ceil(num_steps / nthreads);
for (int i = 0; i < nthreads; i++){
int rc = pthread_create(&threads[i], &attr, work, (void*)i);
//int rc = pthread_create(&threads[i], NULL, work, (void*)i);
if (rc) {
printf("ERROR; return code from pthread_create(%d) is %d\n",i, rc);
exit(-1);
}
}
void *status;
for (int i = 0; i < nthreads; i++)
{
//pthread_join(threads[i], &status);
pthread_join(threads[i], NULL);
}
pi = step * g_sum;
gettimeofday(&end, NULL);
tcost = end.tv_sec - start.tv_sec + (double)(end.tv_usec-start.tv_usec)/1000000.0;
printf("Pi = %12.9f, cost %10.6f seconds\n", pi, tcost);
pthread_mutex_destroy(&mtSum);
//pthread_exit(NULL);
return 0;
}
问题出现在work线程中的循环,当起始、终止点start和end类型均为unsigned long时,程序运行时间为15秒多,改为long后,程序运行时间变为了7秒多(与预测值相似)。这是为什么呢?暂时依然无解。
通过vimdiff 对比二者的汇编代码,发现一共有三处不同。左边为unsigned版本,右边为signed版本。
who can tell me why?
相关文章推荐
- linux下错误使用pthread_mutex_lock导致程序崩溃问题分析
- 程序效率的问题
- 程序效率的问题
- 串行&并行程序在效率上的简单比较
- 关于程序中查询效率的问题
- 程序效率的问题
- pthread使用的例子程序以及内存泄露问题
- OpenMP程序 for 循环并行的效率
- 变量的存储位置和程序的效率问题
- 因为效率的问题,写Delphi下的求子串的KMP&BM算法,为我的程序提速不少
- 进行一个调查,谢谢大家,请问:您是如何回避除程序以外的问题带来的效率浪费
- “提高一下dotnet程序的效率一”中关于exception的问题
- java程序中的效率问题
- 开发网络程序验证效率和稳定性时需要注意的几个问题
- VS开发问题:应用程序无法启动 因为程序的并行配置不正确 ,解决方案
- 串行&并行程序在效率上的简单比较
- 解决VS在Release下编译程序报“应用程序无法启动,应用程序的并行配置不正确”的问题
- 诡异的程序_1【程序的效率问题】
- 出差总结2:程序效率问题
- C51程序unsigned和signed类型数据判断时候需要注意的问题