您的位置：首页 > 运维架构 > Linux

linux的零复制splice、tee

2016-04-18 22:50 459 查看

要说零复制，就要先说管道pipe。

pipe在linux的实现中，用的是生产者消费者的模型，在linux/pipe_fs_i.h中我们能看到以下的代码：

/* Default number of pipe_buffer slots per pipe (other parts of the header are elided here). */
#define PIPE_DEF_BUFFERS    16

/*
 * Per-pipe bookkeeping, quoted from linux/pipe_fs_i.h.
 * The field notes below paraphrase the kernel header's own kernel-doc;
 * verify them against the kernel version you are reading.
 */
struct pipe_inode_info {
    struct mutex mutex;                     /* lock for the pipe state */
    wait_queue_head_t wait;                 /* wait queue for blocked readers/writers */
    unsigned int nrbufs;                    /* number of non-empty buffers */
    unsigned int curbuf;                    /* index of the current buffer entry */
    unsigned int buffers;                   /* total number of buffer slots */
    unsigned int readers;                   /* current reader count */
    unsigned int writers;                   /* current writer count */
    unsigned int files;                     /* number of struct file referring to this pipe */
    unsigned int waiting_writers;           /* writers blocked waiting for room */
    unsigned int r_counter;                 /* reader counter */
    unsigned int w_counter;                 /* writer counter */
    struct page *tmp_page;                  /* cached released page */
    struct fasync_struct *fasync_readers;   /* reader-side fasync list */
    struct fasync_struct *fasync_writers;   /* writer-side fasync list */
    struct pipe_buffer *bufs;               /* array of pipe buffers (PIPE_DEF_BUFFERS entries by default) */
};

其中bufs就是一个指向管道缓冲区的指针，而管道缓冲区的结构如下：

/*
 * One slot of a pipe's buffer array, quoted from linux/pipe_fs_i.h.
 * splice() fills in page/offset/len so the slot refers to existing data
 * instead of copying that data.
 */
struct pipe_buffer {
    struct page *page;                          /* page holding this buffer's data */
    unsigned int offset;                        /* start of the data inside the page */
    unsigned int len;                           /* length of the data inside the page */
    const struct pipe_buf_operations *ops;      /* operations associated with this buffer */
    unsigned int flags;                         /* pipe buffer flags */
    unsigned long private;                      /* private data owned by ops */
};

其中page是指向包含pipe buffer的页，是物理上的页地址，不是虚拟地址，这样方便进程间的通信。

在创建管道缓冲区时，会创建PIPE_DEF_BUFFERS个pipe_buffer大小的空间给bufs，也就是bufs指向一个大小是PIPE_DEF_BUFFERS的pipe_buffer数组。一个页大小是4k，那么linux的管道缓冲区大小就是64k了。

在使用管道缓冲区时，就和生产者消费者的模型一样，一边把数据写进去，另一边把数据取出来，满时写阻塞，空时读阻塞。在写入的时候，为了效率，linux会倾向于以页为单位的写，因此缓冲区满时未必是64k的数据。

接下来就是说splice了，

#include <fcntl.h>

ssize_t splice(int fd_in, loff_t *off_in, int fd_out,loff_t *off_out, size_t len, unsigned int flags);

成功返回spliced的字节数，出错-1

这个函数中，fd_in和fd_out中至少有一个必须是管道，off_in、off_out分别是两个文件描述符的偏移。如果对应的文件描述符是管道（或其他不能seek的对象），那么就不能有偏移量，必须设为NULL；对于非管道的描述符，当是NULL时，就是从文件当前位置读/写，结束后会更新文件的当前偏移；不是NULL时，则从指针所指的偏移开始读/写，结束后更新的是指针所指的值，文件自身的偏移不变。len就是最多要移动的字节数，至于flags自己看manpage。

它之所以能零复制，就是利用了管道作为中介，先把数据“复制”到管道，然后再从管道中读取即可：

pipe(fd_pair[2]);

splice(source_file,...,fd_pair[1],...);

splice(fd_pair[0],...,destination_file,...);

需要注意的是，其实我们并没有真的把数据复制进管道缓冲区，我们只是修改了管道缓冲区的page指针、偏移、长度，使它指向源数据的实际物理地址，然后再从管道中读出来，整个过程都没有涉及用户空间和内核空间之间的复制，在内核中也没有多余的复制，因此是零复制（严格来说数据仍被复制了一次，但术语是叫零复制）。

我们要注意用splice传送超过缓冲区64k的文件时，一次调用搬不完，要循环调用并更新剩余长度：

/*
 * Move `filesize` bytes from sourcefd to dstfd through the pipe.
 * Each pass queues at most one pipe-full of data, then drains exactly that
 * much to the destination before queuing more.
 */
while (filesize > 0) {
    /* Request the bytes still outstanding; splice() advances off_in itself. */
    len = splice(sourcefd, &off_in, pipe_pair[1], NULL, filesize, SPLICE_F_MOVE);
    if (len < 0) {
        perror("splice");
        break;
    }
    if (len == 0)
        break;                  /* source ended before filesize bytes were moved */
    filesize -= len;

    /* The pipe -> destination splice may be short too, so loop until drained. */
    while (len > 0) {
        ssize_t moved = splice(pipe_pair[0], NULL, dstfd, &off_out, len, SPLICE_F_MOVE);
        if (moved <= 0) {
            if (moved < 0)
                perror("splice");
            filesize = 0;       /* give up: also terminates the outer loop */
            break;
        }
        len -= moved;
    }
}

还有一个tee函数，这也是一个零复制函数

#include <fcntl.h>

ssize_t tee(int fd_in, int fd_out, size_t len, unsigned int flags);

成功返回“复制”的字节数，出错-1

EINVAL fd_in or fd_out does not refer to a pipe; or fd_in and fd_out refer to the same pipe.

这个函数是用于两个不同管道之间的零复制，就相当于把两个管道连通；与splice不同的是，tee不会消耗fd_in中的数据，这些数据之后仍然可以被读取或者被splice走。

测试

下面是段性能比较，测试程序是这样的，读入一个文件，然后分别用read-write、mmap、splice三种方法复制这个文件，我使用的是一个300多mb的视频文件来测试，最终得到的测试结果是：

最上面的数字是真正复制所用的时间，下面的是time的输出，因为还有其他的影响，因此用户时间+系统时间！=函数工作时间

read-write:

0.820000

real 0m11.919s

user 0m0.028s

sys 0m0.996s

mmap:

0.830000

real 0m10.109s

user 0m0.312s

sys 0m0.676s

splice:

0.550000

real 0m10.643s

user 0m0.000s

sys 0m0.732s

mmap的时间居然和read-write差不多。。。不过我们可以看出用mmap的系统调用时间比read-write少30%左右，可是用户调用时间比较大，因为mmap这个操作本身就是一个消耗很大的函数，如果需要长时间使用这个文件的话，那么就可以冲淡mmap的消耗。splice是最快的方法，用户调用时间很少，因为它的工作就是直接在内核完成，不需要频繁地在用户空间和内核空间之间切换。

更新：

发现mmap分段的复制比直接复制的快，不过还是比splice慢

/*
 * Copy the mapped source to the mapped destination in steps of at most
 * 4096 bytes instead of one large memcpy().
 * The counters use off_t/size_t so files of 2 GiB or more do not overflow.
 * NOTE(review): dst and src are declared outside this fragment; they need to
 * be byte pointers (char *) for the += below to be standard C.
 */
size_t size = 0;
off_t total = 0;
while (total < statbuf.st_size) {
    off_t left = statbuf.st_size - total;

    size = left > 4096 ? 4096 : (size_t)left;   /* last step may be shorter */
    memcpy(dst, src, size);
    dst += size;
    src += size;
    total += (off_t)size;
}

编写程序时出现的问题：

在下面的程序中，对于splice，编译时提示SPLICE_F_MOVE未声明，我只好用0x1来代替。原因是splice和SPLICE_F_MOVE属于GNU扩展，必须在包含任何头文件之前先#define _GNU_SOURCE，<fcntl.h>才会声明它们。

还有mmap时，如果输出文件只以写方式（O_WRONLY）打开，mmap就会报permission denied，以读写方式打开文件（mmap也是读写）就可以。这是因为MAP_SHARED加PROT_WRITE的映射要求文件描述符以O_RDWR方式打开，否则mmap返回EACCES。

#define _GNU_SOURCE /* must precede every #include so <fcntl.h> declares splice() and SPLICE_F_MOVE */
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include<time.h>

#include<fcntl.h>
#include<sys/mman.h>
#include<sys/stat.h>
#include<unistd.h>

#define MAX 1024        /* capacity of each output file name buffer in main() */
#define BUF_SIZE 4096   /* size of the read/write bounce buffer in do_std_copy() */

/* A copy routine: takes no arguments and reports its own errors. */
typedef void (*p_func)(void);

/* Shared state: the input descriptor and one output descriptor per copy method. */
int in_fd,out_fd1,out_fd2,out_fd3;
/* fstat() result for in_fd; the copy routines read st_size from it. */
struct stat statbuf;

/* Declared with (void) so they are real prototypes that match p_func. */
void do_std_copy(void);     /* read()/write() loop          -> out_fd1 */
void do_mmap_copy(void);    /* mmap() both files + memcpy() -> out_fd2 */
void do_splice_copy(void);  /* splice() through a pipe      -> out_fd3 */
void testfun(p_func pa);    /* rewind in_fd, run pa, print elapsed CPU time */

/*
 * Benchmark driver.
 *
 * Takes exactly one argument, the path of the file to copy. Opens it
 * read-only, creates three outputs named "<path>1", "<path>2" and "<path>3",
 * then times do_std_copy, do_mmap_copy and do_splice_copy in that order.
 *
 * Returns 0 on success, 1 on a usage error or when any setup step fails.
 */
int main(int argc,char **argv)
{
    if (argc != 2) {
        fprintf(stderr, "usage: %s <file>\n", argc > 0 ? argv[0] : "copytest");
        return 1;
    }

    char outfile1[MAX], outfile2[MAX], outfile3[MAX];
    char *const outfiles[3] = {outfile1, outfile2, outfile3};
    p_func pa[3] = {do_std_copy, do_mmap_copy, do_splice_copy};

    /* Build "<path>N" with a bounds check: argv[1] can be longer than MAX. */
    for (int i = 0; i < 3; ++i) {
        int n = snprintf(outfiles[i], MAX, "%s%d", argv[1], i + 1);
        if (n < 0 || n >= MAX) {
            fprintf(stderr, "output file name too long\n");
            return 1;
        }
    }

    if ((in_fd = open(argv[1], O_RDONLY)) < 0) {
        perror("open in_fd faild");
        return 1;
    }
    if ((out_fd1 = open(outfile1, O_WRONLY|O_CREAT|O_TRUNC, 0777)) < 0) {
        perror("open out_fd1 faild");
        return 1;
    }
    /* The mmap target must be opened read-write for a shared writable mapping. */
    if ((out_fd2 = open(outfile2, O_RDWR|O_CREAT|O_TRUNC, 0777)) < 0) {
        perror("open out_fd2 faild");
        return 1;
    }
    if ((out_fd3 = open(outfile3, O_WRONLY|O_CREAT|O_TRUNC, 0777)) < 0) {
        perror("open out_fd3 faild");
        return 1;
    }

    /* Every copy routine relies on statbuf.st_size, so a failed fstat is fatal. */
    if (fstat(in_fd, &statbuf) < 0) {
        perror("fstat in_fd failed");
        return 1;
    }

    for (int i = 0; i < 3; ++i)
        testfun(pa[i]);

    close(in_fd);
    close(out_fd1);
    close(out_fd2);
    close(out_fd3);
    return 0;
}

/*
 * Time a single copy routine.
 *
 * Rewinds in_fd to offset 0, calls pa(), and prints the processor time
 * reported by clock() for that call, in seconds, on its own line.
 */
void testfun(p_func pa)
{
    clock_t started;
    clock_t finished;
    double seconds;

    /* Each routine reads in_fd from its current position, so start at 0. */
    lseek(in_fd, 0, SEEK_SET);

    started = clock();
    pa();
    finished = clock();

    seconds = (double)(finished - started) / CLOCKS_PER_SEC;
    printf("%f\n", seconds);
}

/*
 * Copy in_fd to out_fd1 with a plain read()/write() loop, BUF_SIZE bytes
 * at a time, starting at in_fd's current offset.
 *
 * A short write is retried for the remaining bytes rather than treated as
 * an error. Any read or write failure is reported with perror() and ends
 * the program with exit status 1.
 */
void do_std_copy(void)
{
    char buffer[BUF_SIZE];
    ssize_t got;

    while ((got = read(in_fd, buffer, sizeof(buffer))) > 0) {
        ssize_t done = 0;

        /* write() may accept fewer bytes than asked; push the rest. */
        while (done < got) {
            ssize_t put = write(out_fd1, buffer + done, (size_t)(got - done));
            if (put <= 0) {
                perror("write errno");
                exit(1);
            }
            done += put;
        }
    }

    /* read() returns -1 on failure; do not mistake that for end of file. */
    if (got < 0) {
        perror("read failed");
        exit(1);
    }
}

/*
 * Copy in_fd to out_fd2 by mapping both files and doing one memcpy().
 *
 * out_fd2 is first resized to statbuf.st_size so the destination mapping
 * is backed by file data. Failures are reported with perror() and the
 * function returns without copying.
 */
void do_mmap_copy(void)
{
    size_t length = (size_t)statbuf.st_size;

    if (ftruncate(out_fd2, statbuf.st_size) < 0) {
        perror("ftruncate faild");
        return;
    }
    /* If the hole were created with lseek instead, one byte would have to be written at the end. */

    /* mmap() rejects a zero length with EINVAL; an empty input is already fully "copied". */
    if (length == 0)
        return;

    void *src = mmap(NULL, length, PROT_READ, MAP_SHARED, in_fd, 0);
    if (src == MAP_FAILED) {
        perror("mmap map src faild");
        return;
    }

    void *dst = mmap(NULL, length, PROT_READ|PROT_WRITE, MAP_SHARED, out_fd2, 0);
    if (dst == MAP_FAILED) {
        /* Report before cleaning up so munmap() cannot disturb errno. */
        perror("mmap map dst faild");
        munmap(src, length);
        return;
    }

    memcpy(dst, src, length);
    munmap(src, length);
    munmap(dst, length);
}

/*
 * SPLICE_F_MOVE is only exposed by <fcntl.h> when _GNU_SOURCE is defined
 * before the first #include; fall back to its numeric value otherwise.
 */
#ifndef SPLICE_F_MOVE
#define SPLICE_F_MOVE 0x1
#endif

/*
 * Copy in_fd to out_fd3 with splice(), using a pipe as the intermediary.
 *
 * Each pass queues data from in_fd (at its current offset) into the pipe,
 * then drains exactly that many bytes into out_fd3 before queuing more, so
 * nothing is left behind when the second splice() is short. Failures are
 * reported with perror(); both pipe ends are closed on every exit path.
 */
void do_splice_copy(void)
{
    int pipefd[2];
    size_t want = (size_t)statbuf.st_size;  /* upper bound per request */

    if (pipe(pipefd) < 0) {
        perror("pipe failed");
        return;
    }

    for (;;) {
        ssize_t queued = splice(in_fd, NULL, pipefd[1], NULL, want, SPLICE_F_MOVE);
        if (queued < 0) {
            perror("splice in_fd faild");
            break;
        }
        if (queued == 0)
            break;              /* end of input */

        while (queued > 0) {
            ssize_t sent = splice(pipefd[0], NULL, out_fd3, NULL, (size_t)queued, SPLICE_F_MOVE);
            if (sent <= 0) {
                perror("splice out_fd3 faild");
                goto out;
            }
            queued -= sent;
        }
    }

out:
    close(pipefd[0]);
    close(pipefd[1]);
}

内容来自用户分享和网络整理，不保证内容的准确性，如有侵权内容，可联系管理员处理

标签：

相关文章推荐

新的分享

章节导航