您的位置:首页 > 其它

sk_buff结构

2017-09-16 01:55 183 查看
sk_buff结构用来描述已接收或者待发送的数据报文信息。skb在不同网络协议层之间传递,可被用于不同的网络协议,如二层的MAC或其他链路层协议、三层的IP、四层的TCP或者UDP协议。其中某些成员变量会在该结构从一层向另一层传递时发生改变:从上层向下层传递需要添加首部,从下层向上层传递需要移除首部。

/* Head of an skb list: list linkage plus an element count and a lock. */
struct sk_buff_head {
/* These two members must be first. */
/* The two pointers below link skbs into a doubly linked list. */
struct sk_buff    *next; /* points to the following skb */
struct sk_buff    *prev; /* points to the preceding skb */

__u32        qlen; /* number of elements in the list */
spinlock_t    lock; /* spinlock */
};


/**
*    struct sk_buff - socket buffer
*    @next: Next buffer in list
*    @prev: Previous buffer in list
*    @tstamp: Time we arrived/left
*    @rbnode: RB tree node, alternative to next/prev for netem/tcp
*    @sk: Socket we are owned by
*    @dev: Device we arrived on/are leaving by
*    @cb: Control buffer. Free for use by every layer. Put private vars here
*    @_skb_refdst: destination entry (with norefcount bit)
*    @sp: the security path, used for xfrm
*    @len: Length of actual data
*    @data_len: Data length
*    @mac_len: Length of link layer header
*    @hdr_len: writable header length of cloned skb
*    @csum: Checksum (must include start/offset pair)
*    @csum_start: Offset from skb->head where checksumming should start
*    @csum_offset: Offset from csum_start where checksum should be stored
*    @priority: Packet queueing priority
*    @ignore_df: allow local fragmentation
*    @cloned: Head may be cloned (check refcnt to be sure)
*    @ip_summed: Driver fed us an IP checksum
*    @nohdr: Payload reference only, must not modify header
*    @pkt_type: Packet class
*    @fclone: skbuff clone status
*    @ipvs_property: skbuff is owned by ipvs
*    @tc_skip_classify: do not classify packet. set by IFB device
*    @tc_at_ingress: used within tc_classify to distinguish in/egress
*    @tc_redirected: packet was redirected by a tc action
*    @tc_from_ingress: if tc_redirected, tc_at_ingress at time of redirect
*    @peeked: this packet has been seen already, so stats have been
*        done for it, don't do them again
*    @nf_trace: netfilter packet trace flag
*    @protocol: Packet protocol from driver
*    @destructor: Destruct function
*    @_nfct: Associated connection, if any (with nfctinfo bits)
*    @nf_bridge: Saved data about a bridged frame - see br_netfilter.c
*    @skb_iif: ifindex of device we arrived on
*    @tc_index: Traffic control index
*    @hash: the packet hash
*    @queue_mapping: Queue mapping for multiqueue devices
*    @xmit_more: More SKBs are pending for this queue
*    @ndisc_nodetype: router type (from link layer)
*    @ooo_okay: allow the mapping of a socket to a queue to be changed
*    @l4_hash: indicate hash is a canonical 4-tuple hash over transport
*        ports.
*    @sw_hash: indicates hash was computed in software stack
*    @wifi_acked_valid: wifi_acked was set
*    @wifi_acked: whether frame was acked on wifi or not
*    @no_fcs:  Request NIC to treat last 4 bytes as Ethernet FCS
*    @dst_pending_confirm: need to confirm neighbour
*    @napi_id: id of the NAPI struct this skb came from
*    @secmark: security marking
*    @mark: Generic packet mark
*    @vlan_proto: vlan encapsulation protocol
*    @vlan_tci: vlan tag control information
*    @inner_protocol: Protocol (encapsulation)
*    @inner_transport_header: Inner transport layer header (encapsulation)
*    @inner_network_header: Network layer header (encapsulation)
*    @inner_mac_header: Link layer header (encapsulation)
*    @transport_header: Transport layer header
*    @network_header: Network layer header
*    @mac_header: Link layer header
*    @tail: Tail pointer
*    @end: End pointer
*    @head: Head of buffer
*    @data: Data head pointer
*    @truesize: Buffer size
*    @users: User count - see {datagram,tcp}.c
*/
/* Socket buffer (skb): describes one received or to-be-sent packet. */
struct sk_buff {
union {
struct {
/* These two members must be first. */
struct sk_buff        *next;
struct sk_buff        *prev;

/* Timestamp of when the packet arrived or left */
union {
ktime_t        tstamp;
struct skb_mstamp skb_mstamp;
};
};
struct rb_node    rbnode; /* used in netem & tcp stack */
};

/*
 * Socket (struct sock) that owns this buffer. When the data is generated
 * locally or is being received by a local process, the data and the
 * socket information are used by L4 (TCP or UDP) and by the user
 * application. When the buffer is merely being forwarded (the local
 * machine is neither the source nor the destination), this pointer is NULL.
 */
struct sock        *sk;

union {
/* Network device the packet arrived on or is leaving by */
struct net_device    *dev;
/* Some protocols might use this space to store information,
* while device pointer would be NULL.
* UDP receive path is one user.
*/
unsigned long        dev_scratch;
};
/*
* This is the control buffer. It is free to use for every
* layer. Please put your private variables there. If you
* want to keep them across layers you have to do a skb_clone()
* first. This is owned by whoever has the skb queued ATM.
*/
/*
 * Control buffer for private information: each protocol layer maintains
 * and uses it on its own, and the contents are only valid within that layer.
 */
char            cb[48] __aligned(8);

/*
 * Destination (route cache) entry, with the norefcount bit. Both incoming
 * and outgoing packets must look up their destination route cache entry
 * before the direction of the packet can be determined.
 */
unsigned long        _skb_refdst;

/*
 * Destructor hook that can perform cleanup when the buffer is released.
 * When the buffer does not belong to a socket it is usually left
 * uninitialized; when it does, it is usually set to sock_rfree or
 * sock_wfree. The sock_xxx functions update the amount of memory held
 * in the socket's queues.
 */
void            (*destructor)(struct sk_buff *skb);
#ifdef CONFIG_XFRM
struct    sec_path    *sp;
#endif
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
unsigned long         _nfct;
#endif
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
struct nf_bridge_info    *nf_bridge;
#endif
/*
 * Size of the data in the buffer. This length covers the data in the main
 * buffer (pointed to by head) as well as the data held in fragments. The
 * value changes as the buffer moves from one network layer to the next:
 * headers are dropped when moving up the stack and added when moving
 * down, and len counts the protocol headers too.
 * NOTE(review): the original note also refers to the "data reservation
 * and alignment" operations but is truncated at that point.
 */
unsigned int        len,
/* Size of the data held in fragments */
data_len;
/* Size of the MAC (link layer) header */
__u16            mac_len,
/* Writable header length of a cloned skb */
hdr_len;

/* Following fields are _not_ copied in __copy_skb_header()
* Note that queue_mapping is here mostly to fill a hole.
*/
kmemcheck_bitfield_begin(flags1);
__u16            queue_mapping;

/* if you move cloned around you also must adapt those constants */
#ifdef __BIG_ENDIAN_BITFIELD
#define CLONED_MASK    (1 << 7)
#else
#define CLONED_MASK    1
#endif
#define CLONED_OFFSET()        offsetof(struct sk_buff, __cloned_offset)

__u8            __cloned_offset[0];
/* Head may be cloned, i.e. this skb may be a clone of another skb (check refcnt to be sure) */
__u8            cloned:1,
/*
 * Payload is referenced on its own, without the protocol headers. When
 * set, the protocol headers must not be modified, nor accessed through
 * skb->data.
 */
nohdr:1,
/*
 * Current clone status:
 * SKB_FCLONE_UNAVAILABLE - the skb has not been cloned
 * SKB_FCLONE_ORIG - parent skb allocated from skbuff_fclone_cache; may be cloned
 * SKB_FCLONE_CLONE - child skb allocated from skbuff_fclone_cache, cloned from the parent
 */
fclone:2,
/* Packet has been seen already, so stats have been done for it */
peeked:1,
head_frag:1,
/* More skbs are pending for this queue */
xmit_more:1,
__unused:1; /* one bit hole */
kmemcheck_bitfield_end(flags1);

/* fields enclosed in headers_start/headers_end are copied
* using a single memcpy() in __copy_skb_header()
*/
/* private: */
__u32            headers_start[0];
/* public: */

/* if you move pkt_type around you also must adapt those constants */
#ifdef __BIG_ENDIAN_BITFIELD
#define PKT_TYPE_MAX    (7 << 5)
#else
#define PKT_TYPE_MAX    7
#endif
#define PKT_TYPE_OFFSET()    offsetof(struct sk_buff, __pkt_type_offset)

__u8            __pkt_type_offset[0];
/*
 * Packet class, derived from the L2 destination address:
 * PACKET_HOST - the MAC address equals that of the receiving device, i.e.
 *     the packet is addressed to this host
 * PACKET_BROADCAST - the MAC address is the broadcast address of the
 *     receiving device
 * PACKET_MULTICAST - the MAC address is one of the multicast addresses
 *     registered on the receiving device
 * PACKET_OTHERHOST - the MAC address does not belong to the receiving
 *     device; forwarded if forwarding is enabled, otherwise dropped
 * PACKET_OUTGOING - the packet is being sent out; users of this flag
 *     include decnet and the function that hands a copy of each outgoing
 *     packet to every network tap
 * PACKET_LOOPBACK - the packet is sent to the loopback device; with this
 *     flag, loopback handling can skip some operations that real devices
 *     require
 * PACKET_USER - sent to user space; used by netlink
 * PACKET_KERNEL - sent to kernel space; used by netlink
 * PACKET_FASTROUTE - unused
 */
__u8            pkt_type:3;
__u8            pfmemalloc:1;
/* Allow local fragmentation */
__u8            ignore_df:1;

/* Netfilter packet trace flag */
__u8            nf_trace:1;
/*
 * Checksum status, as summarized by the original notes:
 * CHECKSUM_NONE - no hardware support; the checksum is done entirely in software
 * CHECKSUM_PARTIAL - the checksum is performed by hardware
 * CHECKSUM_UNNECESSARY - no checksum verification is needed
 * CHECKSUM_COMPLETE - the checksum has already been computed
 * NOTE(review): these one-line summaries are simplified; confirm the exact
 * RX/TX semantics against the checksum documentation in skbuff.h.
 */
__u8            ip_summed:2;
__u8            ooo_okay:1;
__u8            l4_hash:1;
__u8            sw_hash:1;
__u8            wifi_acked_valid:1;
__u8            wifi_acked:1;

__u8            no_fcs:1;
/* Indicates the inner headers are valid in the skbuff. */
__u8            encapsulation:1;
__u8            encap_hdr_csum:1;
__u8            csum_valid:1;
__u8            csum_complete_sw:1;
__u8            csum_level:2;
__u8            csum_bad:1;

__u8            dst_pending_confirm:1;
#ifdef CONFIG_IPV6_NDISC_NODETYPE
__u8            ndisc_nodetype:2;
#endif
__u8            ipvs_property:1;
__u8            inner_protocol_type:1;
__u8            remcsum_offload:1;
#ifdef CONFIG_NET_SWITCHDEV
__u8            offload_fwd_mark:1;
#endif
#ifdef CONFIG_NET_CLS_ACT
__u8            tc_skip_classify:1;
__u8            tc_at_ingress:1;
__u8            tc_redirected:1;
__u8            tc_from_ingress:1;
#endif

#ifdef CONFIG_NET_SCHED
__u16            tc_index;    /* traffic control index */
#endif

union {
/* Checksum; must include the csum_start/csum_offset pair */
__wsum        csum;
struct {
/* Offset from skb->head where checksumming should start */
__u16    csum_start;
/* Offset from csum_start where the checksum should be stored */
__u16    csum_offset;
};
};
/*
 * QoS level (queueing priority) of the packet being transmitted. For
 * locally generated packets the socket defines the priority value; for
 * forwarded packets the value is derived from the ToS field of the IP
 * header when ip_forward is called.
 */
__u32            priority;
/* ifindex of the device the packet arrived on */
int            skb_iif;
/* The packet hash */
__u32            hash;
/* VLAN encapsulation protocol */
__be16            vlan_proto;
/* VLAN tag control information */
__u16            vlan_tci;
#if defined(CONFIG_NET_RX_BUSY_POLL) || defined(CONFIG_XPS)
union {
unsigned int    napi_id;
unsigned int    sender_cpu;
};
#endif
#ifdef CONFIG_NETWORK_SECMARK
__u32        secmark;
#endif

union {
__u32        mark;
__u32        reserved_tailroom;
};

/* Protocol of the encapsulated (inner) packet */
union {
__be16        inner_protocol;
__u8        inner_ipproto;
};
/* Offset of the inner (encapsulated) transport layer header, relative to head */
__u16            inner_transport_header;
/* Offset of the inner (encapsulated) network layer header, relative to head */
__u16            inner_network_header;
/* Offset of the inner (encapsulated) link layer header, relative to head */
__u16            inner_mac_header;

/*
 * L3 protocol value, e.g.
 * ETH_P_IP - IPv4 packet
 * ETH_P_ARP - ARP packet, etc.
 */
__be16            protocol;
/* Offset of the transport layer header, relative to head */
__u16            transport_header;
/* Offset of the network layer header, relative to head */
__u16            network_header;
/* Offset of the link layer header, relative to head */
__u16            mac_header;

/* private: */
__u32            headers_end[0];
/* public: */

/* These elements must be at the end, see alloc_skb() for details.  */
/* End of the actual data */
sk_buff_data_t        tail;
/* End of the buffer */
sk_buff_data_t        end;
/* Start of the buffer */
unsigned char        *head,
/* Start of the actual data */
*data;
/*
 * Total size of the buffer, covering the skb itself and the actual data.
 * NOTE(review): the original notes state that alloc_skb sets this field to
 * len+sizeof(sk_buff) and that it must be updated whenever len changes;
 * verify against alloc_skb() before relying on that formula.
 */
unsigned int        truesize;

/*
 * Reference count: the number of users of this skb; the skb may only be
 * freed once the count reaches 0. skb_get() increments the count, and
 * kfree_skb checks it on release and only really frees the skb when the
 * count is 0. This counter covers the sk_buff structure only; references
 * to the actual data in the buffer are tracked by
 * skb_shared_info->dataref.
 */
atomic_t        users;
};
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: