Thursday, January 27, 2011

網路Driver 3 - Linux内核网络源码分析——发送数据

Linux内核网络源码分析——发送数据

目录:Linux内核网络源码分析
UDP发送(下面这张调用链概览取自较早的内核版本,其中ip_build_xmit、output_maybe_reroute、sock_writev等函数在下文分析的2.6内核中已不存在或已被替换,仅供对照):
|       sys_write               fs/read_write.c
|       sock_writev             net/socket.c                   
|       sock_sendmsg            net/socket.c
|       inet_sendmsg            net/ipv4/af_inet.c
|       udp_sendmsg             net/ipv4/udp.c
|       ip_build_xmit           net/ipv4/ip_output.c
|       output_maybe_reroute    net/ipv4/ip_output.c
|       ip_output               net/ipv4/ip_output.c
|       ip_finish_output        net/ipv4/ip_output.c
|       dev_queue_xmit          net/core/dev.c
|       ——————————————–
|       el3_start_xmit          drivers/net/3c509.c
V
write()
e.g. write(sockfd, "Hello", strlen("Hello"));
user
————————————————–
kernel

sys_write() <fs/read_write.c>
asmlinkage ssize_t sys_write(unsigned int fd, const char __user * buf, size_t count)

ret = vfs_write(file, buf, count, &pos);

vfs_write()

if (file->f_op->write)
ret = file->f_op->write(file, buf, count, pos);

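//在前面建立socket时sock_map_fd将sock对应file的f_op定义为socket_file_ops。注意socket_file_ops中没有定义.write,只有.aio_write,因此vfs_write实际走的是else分支do_sync_write(),再由它调用f_op->aio_write,参见: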
static const struct file_operations socket_file_ops = {

.aio_write = sock_aio_write,

}
sock_aio_write()//与之前的版本不同了。。。
do_sock_write()
__sock_sendmsg()
static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t size)

return sock->ops->sendmsg(iocb, sock, msg, size);

//sock->ops在inet_create函数中被初始化,参见:
inet_create() <net/ipv4/af_inet.c:>
static struct inet_protosw inetsw_array[] <net/ipv4/af_inet.c:>
<net/ipv4/af_inet.c>
const struct proto_ops inet_stream_ops ={

.sendmsg = tcp_sendmsg,

}
<net/ipv4/af_inet.c>
const struct proto_ops inet_dgram_ops ={

.sendmsg = inet_sendmsg,

}
————————————————–
UDP
inet_sendmsg(…)
int inet_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t size)

return sk->sk_prot->sendmsg(iocb, sk, msg, size);


udp_sendmsg(…)
ip_route_output_flow()
这里进行路由!参见5、路由和ARP
ip_append_data()
* ip_append_data() and ip_append_page() can make one large IP datagram
* from many pieces of data. Each pieces will be holded on the socket
* until ip_push_pending_frames() is called. Each piece can be a page
* or non-page data.
*
* Not only UDP, other transport protocols – e.g. raw sockets – can use
* this interface potentially.
*
* LATER: length must be adjusted by pad at tail, when it is required.
udp_push_pending_frames()
udp_push_pending_frames()
* Push out all pending data as one UDP datagram. Socket is locked.
————————————————–
TCP
tcp_transmit_skb()
err = icsk->icsk_af_ops->queue_xmit(skb,0);
tcp_transmit_skb 引用表:
tcp_mtu_probe
tcp_write_xmit
tcp_push_one
tcp_retransmit_skb
tcp_send_active_reset
tcp_send_synack
tcp_connect
tcp_send_ack
tcp_xmit_probe_skb
tcp_write_wakeup

ip_queue_xmit() ip_send_reply() ip_build_and_send_pkt()
int ip_queue_xmit(struct sk_buff *skb, int ipfragok)
这里有route过程
ip_route_output_flow(…)
*dccp int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk, __be32 saddr, __be32 daddr, struct ip_options *opt)
Add an ip header to a skbuff and send it out.
void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *arg, unsigned int len) <net/ipv4/ip_output.c>
* Generic function to send a packet as reply to another packet.
* Used to send TCP resets so far. ICMP should use this function too.
* Should run single threaded per socket because it uses the sock
* structure to pass arguments.
这里有用到ip_route_output_key()进行路由。
int ip_push_pending_frames(struct sock *sk);
Combined all pending IP fragments on the socket as one IP datagram
and push them out.
ip_local_out();
————————————————–
IP

ip_push_pending_frames()

ip_local_out() <ip_output.c>
/*
 * Send skb through __ip_local_out() and, if that returns 1, continue
 * with dst_output().
 *
 * Returns the result of dst_output() when it was called, otherwise the
 * value returned by __ip_local_out() unchanged.
 */
int ip_local_out(struct sk_buff *skb)
{
int err;
err = __ip_local_out(skb);
/*
 * NOTE(review): __ip_local_out() returns the nf_hook() result; 1 is
 * presumably the "hook passed, caller must continue output itself"
 * value -- confirm against nf_hook().
 */
if (likely(err == 1))
err = dst_output(skb);
return err;
}
EXPORT_SYMBOL_GPL(ip_local_out);
__ip_local_out() <ip_output.c>
/*
 * Fill in the final IP header fields of skb and run the
 * NF_INET_LOCAL_OUT netfilter hook on it.
 *
 * Returns whatever nf_hook() returns; dst_output is handed to the hook
 * as its continuation function.
 */
int __ip_local_out(struct sk_buff *skb)
{
struct iphdr *iph = ip_hdr(skb);
/* Total-length field: current skb length, converted to network byte order. */
iph->tot_len = htons(skb->len);
/* Presumably recomputes the header checksum now that tot_len is set -- confirm in ip_send_check(). */
ip_send_check(iph);
/* Output device for the hook is taken from the skb's dst entry. */
return nf_hook(PF_INET, NF_INET_LOCAL_OUT, skb, NULL, skb->dst->dev,
dst_output);
}
dst_output() <include/net/dst.h>
/*
 * Output packet to network from transport.
 *
 * Calls the output handler stored in the dst entry attached to skb and
 * returns that handler's result.
 */
static inline int dst_output(struct sk_buff *skb)
{
return skb->dst->output(skb);
}
其中dst->output() = ip_output();在__mkroute_output()和__mkroute_input()中注册。
ip_output() <net/ipv4/ip_output.c>
return NF_HOOK_COND(PF_INET, NF_INET_POST_ROUTING, skb, NULL, dev,
ip_finish_output,
!(IPCB(skb)->flags & IPSKB_REROUTED));

ip_finish_output() <net/ipv4/ip_output.c>
/*
 * Decide how skb leaves the IP layer: re-enter dst_output() when an
 * xfrm is attached to the dst entry, fragment the packet when it is
 * too long, or pass it straight to ip_finish_output2().
 *
 * Returns the result of whichever of those three paths was taken.
 */
static int ip_finish_output(struct sk_buff *skb)
{
#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
/* Policy lookup after SNAT yielded a new policy */
if (skb->dst->xfrm != NULL) {
/* Flag the packet as rerouted before sending it through dst_output() again. */
IPCB(skb)->flags |= IPSKB_REROUTED;
return dst_output(skb);
}
#endif
/*
 * Longer than the dst MTU and not GSO: fragment.
 * NOTE(review): ip_finish_output2 is passed as a callback, presumably
 * invoked once per fragment -- confirm in ip_fragment().
 */
if (skb->len > ip_skb_dst_mtu(skb) && !skb_is_gso(skb))
return ip_fragment(skb, ip_finish_output2);
else
return ip_finish_output2(skb);
}

ip_finish_output2() <net/ipv4/ip_output.c>

if (dst->hh)
         return neigh_hh_output(dst->hh, skb);
else if (dst->neighbour)
         return dst->neighbour->output(skb);
// 此函数在neigh_alloc中注册为neigh_blackhole(), 但这个是默认的,一般会被替换掉
static struct neighbour *neigh_alloc(struct neigh_table *tbl)

n->output = neigh_blackhole;
// 此函数在arp_constructor中注册为ops->queue_xmit或ops->output或connected_output
static int arp_constructor(struct neighbour *neigh)

.output = neigh_resolve_output,
.connected_output = neigh_connected_output,
.queue_xmit = dev_queue_xmit,
// 故一般为neigh_resolve_output
neigh_resolve_output() <net/core/neighbour.c>

err = dev_hard_header(skb, dev, ntohs(skb->protocol), neigh->ha, NULL, skb->len);
// 这里有Mac头填充的动作,参见路由和ARP
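// 说明此前neigh->ha已获取。如果neigh->ha为空呢?——neigh_resolve_output()在填充Mac头之前会先调用neigh_event_send():邻居项尚未解析时,skb被挂入neigh->arp_queue并发出ARP请求,函数直接返回,不会走到dev_hard_header();待收到ARP应答、邻居项更新后,再把队列中的skb发送出去。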

if (err >= 0)
        rc = neigh->ops->queue_xmit(skb); // 此函数注册为dev_queue_xmit()
dev_queue_xmit() <net/core/dev.c>
int dev_queue_xmit(struct sk_buff *skb)

if (!netif_queue_stopped(dev) &&
    !netif_subqueue_stopped(dev, skb)) {
    rc = 0;
    if (!dev_hard_start_xmit(skb, dev)) {
        HARD_TX_UNLOCK(dev);
        goto out;
    }
}
dev_hard_start_xmit() <net/core/dev.c>
int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)

return dev->hard_start_xmit(skb, dev);
xxx_start_xmit() <drivers/net/xxx.c>

No comments:

Post a Comment