TCP/IP协议TCPLinux kernel中的拥塞控制

TCP重点系列之快速重传tcp_fastretrans_alert

2017-04-24  本文已影响94人  Allenkevin

本文主要分析TCP拥塞状态机中函数tcp_fastretrans_alert()的实现,并对一些相关函数做了介绍。

变量介绍

这些变量都在include/linux/tcp.h中声明,在net/ipv4/tcp.c中被赋初值。

u32 packets_out; /* Packets that have been sent but not yet acknowledged. */
u32 sacked_out; 
/* Packets, which arrived to receiver out of order and hence not ACKed.
 * With SACK this number is simply amount of SACKed data. Even without
 * SACKs it is easy to give pretty reliable estimate of this number, counting
 * duplicate ACKs.
 * In other words, the meaning depends on whether the SACK option is in use:
 *  - with SACK, it is the number of out-of-order packets that were SACKed;
 *  - without SACK, it is the number of duplicate ACKs counted so far
 *    (tcp_add_reno_sack() increments it once per duplicate ACK).
 */
u32 fackets_out;/* Original author's description: sum of SACKed and lost packets,
                 * i.e. fackets_out = lost_out + sacked_out.
                 * NOTE(review): that sum is what tcp_verify_left_out() appears to
                 * treat as "left_out"; fackets_out is presumably the count of
                 * packets up to the highest SACKed one -- confirm against the
                 * kernel sources before relying on this formula.
                 */

tcp_fastretrans_alert()函数被调用条件

(1) 每收到一个ACK,且当前拥塞状态不是Open时。
(2) 收到的ACK不是普通ACK,即属于以下情况之一:
   SACK,
   Duplicate ACK,
   ECN ECE

tcp_fastretrans_alert()函数实现细节

@kernel version 3.12/net/ipv4/tcp_input.c

/*
 * tcp_fastretrans_alert - one step of the congestion state machine, driven
 * by an incoming ACK.
 *
 * @sk:            socket being processed
 * @pkts_acked:    forwarded to tcp_try_undo_partial() when a partial ACK
 *                 arrives in the Recovery state
 * @prior_sacked:  presumably tp->sacked_out as it was before this ACK;
 *                 used only to compute newly_acked_sacked
 * @prior_packets: presumably tp->packets_out as it was before this ACK;
 *                 used only to compute newly_acked_sacked
 * @is_dupack:     true when the ACK is a duplicate ACK
 * @flag:          FLAG_* bits describing the ACK; FLAG_ECE, FLAG_DATA_SACKED
 *                 and FLAG_SND_UNA_ADVANCED are tested here
 *
 * The function sanitises the counters, checks for SACK reneging, handles
 * the exit conditions of the current icsk_ca_state, processes the state
 * (possibly entering Recovery), then marks lost packets, invokes the cwnd
 * reduction hook and kicks the retransmit queue.
 */
static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
int prior_sacked, int prior_packets, bool is_dupack, int flag)
{
    struct inet_connection_sock *icsk = inet_csk(sk);
    struct tcp_sock *tp = tcp_sk(sk);
    /* Mark losses when this is a duplicate ACK, or when the ACK SACKed new
     * data (FLAG_DATA_SACKED) and tcp_fackets_out() exceeds tp->reordering.
     */
    int do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) &&
                    (tcp_fackets_out(tp) > tp->reordering));
    int newly_acked_sacked = 0; 
    int fast_rexmit = 0; 

    /* Nothing is outstanding, so sacked_out must be 0; warn and repair. */
    if (WARN_ON(!tp->packets_out && tp->sacked_out))
        tp->sacked_out = 0; 
    /* Nothing is SACKed, so fackets_out must be 0; warn and repair. */
    if (WARN_ON(!tp->sacked_out && tp->fackets_out))
        tp->fackets_out = 0; 

    /* Now state machine starts.
     * A. ECE, hence prohibit cwnd undoing, the reduction is required.
     * Clearing prior_ssthresh is what disables the undo.
     */
    if (flag & FLAG_ECE)
        tp->prior_ssthresh = 0; 

    /* B. In all the states check for reneging SACKs.
     * Per the original author's note, a reneging SACK is detected when the
     * newly received ACK's ack_seq points at an already recorded SACK block,
     * meaning the recorded SACK state does not reflect the receiver's real
     * state. Processing stops here when reneging is reported.
     */
    if (tcp_check_sack_reneging(sk, flag))
        return;

    /* C. Check consistency of the current state.
     * Presumably verifies that left_out does not exceed packets_out --
     * TODO confirm against the definition of tcp_verify_left_out().
     */
    tcp_verify_left_out(tp);

    /* D. Check state exit conditions. State can be terminated
     *    when high_seq is ACKed.
     * In the Open state there must be no retransmitted packets outstanding.
     */
    if (icsk->icsk_ca_state == TCP_CA_Open) {
        WARN_ON(tp->retrans_out != 0);
        tp->retrans_stamp = 0; // Clear the retransmit timestamp.
        /* Otherwise, once snd_una is not before high_seq, the current
         * non-Open state may be left.
         */
    } else if (!before(tp->snd_una, tp->high_seq)) {
        /* Each icsk_ca_state value stands for a different congestion state. */
        switch (icsk->icsk_ca_state) {
        case TCP_CA_CWR:
            /* CWR is to be held something *above* high_seq
             * is ACKed for CWR bit to reach receiver. */
             /* snd_una has moved strictly past high_seq: end the cwnd
              * reduction and go back to the Open state.
              */
            if (tp->snd_una != tp->high_seq) {
                inet_csk(sk)->icsk_retrans_ops->end_cwnd_reduction(sk);
                tcp_set_ca_state(sk, TCP_CA_Open);
            }
            break;

        case TCP_CA_Recovery:
            if (tcp_is_reno(tp)) /* connection without SACK */
                tcp_reset_reno_sack(tp); /* reset the emulated SACK count */
            if (tcp_try_undo_recovery(sk)) /* try to undo the recovery */
                return;
            /* End the cwnd reduction that accompanied fast retransmit. */
            inet_csk(sk)->icsk_retrans_ops->end_cwnd_reduction(sk);
            break;
        }
    }
    /* Handling of ACKs that are not ordinary ACKs. */
    /* E. Process state. */
    switch (icsk->icsk_ca_state) {
    case TCP_CA_Recovery:
        /* FLAG_SND_UNA_ADVANCED means this ACK advanced snd_una. */
        if (!(flag & FLAG_SND_UNA_ADVANCED)) {
            /* No SACK and this is a duplicate ACK: bump sacked_out. */
            if (tcp_is_reno(tp) && is_dupack)
                tcp_add_reno_sack(sk);
        } else
            /* snd_una advanced while in Recovery: whether to mark losses
             * is decided by the result of tcp_try_undo_partial().
             */
            do_lost = tcp_try_undo_partial(sk, pkts_acked);
        /* Newly delivered packets: the drop in packets_out plus the growth
         * in sacked_out relative to the prior_* values.
         */
        newly_acked_sacked = prior_packets - tp->packets_out +
                     tp->sacked_out - prior_sacked;
        break;

        /* Loss state (per the original note: processing after a timeout). */
    case TCP_CA_Loss:
        tcp_process_loss(sk, flag, is_dupack);
        if (icsk->icsk_ca_state != TCP_CA_Open)
            return;
        /* Fall through to processing in Open state. */
    default:
        if (tcp_is_reno(tp)) {
            if (flag & FLAG_SND_UNA_ADVANCED)
                tcp_reset_reno_sack(tp); /* reset the emulated SACK count */
            if (is_dupack)
                tcp_add_reno_sack(sk);
        }
        /* Newly delivered packets, computed as in the Recovery case. */
        newly_acked_sacked = prior_packets - tp->packets_out +
                     tp->sacked_out - prior_sacked;

        if (icsk->icsk_ca_state <= TCP_CA_Disorder)
            tcp_try_undo_dsack(sk);

        /* Not yet time to enter Recovery: take the tcp_try_to_open() path
         * and stop here.
         */
        if (!tcp_time_to_recover(sk, flag)) {
            tcp_try_to_open(sk, flag, newly_acked_sacked);
            return;
        }

        /* MTU probe failure: don't reduce cwnd */
        if (icsk->icsk_ca_state < TCP_CA_CWR &&
            icsk->icsk_mtup.probe_size &&
            tp->snd_una == tp->mtu_probe.probe_seq_start) {
            tcp_mtup_probe_failed(sk);
            /* Restores the reduction we did in tcp_mtup_probe() */
            tp->snd_cwnd++;
            tcp_simple_retransmit(sk);/* retransmit directly, then stop */
            return;
        }

        /* Otherwise enter Recovery state */
        tcp_enter_recovery(sk, (flag & FLAG_ECE)); /* switch to Recovery */
        fast_rexmit = 1;/* remember that a fast retransmit was triggered */
    }
    /* Mark packets as lost when required. */
    if (do_lost || (tcp_is_fack(tp) && tcp_head_timeout(sk))) {
        /* Update the scoreboard: mark lost (and timed-out) packets. */
        tcp_update_scoreboard(sk, fast_rexmit);
    }
    /* Reduce cwnd through the pluggable retransmit ops. */
    inet_csk(sk)->icsk_retrans_ops->cwnd_reduction(sk, newly_acked_sacked, fast_rexmit);
    /* Retransmit the packets that carry the lost mark. */
    tcp_xmit_retransmit_queue(sk);
}

tcp_add_reno_sack()函数

/*
 * SACK emulation for connections that did not negotiate SACK: each
 * duplicate ACK is accounted as one more "SACKed" packet.
 */
static void tcp_add_reno_sack(struct sock *sk)
{
    struct tcp_sock *tsk = tcp_sk(sk);

    /* One more duplicate ACK seen. */
    tsk->sacked_out += 1;

    /* Too many dupacks may indicate reordering; let the helper decide. */
    tcp_check_reno_reordering(sk, 0);

    /* Re-validate the counters after the adjustment. */
    tcp_verify_left_out(tsk);
}

tcp_check_reno_reordering()函数

/*
 * When more duplicate ACKs arrive than the outstanding segments could
 * explain without reordering, treat the surplus as evidence of
 * reordering (the only other explanation would be a buggy receiver).
 *
 * @sk:     socket being processed
 * @addend: extra amount added to packets_out when reporting the new
 *          reordering metric
 */
static void tcp_check_reno_reordering(struct sock *sk, const int addend)
{
    struct tcp_sock *tsk = tcp_sk(sk);

    /* sacked_out stayed within bounds: nothing to report. */
    if (!tcp_limit_reno_sacked(tsk))
        return;

    /* sacked_out had to be clamped, so update the reordering estimate. */
    tcp_update_reordering(sk, tsk->packets_out + addend, 0);
}

tcp_limit_reno_sacked()函数

/*
 * Clamp sacked_out so that sacked_out plus the number of holes never
 * exceeds packets_out.
 *
 * The hole count is lost_out, but at least 1 and at most packets_out.
 *
 * Returns true when sacked_out had to be lowered, false when no
 * adjustment was necessary.
 */
static bool tcp_limit_reno_sacked(struct tcp_sock *tp) 
{
    u32 gap_cnt = max(tp->lost_out, 1U);

    gap_cnt = min(gap_cnt, tp->packets_out);

    /* Already consistent: leave sacked_out untouched. */
    if (!((tp->sacked_out + gap_cnt) > tp->packets_out))
        return false;

    tp->sacked_out = tp->packets_out - gap_cnt;
    return true;
}

tcp_update_scoreboard()函数

/*
 * Account for newly detected lost packet(s), choosing the marking
 * strategy by connection type (Reno / plain SACK / FACK), then process
 * timed-out skbs.
 *
 * @sk:          socket being processed
 * @fast_rexmit: non-zero when a fast retransmit was just triggered
 */
static void tcp_update_scoreboard(struct sock *sk, int fast_rexmit)
{
    struct tcp_sock *tsk = tcp_sk(sk);

    if (tcp_is_reno(tsk)) {
        /* No SACK information: only the head of the queue is marked. */
        tcp_mark_head_lost(sk, 1, 1);
    } else if (!tcp_is_fack(tsk)) {
        /* Plain SACK: budget is sacked_out beyond the reordering window. */
        const int budget = tsk->sacked_out - tsk->reordering;

        if (budget >= 0)
            tcp_mark_head_lost(sk, budget, 0);
        else if (fast_rexmit)
            tcp_mark_head_lost(sk, 1, 1);
    } else {
        /* FACK: fackets_out beyond the reordering window, at least 1. */
        const int excess = tsk->fackets_out - tsk->reordering;

        tcp_mark_head_lost(sk, excess > 0 ? excess : 1, 0);
    }

    tcp_timeout_skbs(sk);
}

tcp_mark_head_lost()函数

/* Detect loss in event "A" above by marking head of queue up as lost.
 * For FACK or non-SACK(Reno) senders, the first "packets" number of segments
 * are considered lost. For RFC3517 SACK, a segment is considered lost if it
 * has at least tp->reordering SACKed segments above it; "packets" refers to
 * the maximum SACKed segments to pass before reaching this limit.
 * high_seq:可以标记为lost的段序号的最大值。
 * mark_head: 为1表示只需要标记发送队列的第一个段。
 */
/*
 * @sk:        socket whose write queue is scanned
 * @packets:   budget of packets to count before marking stops
 * @mark_head: non-zero to mark at most the first skb of the write queue
 */
static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
{
    struct tcp_sock *tp = tcp_sk(sk);
    struct sk_buff *skb;
    int cnt, oldcnt;
    int err;
    unsigned int mss;
    /* Use SACK to deduce losses of new sequences sent during recovery */
    const u32 loss_high = tcp_is_sack(tp) ?  tp->snd_nxt : tp->high_seq;
    /* The number of lost packets cannot exceed the number sent out. */
    WARN_ON(packets > tp->packets_out);
    /* A previous call left a hint: resume from there. */
    if (tp->lost_skb_hint) {
        skb = tp->lost_skb_hint;/* start the walk at the hinted skb */
        cnt = tp->lost_cnt_hint;/* packets already counted up to the hint */
        /* Head already handled? */
        /* Only the head was requested, but the hint is no longer the head
         * of the write queue: nothing left to do.
         */
        if (mark_head && skb != tcp_write_queue_head(sk))
            return;
    } else {
        skb = tcp_write_queue_head(sk);/* start at the head of the write queue */
        cnt = 0;/* nothing counted yet */
    }
    tcp_for_write_queue_from(skb, sk) {/* walk the write queue starting at skb */
        if (skb == tcp_send_head(sk))
            break;/* reached the send head: stop */
        /* TODO: do this better */
        /* this is not the most efficient way to do this... */
        tp->lost_skb_hint = skb;
        tp->lost_cnt_hint = cnt;/* cache how many packets were counted so far */
        /* loss_high is the upper sequence bound for marking; stop at the
         * first skb whose end_seq lies beyond it.
         */
        if (after(TCP_SKB_CB(skb)->end_seq, loss_high))
            break;

        oldcnt = cnt;
        /* FACK and Reno count every skb; plain SACK counts only skbs that
         * have already been SACKed.
         */
        if (tcp_is_fack(tp) || tcp_is_reno(tp) ||
            (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
            cnt += tcp_skb_pcount(skb);/* add this skb's packet count */

        /* The budget was exceeded by this skb: either stop, or fragment
         * the skb at (packets - oldcnt) * mss bytes so that cnt can be
         * capped at exactly "packets".
         */
        if (cnt > packets) {
            if ((tcp_is_sack(tp) && !tcp_is_fack(tp)) ||
                (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) ||
                (oldcnt >= packets))
                break;

            mss = skb_shinfo(skb)->gso_size;
            err = tcp_fragment(sk, skb, (packets - oldcnt) * mss, mss);
            if (err < 0)
                break;
            cnt = packets;
        }
        tcp_skb_mark_lost(tp, skb);

        if (mark_head)/* only one skb was to be marked: done */
            break;
    }
    tcp_verify_left_out(tp);
}

tcp_skb_mark_lost()函数

/*
 * Flag one skb as lost and account for it in lost_out.
 *
 * skbs that already carry TCPCB_LOST or TCPCB_SACKED_ACKED are left
 * untouched, so each skb is counted at most once.
 */
static void tcp_skb_mark_lost(struct tcp_sock *tp, struct sk_buff *skb)
{
    /* Already marked lost, or SACKed by the receiver: nothing to do. */
    if (TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_ACKED))
        return;

    /* Update the retransmit hint for the newly lost skb. */
    tcp_verify_retransmit_hint(tp, skb);

    /* Add this skb's packets to the lost total, then set the lost flag. */
    tp->lost_out += tcp_skb_pcount(skb);
    TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
}
上一篇下一篇

猜你喜欢

热点阅读