ethereum p2p Kademlia的实现之二

2018-04-17 本文已影响44人古则

Kademlia的相关原理见https://blog.csdn.net/doleria/article/details/78685531

1.Kademlia相关数据结构

整体结构是
node（节点）=》bucket（k桶中的行）=》Table(k桶及其他一些实现)

node

// p2p/discover/node.go
// Node represents a host on the network.
// The fields of Node may not be modified.
type Node struct {
    IP       net.IP // len 4 for IPv4 or 16 for IPv6
    UDP, TCP uint16 // port numbers
    ID       NodeID // the node's public key

    // This is a cached copy of sha3(ID) which is used for node
    // distance calculations. This is part of Node in order to make it
    // possible to write tests that need a node at a certain distance.
    // In those tests, the content of sha will not actually correspond
    // with ID.
    sha common.Hash

    // Time when the node was added to the table.
    addedAt time.Time
}

node中包含了节点的ip， udp，tcp端口
还有节点id（NodeID 512bit) ，但Kademlia中的距离计算不是根据NodeID，而是sha3(NodeID)，及节点的hash值，32个字节
addedAt 是指节点被加入到K桶时的时间戳

bucket， k桶的一行

// bucket contains nodes, ordered by their last activity. the entry
// that was most recently active is the first element in entries.
type bucket struct {
    entries      []*Node // live entries, sorted by time of last contact
    replacements []*Node // recently seen nodes to be used if revalidation fails
    ips          netutil.DistinctNetSet
}

entries是一个存活的node数组
ips是一个存储距离的集合，后面再进行具体分析

Table k桶的实现

type Table struct {
    mutex   sync.Mutex        // protects buckets, bucket content, nursery, rand
    buckets [nBuckets]*bucket // index of known nodes by distance
    nursery []*Node           // bootstrap nodes
    rand    *mrand.Rand       // source of randomness, periodically reseeded
    ips     netutil.DistinctNetSet

    db         *nodeDB // database of known nodes
    refreshReq chan chan struct{}
    initDone   chan struct{}
    closeReq   chan struct{}
    closed     chan struct{}

    bondmu    sync.Mutex
    bonding   map[NodeID]*bondproc
    bondslots chan struct{} // limits total number of active bonding processes

    nodeAddedHook func(*Node) // for testing

    net  transport
    self *Node // metadata of the local node
}

k桶的数据存于buckets中，

hashBits = len(common.Hash{}) * 8
nBuckets = hashBits / 15       // Number of buckets

可以算出k桶一共有 32 * 8 / 15 = 17个bucket

self 是自身节点
nursery 是bootnodes节点，由p2p.Server.Start()方法传入
bondmu，bonding，bondslots，这三个成员的使用的方法调用路径是

func (tab *Table) Lookup(targetID NodeID)
=>
func (tab *Table) lookup(targetID NodeID, refreshIfEmpty bool) []*Node
=>
func (tab *Table) bondall(nodes []*Node) (result []*Node)
=>
func (tab *Table) bond(pinged bool, id NodeID, addr *net.UDPAddr, tcpPort uint16) (*Node, error)
=>
func (tab *Table) pingpong(w *bondproc, pinged bool, id NodeID, addr *net.UDPAddr, tcpPort uint16)

至于其作用，稍后详述

2.node的距离计算

看p2p/discover/node.go中node节点间距离计算的相关代码

// distcmp 比较 a<->target and b<->target.的距离
// 如果a更接近target，返回-1
// 如果b更接近target，返回1
// 如果相等返回0
func distcmp(target, a, b common.Hash) int {
    for i := range target {
        da := a[i] ^ target[i]
        db := b[i] ^ target[i]
        if da > db {
            return 1
        } else if da < db {
            return -1
        }
    }
    return 0
}

// 这个表描述一个字节（8bits）表示的值（0-255），各有多少个前置的0
//如0的时候有8个，64的时候有1个0
var lzcount = [256]int{
    8, 7, 6, 6, 5, 5, 5, 5,
    4, 4, 4, 4, 4, 4, 4, 4,
    3, 3, 3, 3, 3, 3, 3, 3,
    3, 3, 3, 3, 3, 3, 3, 3,
    2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2,
    1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
}

// 这个方法计算两个节点之间的逻辑距离
// 计算方法是求两个node id hash之间的异或值 除了前置的0，还有多少位
// int(log2(a ^ b))
func logdist(a, b common.Hash) int {
    lz := 0
    for i := range a {
        x := a[i] ^ b[i]
        if x == 0 {
            lz += 8
        } else {
            lz += lzcount[x]
            break
        }
    }
    return len(a)*8 - lz
}

// 已知亮点之间的距离，跟其中一点的NodeID的hash值，求另外一点NodeID的hash值
func hashAtDistance(a common.Hash, n int) (b common.Hash) {
    if n == 0 {
        return a
    }
    // flip bit at position n, fill the rest with random bits
    b = a
    pos := len(a) - n/8 - 1
    bit := byte(0x01) << (byte(n%8) - 1)
    if bit == 0 {
        pos++
        bit = 0x80
    }
    b[pos] = a[pos]&^bit | ^a[pos]&bit // TODO: randomize end bits
    for i := pos + 1; i < len(a); i++ {
        b[i] = byte(rand.Intn(255))
    }
    return b
}

记住这句话：两点之间的距离的定义是：求两个node id hash之间的异或值，除了前置的0，还有多少位

3.bonding，bondslots的分析

bonding的主要作用是使用table的net完成ping pong过程，实现两个节点间的握手

type Table struct {
    mutex   sync.Mutex        // protects buckets, bucket content, nursery, rand
    buckets [nBuckets]*bucket // index of known nodes by distance
    nursery []*Node           // bootstrap nodes
    rand    *mrand.Rand       // source of randomness, periodically reseeded
    ips     netutil.DistinctNetSet

    db         *nodeDB // database of known nodes
    refreshReq chan chan struct{}
    initDone   chan struct{}
    closeReq   chan struct{}
    closed     chan struct{}

    bondmu    sync.Mutex
    bonding   map[NodeID]*bondproc
    bondslots chan struct{} // limits total number of active bonding processes

    nodeAddedHook func(*Node) // for testing

    net  transport
    self *Node // metadata of the local node
}

type bondproc struct {
    err  error
    n    *Node
    done chan struct{}
}

现在来看相关方法

func (tab *Table) bondall(nodes []*Node) (result []*Node) {
    rc := make(chan *Node, len(nodes))
    for i := range nodes {
        go func(n *Node) {
            nn, _ := tab.bond(false, n.ID, n.addr(), n.TCP)
            rc <- nn
        }(nodes[i])
    }
    for range nodes {
        if n := <-rc; n != nil {
            result = append(result, n)
        }
    }
    return result
}

这个方法主要是对每个节点循环调用bond，看bond方法

//该方法的主要目的就是保证本地节点跟远程节点完成握手
//握手的过程就是双方都完成ping pong的信令交换
//findnode等消息只能在握手完成后
func (tab *Table) bond(pinged bool, id NodeID, addr *net.UDPAddr, tcpPort uint16) (*Node, error) {
    if id == tab.self.ID {
        return nil, errors.New("is self")
    }
    if pinged && !tab.isInitDone() {
        return nil, errors.New("still initializing")
    }
    // Start bonding if we haven't seen this node for a while or if it failed findnode too often.
    node, fails := tab.db.node(id), tab.db.findFails(id)
    age := time.Since(tab.db.bondTime(id))
    var result error
    if fails > 0 || age > nodeDBNodeExpiration {
        log.Trace("Starting bonding ping/pong", "id", id, "known", node != nil, "failcount", fails, "age", age)

        tab.bondmu.Lock()
        w := tab.bonding[id]
        if w != nil {
            // Wait for an existing bonding process to complete.
            tab.bondmu.Unlock()
            <-w.done
        } else {
            // Register a new bonding process.
            w = &bondproc{done: make(chan struct{})}
            tab.bonding[id] = w
            tab.bondmu.Unlock()
            // Do the ping/pong. The result goes into w.
            tab.pingpong(w, pinged, id, addr, tcpPort)
            // Unregister the process after it's done.
            tab.bondmu.Lock()
            delete(tab.bonding, id)
            tab.bondmu.Unlock()
        }
        // Retrieve the bonding results
        result = w.err
        if result == nil {
            node = w.n
        }
    }
    // Add the node to the table even if the bonding ping/pong
    // fails. It will be relaced quickly if it continues to be
    // unresponsive.
    if node != nil {
        tab.add(node)
        tab.db.updateFindFails(id, 0)
    }
    return node, result
}

4.pingpong中的底层网络

前面说到 bonding的主要作用是使用table的net完成ping pong过程，实现两个节点间的握手。
而net是以下过程中创建

//p2p/discover/udp.go
func newUDP(c conn, cfg Config) (*Table, *udp, error) {
    udp := &udp{
        conn:        c,
        priv:        cfg.PrivateKey,
        netrestrict: cfg.NetRestrict,
        closing:     make(chan struct{}),
        gotreply:    make(chan reply),
        addpending:  make(chan *pending),
    }
    realaddr := c.LocalAddr().(*net.UDPAddr)
    if cfg.AnnounceAddr != nil {
        realaddr = cfg.AnnounceAddr
    }
    // TODO: separate TCP port
    udp.ourEndpoint = makeEndpoint(realaddr, uint16(realaddr.Port))
    tab, err := newTable(udp, PubkeyID(&cfg.PrivateKey.PublicKey), realaddr, cfg.NodeDBPath, cfg.Bootnodes)
    if err != nil {
        return nil, nil, err
    }
    udp.Table = tab

    go udp.loop()
    go udp.readLoop(cfg.Unhandled)
    return udp.Table, udp, nil
}

从p2p.server.Start()方法中得知 c conn参数是一个监听udp端口的conn
udp struct中包含udp的conn
udp struct实现了//p2p/discover/table.go中定义的

// transport is implemented by the UDP transport.
// it is an interface so we can test without opening lots of UDP
// sockets and without generating a private key.
type transport interface {
    ping(NodeID, *net.UDPAddr) error
    waitping(NodeID) error
    findnode(toid NodeID, addr *net.UDPAddr, target NodeID) ([]*Node, error)
    close()
}

接口
所以当在bond方法中调用ping等方法时，执行的是udp接口的方法（p2p/discover/table.go）
以下对这些方法做出分析

5.p2p的网络维护协议

数据协议的相关结构体如下：

//p2p/discover/udp.go
ping struct {
        Version    uint
        From, To   rpcEndpoint
        Expiration uint64
        // Ignore additional fields (for forward compatibility).
        Rest []rlp.RawValue `rlp:"tail"`
    }

    // pong is the reply to ping.
    pong struct {
        // This field should mirror the UDP envelope address
        // of the ping packet, which provides a way to discover the
        // the external address (after NAT).
        To rpcEndpoint

        ReplyTok   []byte // This contains the hash of the ping packet.
        Expiration uint64 // Absolute timestamp at which the packet becomes invalid.
        // Ignore additional fields (for forward compatibility).
        Rest []rlp.RawValue `rlp:"tail"`
    }

    // findnode is a query for nodes close to the given target.
    findnode struct {
        Target     NodeID // doesn't need to be an actual public key
        Expiration uint64
        // Ignore additional fields (for forward compatibility).
        Rest []rlp.RawValue `rlp:"tail"`
    }

    // reply to findnode
    neighbors struct {
        Nodes      []rpcNode
        Expiration uint64
        // Ignore additional fields (for forward compatibility).
        Rest []rlp.RawValue `rlp:"tail"`
    }