The veth virtual network device
veth is a virtual network device provided by the kernel, and it always comes in pairs: data transmitted on one end is received on the other, much like a network cable. So how is it implemented? Let's walk through the implementation.
Creating a veth pair
A pair can be created with the ip command:
[root@localhost ~]# ip link add vetha type veth peer name vethb
On the kernel side, the veth module must be loaded first. The module does very little: it simply registers an rtnl_link_ops.
[root@localhost ~]# modprobe veth
[root@localhost ~]# lsmod | grep veth
veth 13410 0
// veth.ko initialization
#define DRV_NAME "veth"
static struct rtnl_link_ops veth_link_ops = {
.kind = DRV_NAME,
.priv_size = sizeof(struct veth_priv),
.setup = veth_setup,
.validate = veth_validate,
.newlink = veth_newlink,
.dellink = veth_dellink,
.policy = veth_policy,
.maxtype = VETH_INFO_MAX,
};
/*
* init/fini
*/
static __init int veth_init(void)
{
return rtnl_link_register(&veth_link_ops);
}
int rtnl_link_register(struct rtnl_link_ops *ops)
{
int err;
rtnl_lock();
err = __rtnl_link_register(ops);
rtnl_unlock();
return err;
}
int __rtnl_link_register(struct rtnl_link_ops *ops)
{
if (rtnl_link_ops_get(ops->kind))
return -EEXIST;
/* The check for setup is here because if ops
* does not have that filled up, it is not possible
* to use the ops for creating device. So do not
* fill up dellink as well. That disables rtnl_dellink.
*/
if (ops->setup && !ops->dellink)
ops->dellink = unregister_netdevice_queue;
list_add_tail(&ops->list, &link_ops);
return 0;
}
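One consequence of dellink worth noting: veth registers veth_dellink, which unregisters both ends, so deleting either interface removes its peer too. A quick check (a sketch; re-create the pair afterwards if you are following along):
ip link add vetha type veth peer name vethb
ip link del vethb
ip link show vetha   # fails: vetha was removed together with its peer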
When a veth pair is created with ip link add vetha type veth peer name vethb, the kernel enters rtnl_newlink, which looks up the rtnl_link_ops registered for the given type and calls its newlink to create the peer device; each end is then stored in the other's private data.
rtnl_newlink
if (linkinfo[IFLA_INFO_KIND]) {
nla_strlcpy(kind, linkinfo[IFLA_INFO_KIND], sizeof(kind));
ops = rtnl_link_ops_get(kind);
struct net_device *dev;
dev = rtnl_create_link(dest_net, ifname, name_assign_type, ops, tb);
if (ops->newlink) {
err = ops->newlink(net, dev, tb, data);
static int veth_newlink(struct net *src_net, struct net_device *dev,
struct nlattr *tb[], struct nlattr *data[])
struct net_device *peer;
peer = rtnl_create_link(net, ifname, name_assign_type,
&veth_link_ops, tbp);
register_netdevice(peer);
register_netdevice(dev);
/*
 * tie the devices together
 */
// This is the key step: peer is stored in dev's priv and dev in peer's priv,
// binding the two virtual devices to each other.
priv = netdev_priv(dev);
rcu_assign_pointer(priv->peer, peer);
priv = netdev_priv(peer);
rcu_assign_pointer(priv->peer, dev);
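The pairing is also visible from user space: the veth driver exposes its peer's ifindex through ethtool statistics (a quick check against the pair created above):
ethtool -S vetha   # prints "peer_ifindex: N", where N is vethb's ifindex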
static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
// fetch the peer device
struct veth_priv *priv = netdev_priv(dev);
rcv = rcu_dereference(priv->peer);
// dev_forward_skb transmits to the peer device rcv,
// which is effectively a receive on the peer
if (likely(dev_forward_skb(rcv, skb) == NET_RX_SUCCESS)) {
struct pcpu_vstats *stats = this_cpu_ptr(dev->vstats);
u64_stats_update_begin(&stats->syncp);
stats->bytes += length;
stats->packets++;
u64_stats_update_end(&stats->syncp);
} else {
drop:
atomic64_inc(&priv->dropped);
}
rcu_read_unlock();
return NETDEV_TX_OK;
}
// if the packet is forwardable, netif_rx_internal delivers it to the host network stack
int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
{
return __dev_forward_skb(dev, skb) ?: netif_rx_internal(skb);
}
int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
{
if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
if (skb_copy_ubufs(skb, GFP_ATOMIC)) {
atomic_long_inc(&dev->rx_dropped);
kfree_skb(skb);
return NET_RX_DROP;
}
}
// drop if the receiving device is down or the frame does not fit its MTU
if (unlikely(!is_skb_forwardable(dev, skb))) {
atomic_long_inc(&dev->rx_dropped);
kfree_skb(skb);
return NET_RX_DROP;
}
skb_scrub_packet(skb, true);
// eth_type_trans sets skb->protocol and skb->pkt_type from the MAC header
skb->protocol = eth_type_trans(skb, dev);
skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
return 0;
}
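A side note on is_skb_forwardable: a frame that does not fit the receiving device's MTU is dropped here, and as the code above shows, the receiver's rx_dropped counter increments. A sketch of how to observe this, reusing the test namespace and static ARP entry set up in the experiment further below:
ip link set dev vethb mtu 1000                # shrink the receiving end
ip netns exec test ping -c 3 -s 1400 1.1.1.4  # frames no longer fit vethb's MTU
ip -s link show dev vethb                     # the RX "dropped" counter increases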
// assign skb->pkt_type based on the destination MAC
__be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev)
{
unsigned short _service_access_point;
const unsigned short *sap;
const struct ethhdr *eth;
skb->dev = dev;
skb_reset_mac_header(skb);
skb_pull_inline(skb, ETH_HLEN);
eth = eth_hdr(skb);
if (unlikely(is_multicast_ether_addr(eth->h_dest))) {
if (ether_addr_equal_64bits(eth->h_dest, dev->broadcast))
skb->pkt_type = PACKET_BROADCAST;
else
skb->pkt_type = PACKET_MULTICAST;
}
else if (unlikely(!ether_addr_equal_64bits(eth->h_dest,
dev->dev_addr)))
skb->pkt_type = PACKET_OTHERHOST;
....
}
Using veth
I have seen two common ways of using veth:
a. One end in the root namespace and the other in a different namespace, connecting the two namespaces; in k8s, CNIs such as calico, cilium, and ovs are built this way.
b. The two ends attached to two bridges, connecting the bridges.
Case b is straightforward; a minimal sketch is shown below. The rest of this article focuses on several problems encountered with case a.
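A minimal sketch of case b, assuming two bridges br0 and br1 already exist (the veth names here are illustrative):
ip link add vethx type veth peer name vethy
ip link set dev vethx master br0   # attach one end to br0
ip link set dev vethy master br1   # attach the other end to br1
ip link set dev vethx up
ip link set dev vethy up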
The experiment proceeds as follows:
create a veth pair, vetha and vethb;
create a namespace named test;
move vetha into the test namespace;
bring both vetha and vethb up;
assign vetha the IP address 1.1.1.2.
[root@localhost ~]# ip link add vetha type veth peer name vethb
[root@localhost ~]# ip link set dev vethb up
[root@localhost ~]# ip netns add test
[root@localhost ~]# ip link set dev vetha netns test
[root@localhost ~]# ip netns exec test ip link set dev vetha up
[root@localhost ~]# ip netns exec test ip address add dev vetha 1.1.1.2/24
[root@localhost ~]# ip netns exec test ip a
1: lo: <LOOPBACK> mtu 65536 qdisc noop state DOWN qlen 1
link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
30: vetha@if29: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP qlen 1000
link/ether 96:7f:a6:ea:93:23 brd ff:ff:ff:ff:ff:ff link-netnsid 0
inet 1.1.1.2/24 scope global vetha
valid_lft forever preferred_lft forever
inet6 fe80::74ef:e3ff:fe5d:2db0/64 scope link
valid_lft forever preferred_lft forever
[root@localhost ~]# ip netns exec test ip r
1.1.1.0/24 dev vetha proto kernel scope link src 1.1.1.2
[root@localhost ~]# ip netns exec test arp -n
Address HWtype HWaddress Flags Mask Iface
1.1.1.4 (incomplete) vetha
[root@localhost ~]# ifconfig vethb
vethb: flags=4163<UP,BROADCAST,RUNNING,MULTICAST> mtu 1500
inet6 fe80::947f:a6ff:feea:9321 prefixlen 64 scopeid 0x20<link>
ether 96:7f:a6:ea:93:21 txqueuelen 1000 (Ethernet)
RX packets 208784 bytes 12836250 (12.2 MiB)
RX errors 0 dropped 0 overruns 0 frame 0
TX packets 1143 bytes 62469 (61.0 KiB)
TX errors 0 dropped 0 overruns 0 carrier 0 collisions 0
Problem 1: in the test namespace, configure a static ARP entry and ping a nonexistent address, 1.1.1.4.
[root@localhost ~]# ip netns exec test arp -s 1.1.1.4 00:00:00:00:00:01
[root@localhost ~]# ip netns exec test arp -n
Address HWtype HWaddress Flags Mask Iface
1.1.1.4 ether 00:00:00:00:00:01 CM vetha
After the ICMP packets reach vethb they go through the host's network stack and match the host's default route, so they should leave via em1. In practice, however, the ICMP packets can be captured on vethb but never show up on em1.
[root@localhost ~]# ip r
default via 10.164.129.1 dev em1 proto static metric 100
10.10.10.0/24 dev gre10 proto kernel scope link src 10.10.10.1
10.164.129.0/24 dev em1 proto kernel scope link src 10.164.129.16 metric 100
[root@localhost ~]# tcpdump -vne -i vethb icmp
tcpdump: listening on vethb, link-type EN10MB (Ethernet), capture size 262144 bytes
08:04:49.386991 f6:d4:d2:de:20:be > 00:00:00:00:00:01, ethertype IPv4 (0x0800), length 98: (tos 0x0, ttl 64, id 48297, offset 0, flags [DF], proto ICMP (1), length 84)
1.1.1.2 > 1.1.1.4: ICMP echo request, id 30782, seq 21, length 64
08:04:50.386985 f6:d4:d2:de:20:be > 00:00:00:00:00:01, ethertype IPv4 (0x0800), length 98: (tos 0x0, ttl 64, id 48427, offset 0, flags [DF], proto ICMP (1), length 84)
1.1.1.2 > 1.1.1.4: ICMP echo request, id 30782, seq 22, length 64
^C
2 packets captured
2 packets received by filter
0 packets dropped by kernel
[root@localhost ~]# tcpdump -vne -i em1 icmp
tcpdump: listening on em1, link-type EN10MB (Ethernet), capture size 262144 bytes
^C
0 packets captured
0 packets received by filter
0 packets dropped by kernel
The reason is that the ICMP packet's destination MAC is 00:00:00:00:00:01. When vethb receives the packet, eth_type_trans assigns skb->pkt_type based on the destination MAC; since that MAC is not vethb's, skb->pkt_type is set to PACKET_OTHERHOST.
veth_xmit -> dev_forward_skb -> __dev_forward_skb -> eth_type_trans
if (unlikely(is_multicast_ether_addr(eth->h_dest))) {
if (ether_addr_equal_64bits(eth->h_dest, dev->broadcast))
skb->pkt_type = PACKET_BROADCAST;
else
skb->pkt_type = PACKET_MULTICAST;
}
else if (unlikely(!ether_addr_equal_64bits(eth->h_dest,
dev->dev_addr)))
skb->pkt_type = PACKET_OTHERHOST;
The packet is then handed to the host stack, where ip_rcv drops any skb whose skb->pkt_type == PACKET_OTHERHOST. Annoyingly, this drop is not reflected in any statistics.
It looks like we need to map 1.1.1.4 to vethb's MAC address in the test namespace.
netif_rx_internal -> enqueue_to_backlog -> process_backlog -> __netif_receive_skb -> __netif_receive_skb_core -> ip_rcv
int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
{
const struct iphdr *iph;
u32 len;
/* When the interface is in promisc. mode, drop all the crap
* that it receives, do not try to analyse it.
*/
if (skb->pkt_type == PACKET_OTHERHOST)
goto drop;
drop:
kfree_skb(skb);
out:
return NET_RX_DROP;
}
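Since the drop has no counter, one way to confirm it is to trace kfree_skb through the skb:kfree_skb tracepoint, which records the call site (a sketch):
perf record -e skb:kfree_skb -a -- sleep 5   # capture skb frees system-wide
perf script                                  # look for frees whose location is in ip_rcv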
Problem 2: as shown below, 1.1.1.4 now resolves to vethb's MAC, yet it still fails: vethb sees the packets, em1 does not.
[root@localhost ~]# ip link show dev vethb
33: vethb@if34: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP mode DEFAULT qlen 1000
link/ether 96:ec:6a:a8:67:ed brd ff:ff:ff:ff:ff:ff link-netnsid 0
[root@localhost ~]# ip netns exec test arp -d 1.1.1.4
[root@localhost ~]# ip netns exec test arp -s 1.1.1.4 96:ec:6a:a8:67:ed
[root@localhost ~]# ip netns exec test arp -n
Address HWtype HWaddress Flags Mask Iface
1.1.1.4 ether 96:ec:6a:a8:67:ed CM vetha
[root@localhost ~]# ip link show dev vethb
33: vethb@if34: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP mode DEFAULT qlen 1000
link/ether 96:ec:6a:a8:67:ed brd ff:ff:ff:ff:ff:ff link-netnsid 0
[root@localhost ~]# tcpdump -vne -i vethb icmp
tcpdump: listening on vethb, link-type EN10MB (Ethernet), capture size 262144 bytes
08:15:21.495979 f6:d4:d2:de:20:be > 96:ec:6a:a8:67:ed, ethertype IPv4 (0x0800), length 98: (tos 0x0, ttl 64, id 23666, offset 0, flags [DF], proto ICMP (1), length 84)
1.1.1.2 > 1.1.1.4: ICMP echo request, id 31770, seq 32, length 64
^C
1 packet captured
1 packet received by filter
0 packets dropped by kernel
[root@localhost ~]# tcpdump -vne -i em1 icmp
tcpdump: listening on em1, link-type EN10MB (Ethernet), capture size 262144 bytes
^C
0 packets captured
0 packets received by filter
0 packets dropped by kernel
Problem 3: this is a different failure again. When ip_route_input_noref performs the route lookup, the default route does match, but vethb does not have forwarding enabled, so the lookup still fails.
ip_route_input_noref -> ip_route_input_slow
fl4.flowi4_oif = 0;
fl4.flowi4_iif = dev->ifindex;
fl4.flowi4_mark = skb->mark;
fl4.flowi4_tos = tos;
fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
fl4.daddr = daddr;
fl4.saddr = saddr;
err = fib_lookup(net, &fl4, &res);
if (!IN_DEV_FORWARD(in_dev)) {
err = -EHOSTUNREACH;
goto no_route;
}
no_route:
RT_CACHE_STAT_INC(in_no_route);
res.type = RTN_UNREACHABLE;
res.fi = NULL;
The in_no_route drop counter can be read with:
cat /proc/net/stat/rt_cache | awk -F " " '{print $5}'
Next, enable forwarding on vethb and try again:
[root@localhost ~]# echo 1 > /proc/sys/net/ipv4/conf/vethb/forwarding
[root@localhost ~]# cat /proc/sys/net/ipv4/conf/vethb/forwarding
1
Note: if /proc/sys/net/ipv4/conf/all/forwarding is enabled, newly created interfaces have forwarding enabled by default.
Problem 4: pinging again still fails, this time because the reverse-path check fails. fib_validate_source performs a route lookup using the packet's source IP as the destination; it can only match the default route, and since the default route's output interface is not the interface the packet arrived on, the check fails. The check expects the same device to carry both directions of a flow, which is known as symmetric routing.
ip_route_input_slow -> __mkroute_input
/* Ignore rp_filter for packets protected by IPsec. */
err = fib_validate_source(skb, saddr, daddr, tos, FIB_RES_OIF(*res),
in_dev->dev, in_dev, &itag);
if (err < 0) {
ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr,
saddr);
goto cleanup;
}
int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
u8 tos, int oif, struct net_device *dev,
struct in_device *idev, u32 *itag)
{
int r = secpath_exists(skb) ? 0 : IN_DEV_RPFILTER(idev);
// the reverse-path-check switch; 0 means the check is skipped
if (!r && !fib_num_tclassid_users(dev_net(dev)) &&
IN_DEV_ACCEPT_LOCAL(idev) &&
(dev->ifindex != oif || !IN_DEV_TX_REDIRECTS(idev))) {
*itag = 0;
return 0;
}
return __fib_validate_source(skb, src, dst, tos, oif, dev, r, idev, itag);
}
static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
u8 tos, int oif, struct net_device *dev,
int rpf, struct in_device *idev, u32 *itag)
{
int ret, no_addr;
struct fib_result res;
struct flowi4 fl4;
struct net *net;
bool dev_match;
fl4.flowi4_oif = 0;
fl4.flowi4_iif = oif ? : LOOPBACK_IFINDEX;
fl4.daddr = src;
fl4.saddr = dst;
fl4.flowi4_tos = tos;
fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
no_addr = idev->ifa_list == NULL;
fl4.flowi4_mark = IN_DEV_SRC_VMARK(idev) ? skb->mark : 0;
net = dev_net(dev);
if (fib_lookup(net, &fl4, &res))
goto last_resort;
if (res.type != RTN_UNICAST &&
(res.type != RTN_LOCAL || !IN_DEV_ACCEPT_LOCAL(idev)))
goto e_inval;
if (!rpf && !fib_num_tclassid_users(dev_net(dev)) &&
(dev->ifindex != oif || !IN_DEV_TX_REDIRECTS(idev)))
goto last_resort;
fib_combine_itag(itag, &res);
dev_match = false;
#ifdef CONFIG_IP_ROUTE_MULTIPATH
for (ret = 0; ret < res.fi->fib_nhs; ret++) {
struct fib_nh *nh = &res.fi->fib_nh[ret];
if (nh->nh_dev == dev) {
dev_match = true;
break;
}
}
#else
// succeeds only if the route's output device is the packet's input device;
// otherwise the reverse-path check fails.
if (FIB_RES_DEV(res) == dev)
dev_match = true;
#endif
if (dev_match) {
ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
return ret;
}
if (no_addr)
goto last_resort;
if (rpf == 1)
goto e_rpf;
fl4.flowi4_oif = dev->ifindex;
ret = 0;
if (fib_lookup(net, &fl4, &res) == 0) {
if (res.type == RTN_UNICAST)
ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
}
return ret;
last_resort:
if (rpf)
goto e_rpf;
*itag = 0;
return 0;
e_inval:
return -EINVAL;
e_rpf:
return -EXDEV;
}
The in_martian_src drop counter can be read with:
[root@localhost ~]# cat /proc/net/stat/rt_cache | awk -F " " '{print $8}'
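The kernel can also log such packets: with log_martians enabled, ip_handle_martian_source writes a "martian source" line to the kernel log (a sketch; requires CONFIG_IP_ROUTE_VERBOSE, which distro kernels normally enable):
echo 1 > /proc/sys/net/ipv4/conf/all/log_martians
dmesg | tail   # expect a line like: martian source 1.1.1.4 from 1.1.1.2, on dev vethb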
There are two ways to fix this:
a. add a symmetric route
b. disable the reverse-path check
a. Add a symmetric route: after adding a route toward the test namespace as below, the ICMP packets do go out via em1.
[root@localhost ~]# ip route add 1.1.1.2 dev vethb
[root@localhost ~]# ip r
default via 10.164.129.1 dev em1 proto static metric 100
1.1.1.2 dev vethb scope link
10.10.10.0/24 dev gre10 proto kernel scope link src 10.10.10.1
10.164.129.0/24 dev em1 proto kernel scope link src 10.164.129.16 metric 100
169.254.0.0/16 dev provisioning_nw scope link metric 1016
169.254.0.0/16 dev idrac_nw scope link metric 1017
192.168.0.0/24 dev provisioning_nw proto kernel scope link src 192.168.0.253
192.168.10.0/24 dev idrac_nw proto kernel scope link src 192.168.10.13
192.168.122.0/24 dev virbr0 proto kernel scope link src 192.168.122.1
[root@localhost ~]# tcpdump -vne -i em1 icmp or arp
tcpdump: listening on em1, link-type EN10MB (Ethernet), capture size 262144 bytes
08:56:10.509045 90:b1:1c:55:37:1e > 00:00:0c:07:ac:02, ethertype IPv4 (0x0800), length 98: (tos 0x0, ttl 63, id 28970, offset 0, flags [DF], proto ICMP (1), length 84)
1.1.1.2 > 1.1.1.4: ICMP echo request, id 1009, seq 1664, length 64
08:56:11.509051 90:b1:1c:55:37:1e > 00:00:0c:07:ac:02, ethertype IPv4 (0x0800), length 98: (tos 0x0, ttl 63, id 29200, offset 0, flags [DF], proto ICMP (1), length 84)
1.1.1.2 > 1.1.1.4: ICMP echo request, id 1009, seq 1665, length 64
b. Disable the reverse-path check.
The effective rp_filter is the maximum of the per-device value and the all value, so both must be set to 0:
#define IN_DEV_RPFILTER(in_dev) IN_DEV_MAXCONF((in_dev), RP_FILTER)
#define IN_DEV_MAXCONF(in_dev, attr) \
(max(IPV4_DEVCONF_ALL(dev_net(in_dev->dev), attr), \
IN_DEV_CONF_GET((in_dev), attr)))
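A quick way to check the effective value is to read both knobs (the sysctl form of the same /proc files):
sysctl net.ipv4.conf.all.rp_filter net.ipv4.conf.vethb.rp_filter
# the effective rp_filter for vethb is the larger of the two values printed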
[root@localhost ~]# ip route del 1.1.1.2 dev vethb
[root@localhost ~]# echo 0 > /proc/sys/net/ipv4/conf/all/rp_filter
[root@localhost ~]# echo 0 > /proc/sys/net/ipv4/conf/vethb/rp_filter
[root@localhost ~]# tcpdump -vne -i em1 icmp or arp
tcpdump: listening on em1, link-type EN10MB (Ethernet), capture size 262144 bytes
09:01:22.555047 90:b1:1c:55:37:1e > 00:00:0c:07:ac:02, ethertype IPv4 (0x0800), length 98: (tos 0x0, ttl 63, id 58344, offset 0, flags [DF], proto ICMP (1), length 84)
1.1.1.2 > 1.1.1.4: ICMP echo request, id 1009, seq 1976, length 64
09:01:23.555046 90:b1:1c:55:37:1e > 00:00:0c:07:ac:02, ethertype IPv4 (0x0800), length 98: (tos 0x0, ttl 63, id 58481, offset 0, flags [DF], proto ICMP (1), length 84)
1.1.1.2 > 1.1.1.4: ICMP echo request, id 1009, seq 1977, length 64
proxy_arp
At this point the ICMP packets do get out, but the MAC for 1.1.1.4 in the test namespace was configured by hand, which is not very flexible. The device's proxy_arp feature can take care of this instead.
// enable proxy_arp
[root@localhost ~]# echo 1 > /proc/sys/net/ipv4/conf/vethb/proxy_arp
[root@localhost ~]# taskset -c 3 ip netns exec test ping 1.1.1.4
PING 1.1.1.4 (1.1.1.4) 56(84) bytes of data.
^C
--- 1.1.1.4 ping statistics ---
1 packets transmitted, 0 received, 100% packet loss, time 0ms
// vethb's MAC address has been learned
[root@localhost ~]# ip netns exec test arp -n
Address HWtype HWaddress Flags Mask Iface
1.1.1.4 ether 96:ec:6a:a8:67:ed C vetha
Note that ARP processing also involves a route lookup and the reverse-path check, so all of the problems above apply to ARP as well; the same settings described earlier take care of them.
arp_process
if (arp->ar_op == htons(ARPOP_REQUEST) &&
ip_route_input_noref(skb, tip, sip, 0, dev) == 0) {
rt = skb_rtable(skb);
addr_type = rt->rt_type;
if (addr_type == RTN_LOCAL) {
...
} else if (IN_DEV_FORWARD(in_dev)) {
if (addr_type == RTN_UNICAST &&
(arp_fwd_proxy(in_dev, dev, rt) ||
arp_fwd_pvlan(in_dev, dev, rt, sip, tip) ||
(rt->dst.dev != dev &&
pneigh_lookup(&arp_tbl, net, &tip, dev, 0)))) {
n = neigh_event_ns(&arp_tbl, sha, &sip, dev);
if (n)
neigh_release(n);
if (NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED ||
skb->pkt_type == PACKET_HOST ||
NEIGH_VAR(in_dev->arp_parms, PROXY_DELAY) == 0) {
arp_send(ARPOP_REPLY, ETH_P_ARP, sip,
dev, tip, sha, dev->dev_addr,
sha);
} else {
pneigh_enqueue(&arp_tbl,
in_dev->arp_parms, skb);
return 0;
}
goto out;
}
Summary
a. If one end of a veth pair is placed in another namespace while the other end stays in the root namespace without being attached to a bridge, then packets sent through the veth from that namespace with a unicast destination MAC must carry the peer device's MAC. This can be configured statically in the namespace, or answered by proxy ARP.
b. To pass the reverse-path check, either disable the check or add a symmetric route.
c. Forwarding must be enabled on the device.
The commands involved are listed below; the calico CNI in k8s essentially boils down to these same few settings.
echo 1 > /proc/sys/net/ipv4/conf/vethb/proxy_arp
echo 1 > /proc/sys/net/ipv4/conf/vethb/forwarding
echo 0 > /proc/sys/net/ipv4/conf/vethb/rp_filter
ip route add 1.1.1.2 dev vethb
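Putting it all together, here is a minimal end-to-end setup for case a, assembled purely from the commands used in this article (names and addresses match the experiment above):
ip link add vetha type veth peer name vethb
ip netns add test
ip link set dev vetha netns test
ip link set dev vethb up
ip netns exec test ip link set dev vetha up
ip netns exec test ip address add dev vetha 1.1.1.2/24
# host side: answer ARP on vethb, enable forwarding,
# relax the reverse-path check, and route return traffic into the namespace
echo 1 > /proc/sys/net/ipv4/conf/vethb/proxy_arp
echo 1 > /proc/sys/net/ipv4/conf/vethb/forwarding
echo 0 > /proc/sys/net/ipv4/conf/all/rp_filter
echo 0 > /proc/sys/net/ipv4/conf/vethb/rp_filter
ip route add 1.1.1.2 dev vethb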