From: Peter Zijlstra Subject: [PATCH 22/31] netvm: skb processing Patch-mainline: Not yet In order to make sure emergency packets receive all memory needed to proceed, ensure processing of emergency SKBs happens under PF_MEMALLOC. Use the (new) sk_backlog_rcv() wrapper to ensure this for backlog processing. Skip taps, since those deliver to user-space again. Signed-off-by: Jiri Slaby [lock imbalance fix] Signed-off-by: Peter Zijlstra Signed-off-by: Suresh Jayaraman --- include/net/sock.h | 5 ++++ net/core/dev.c | 55 +++++++++++++++++++++++++++++++++++++++++++++++++---- net/core/sock.c | 16 +++++++++++++++ 3 files changed, 72 insertions(+), 4 deletions(-) --- a/include/net/sock.h +++ b/include/net/sock.h @@ -682,8 +682,13 @@ static inline __must_check int sk_add_ba return 0; } +extern int __sk_backlog_rcv(struct sock *sk, struct sk_buff *skb); + static inline int sk_backlog_rcv(struct sock *sk, struct sk_buff *skb) { + if (skb_emergency(skb)) + return __sk_backlog_rcv(sk, skb); + return sk->sk_backlog_rcv(sk, skb); } --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2814,6 +2814,30 @@ int __skb_bond_should_drop(struct sk_buf } EXPORT_SYMBOL(__skb_bond_should_drop); +/* + * Filter the protocols for which the reserves are adequate. + * + * Before adding a protocol make sure that it is either covered by the existing + * reserves, or add reserves covering the memory need of the new protocol's + * packet processing. 
+ */ +static int skb_emergency_protocol(struct sk_buff *skb) +{ + if (skb_emergency(skb)) + switch (skb->protocol) { + case __constant_htons(ETH_P_ARP): + case __constant_htons(ETH_P_IP): + case __constant_htons(ETH_P_IPV6): + case __constant_htons(ETH_P_8021Q): + break; + + default: + return 0; + } + + return 1; +} + static int __netif_receive_skb(struct sk_buff *skb) { struct packet_type *ptype, *pt_prev; @@ -2824,6 +2848,7 @@ static int __netif_receive_skb(struct sk struct net_device *orig_or_bond; int ret = NET_RX_DROP; __be16 type; + unsigned long pflags = current->flags; if (!netdev_tstamp_prequeue) net_timestamp_check(skb); @@ -2831,9 +2856,21 @@ static int __netif_receive_skb(struct sk trace_netif_receive_skb(skb); + /* Emergency skbs are special; they should + * - be delivered to SOCK_MEMALLOC sockets only + * - stay away from userspace + * - have bounded memory usage + * + * Use PF_MEMALLOC as a poor man's memory pool - the grouping kind. + * This saves us from propagating the allocation context down to all + * allocation sites. 
+ */ + if (skb_emergency(skb)) + current->flags |= PF_MEMALLOC; + /* if we've gotten here through NAPI, check netpoll */ if (netpoll_receive_skb(skb)) - return NET_RX_DROP; + goto out; if (!skb->skb_iif) skb->skb_iif = skb->dev->ifindex; @@ -2875,6 +2912,9 @@ static int __netif_receive_skb(struct sk } #endif + if (skb_emergency(skb)) + goto skip_taps; + list_for_each_entry_rcu(ptype, &ptype_all, list) { if (ptype->dev == null_or_orig || ptype->dev == skb->dev || ptype->dev == orig_dev) { @@ -2884,13 +2924,17 @@ static int __netif_receive_skb(struct sk } } +skip_taps: #ifdef CONFIG_NET_CLS_ACT skb = handle_ing(skb, &pt_prev, &ret, orig_dev); if (!skb) - goto out; + goto unlock; ncls: #endif + if (!skb_emergency_protocol(skb)) + goto drop; + /* Handle special case of bridge or macvlan */ rx_handler = rcu_dereference(skb->dev->rx_handler); if (rx_handler) { @@ -2900,7 +2944,7 @@ ncls: } skb = rx_handler(skb); if (!skb) - goto out; + goto unlock; } if (vlan_tx_tag_present(skb)) { @@ -2930,6 +2974,7 @@ ncls: if (pt_prev) { ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev); } else { +drop: atomic_long_inc(&skb->dev->rx_dropped); kfree_skb(skb); /* Jamal, now you will not able to escape explaining @@ -2937,8 +2982,10 @@ ncls: ret = NET_RX_DROP; } -out: +unlock: rcu_read_unlock(); +out: + tsk_restore_flags(current, pflags, PF_MEMALLOC); return ret; } --- a/net/core/sock.c +++ b/net/core/sock.c @@ -322,6 +322,22 @@ int sk_clear_memalloc(struct sock *sk) return set; } EXPORT_SYMBOL_GPL(sk_clear_memalloc); + +int __sk_backlog_rcv(struct sock *sk, struct sk_buff *skb) +{ + int ret; + unsigned long pflags = current->flags; + + /* these should have been dropped before queueing */ + BUG_ON(!sk_has_memalloc(sk)); + + current->flags |= PF_MEMALLOC; + ret = sk->sk_backlog_rcv(sk, skb); + tsk_restore_flags(current, pflags, PF_MEMALLOC); + + return ret; +} +EXPORT_SYMBOL(__sk_backlog_rcv); #endif static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)