Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove conntrack lookups #67

Merged
merged 2 commits into from
Jul 22, 2019
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
110 changes: 28 additions & 82 deletions src/glb-redirect/ipt_GLBREDIRECT.c
Original file line number Diff line number Diff line change
Expand Up @@ -77,13 +77,16 @@ struct glbgue_stats {
__u64 accepted_syn_packets;
__u64 accepted_last_resort_packets;
__u64 accepted_established_packets;
__u64 accepted_conntracked_packets;
__u64 accepted_syn_cookie_packets;
__u64 forwarded_to_self_packets;
__u64 forwarded_to_alternate_packets;
struct u64_stats_sync syncp;
};

#if LINUX_VERSION_CODE < KERNEL_VERSION(4,4,0)
# error glb-redirect requires at least v4.4
#endif

struct glbgue_stats __percpu *percpu_stats;

static unsigned int is_valid_locally(struct net *net, struct sk_buff *skb, int inner_ip_ofs, struct iphdr *iph_v4, struct ipv6hdr *iph_v6, struct tcphdr *th);
Expand All @@ -94,22 +97,13 @@ static unsigned int glbredirect_send_forwarded_skb(struct net *net, struct sk_bu
nf_reset(skb);
skb_forward_csum(skb);

if (ip_route_me_harder(
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,4,0)
net,
#endif
skb, RTN_UNSPEC)) {
if (ip_route_me_harder(net, skb, RTN_UNSPEC)) {
kfree_skb(skb);
return NF_STOLEN;
}

PRINT_DEBUG(KERN_ERR " -> forwarded to alternate\n");
ip_local_out(
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,4,0)
net,
skb->sk,
#endif
skb);
ip_local_out(net, skb->sk, skb);

u64_stats_update_begin(&s->syncp);
s->forwarded_to_alternate_packets++;
Expand Down Expand Up @@ -444,10 +438,8 @@ glbredirect_tg4(struct sk_buff *skb, const struct xt_action_param *par)

#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0)
struct net *net = xt_net(par);
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(4,4,0)
struct net *net = par->net;
#else
struct net *net = dev_net(skb->dev);
struct net *net = par->net;
#endif

u64_stats_update_begin(&s->syncp);
Expand Down Expand Up @@ -562,68 +554,9 @@ static unsigned int is_valid_locally(struct net *net, struct sk_buff *skb, int i
}
}

PRINT_DEBUG(KERN_ERR " -> checking conntrack for SYN_RECV\n");

/* If we're not ESTABLISHED yet, check conntrack for a SYN_RECV.
* When syncookies aren't enabled, this will let ACKs come in to complete
* a connection.
* Only do this if we know the offset of the inner IP header (so don't
* check ICMP Packet Too Big).
*/
if (likely(inner_ip_ofs > 0)) {
const struct nf_conntrack_tuple_hash *thash;
struct nf_conntrack_tuple tuple;
struct nf_conn *ct;

int ip_proto_ver = NFPROTO_IPV4;
if (iph_v6 != NULL) {
ip_proto_ver = NFPROTO_IPV6;
}

if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb) + inner_ip_ofs, ip_proto_ver,
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,4,0)
net,
#endif
&tuple))
goto no_ct_entry;

rcu_read_lock();
/* from now on no_ct_entry_unlock should be used to ensure we release this lock */

thash = nf_conntrack_find_get(net,
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,3,0)
&nf_ct_zone_dflt,
#else
NF_CT_DEFAULT_ZONE,
#endif
&tuple);
if (thash == NULL)
goto no_ct_entry_unlock;

ct = nf_ct_tuplehash_to_ctrack(thash);
if (ct == NULL)
goto no_ct_entry_unlock;

if (!nf_ct_is_dying(ct) && nf_ct_tuple_equal(&tuple, &thash->tuple)) {
u64_stats_update_begin(&s->syncp);
s->accepted_conntracked_packets++;
u64_stats_update_end(&s->syncp);

nf_ct_put(ct);
rcu_read_unlock();
return 1;
}

nf_ct_put(ct);
no_ct_entry_unlock:
rcu_read_unlock();
}

no_ct_entry:

PRINT_DEBUG(KERN_ERR " -> checking for syncookie\n");

/* Last chance, if syncookies are enabled, then a valid syncookie ACK is also acceptable */
/* If syncookies are enabled, then a valid syncookie ACK is also acceptable */
if (th->ack && !th->fin && !th->rst && !th->syn) {
struct sock *listen_sk;
int ret = 0;
Expand All @@ -635,9 +568,7 @@ static unsigned int is_valid_locally(struct net *net, struct sk_buff *skb, int i
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,6,0)
skb, ip_hdrlen(skb) + __tcp_hdrlen(th),
#endif
#if LINUX_VERSION_CODE > KERNEL_VERSION(3,9,0)
iph_v4->saddr, th->source,
#endif
iph_v4->daddr, th->dest,
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,14,0)
inet_iif(skb), 0);
Expand Down Expand Up @@ -677,9 +608,7 @@ static unsigned int is_valid_locally(struct net *net, struct sk_buff *skb, int i
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,6,0)
skb, ip_hdrlen(skb) + __tcp_hdrlen(th),
#endif
#if LINUX_VERSION_CODE > KERNEL_VERSION(3,9,0)
&iph_v6->saddr, th->source,
#endif
&iph_v6->daddr, th->dest,
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,14,0)
inet_iif(skb), 0);
Expand Down Expand Up @@ -723,6 +652,26 @@ static unsigned int is_valid_locally(struct net *net, struct sk_buff *skb, int i
return ret;
}

/* There is a false negative here, which we can't avoid.
* If a TCP socket with non-empty receive buffers is close()'d
* by user space, the kernel sends a RST and immediately transitions
* the sk to TCP_CLOSE state. This leads to the sk being unhashed,
* at which point it's impossible for us to find the original sk.
*
* https://elixir.bootlin.com/linux/v4.19.50/source/net/ipv4/tcp.c#L2365
* https://elixir.bootlin.com/linux/v4.19.50/source/net/ipv4/tcp.c#L2232
*
* This means that subsequent packets from the client will be accepted
* on the last hop instead of on the machine that held the original
* connection. The last hop then responds with RST, since it doesn't
* know the connection. Luckily for us, these RSTs are identical to
* the ones this machine would end up generating, since they are
* entirely based on the incoming packet. The behaviour from the POV
* of the client is therefore consistent, even though the wrong
* machine ends up generating the RST.
*
* https://elixir.bootlin.com/linux/v4.19.50/source/net/ipv4/tcp_ipv4.c#L1844
*/
return 0;
}

Expand Down Expand Up @@ -790,7 +739,6 @@ static int proc_show(struct seq_file *m, void *v)
tmp.accepted_syn_packets = s->accepted_syn_packets;
tmp.accepted_last_resort_packets = s->accepted_last_resort_packets;
tmp.accepted_established_packets = s->accepted_established_packets;
tmp.accepted_conntracked_packets = s->accepted_conntracked_packets;
tmp.accepted_syn_cookie_packets = s->accepted_syn_cookie_packets;
tmp.forwarded_to_self_packets = s->forwarded_to_self_packets;
tmp.forwarded_to_alternate_packets = s->forwarded_to_alternate_packets;
Expand All @@ -800,7 +748,6 @@ static int proc_show(struct seq_file *m, void *v)
sum.accepted_syn_packets += tmp.accepted_syn_packets;
sum.accepted_last_resort_packets += tmp.accepted_last_resort_packets;
sum.accepted_established_packets += tmp.accepted_established_packets;
sum.accepted_conntracked_packets += tmp.accepted_conntracked_packets;
sum.accepted_syn_cookie_packets += tmp.accepted_syn_cookie_packets;
sum.forwarded_to_self_packets += tmp.forwarded_to_self_packets;
sum.forwarded_to_alternate_packets += tmp.forwarded_to_alternate_packets;
Expand All @@ -810,7 +757,6 @@ static int proc_show(struct seq_file *m, void *v)
seq_printf(m, "accepted_syn_packets: %llu\n", sum.accepted_syn_packets);
seq_printf(m, "accepted_last_resort_packets: %llu\n", sum.accepted_last_resort_packets);
seq_printf(m, "accepted_established_packets: %llu\n", sum.accepted_established_packets);
seq_printf(m, "accepted_conntracked_packets: %llu\n", sum.accepted_conntracked_packets);
seq_printf(m, "accepted_syn_cookie_packets: %llu\n", sum.accepted_syn_cookie_packets);
seq_printf(m, "forwarded_to_self_packets: %llu\n", sum.forwarded_to_self_packets);
seq_printf(m, "forwarded_to_alternate_packets: %llu\n", sum.forwarded_to_alternate_packets);
Expand Down