Skip to content

Commit 8223967

Browse files
author
Alexei Starovoitov
committed
Merge branch 'fix-lpm-map'
Yonghong Song says: ==================== A kernel page fault which happens in lpm map trie_get_next_key is reported by syzbot and Eric. The issue was introduced by commit b471f2f ("bpf: implement MAP_GET_NEXT_KEY command for LPM_TRIE map"). Patch #1 fixed the issue in the kernel and patch #2 adds a multithreaded test case in tools/testing/selftests/bpf/test_lpm_map. ==================== Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2 parents 1651e39 + af32efe commit 8223967

File tree

3 files changed

+107
-16
lines changed

3 files changed

+107
-16
lines changed

kernel/bpf/lpm_trie.c

+11-15
Original file line numberDiff line numberDiff line change
@@ -593,11 +593,10 @@ static void trie_free(struct bpf_map *map)
593593

594594
static int trie_get_next_key(struct bpf_map *map, void *_key, void *_next_key)
595595
{
596+
struct lpm_trie_node *node, *next_node = NULL, *parent, *search_root;
596597
struct lpm_trie *trie = container_of(map, struct lpm_trie, map);
597598
struct bpf_lpm_trie_key *key = _key, *next_key = _next_key;
598-
struct lpm_trie_node *node, *next_node = NULL, *parent;
599599
struct lpm_trie_node **node_stack = NULL;
600-
struct lpm_trie_node __rcu **root;
601600
int err = 0, stack_ptr = -1;
602601
unsigned int next_bit;
603602
size_t matchlen;
@@ -614,22 +613,21 @@ static int trie_get_next_key(struct bpf_map *map, void *_key, void *_next_key)
614613
*/
615614

616615
/* Empty trie */
617-
if (!rcu_dereference(trie->root))
616+
search_root = rcu_dereference(trie->root);
617+
if (!search_root)
618618
return -ENOENT;
619619

620620
/* For invalid key, find the leftmost node in the trie */
621-
if (!key || key->prefixlen > trie->max_prefixlen) {
622-
root = &trie->root;
621+
if (!key || key->prefixlen > trie->max_prefixlen)
623622
goto find_leftmost;
624-
}
625623

626624
node_stack = kmalloc(trie->max_prefixlen * sizeof(struct lpm_trie_node *),
627625
GFP_ATOMIC | __GFP_NOWARN);
628626
if (!node_stack)
629627
return -ENOMEM;
630628

631629
/* Try to find the exact node for the given key */
632-
for (node = rcu_dereference(trie->root); node;) {
630+
for (node = search_root; node;) {
633631
node_stack[++stack_ptr] = node;
634632
matchlen = longest_prefix_match(trie, node, key);
635633
if (node->prefixlen != matchlen ||
@@ -640,21 +638,19 @@ static int trie_get_next_key(struct bpf_map *map, void *_key, void *_next_key)
640638
node = rcu_dereference(node->child[next_bit]);
641639
}
642640
if (!node || node->prefixlen != key->prefixlen ||
643-
(node->flags & LPM_TREE_NODE_FLAG_IM)) {
644-
root = &trie->root;
641+
(node->flags & LPM_TREE_NODE_FLAG_IM))
645642
goto find_leftmost;
646-
}
647643

648644
/* The node with the exactly-matching key has been found,
649645
* find the first node in postorder after the matched node.
650646
*/
651647
node = node_stack[stack_ptr];
652648
while (stack_ptr > 0) {
653649
parent = node_stack[stack_ptr - 1];
654-
if (rcu_dereference(parent->child[0]) == node &&
655-
rcu_dereference(parent->child[1])) {
656-
root = &parent->child[1];
657-
goto find_leftmost;
650+
if (rcu_dereference(parent->child[0]) == node) {
651+
search_root = rcu_dereference(parent->child[1]);
652+
if (search_root)
653+
goto find_leftmost;
658654
}
659655
if (!(parent->flags & LPM_TREE_NODE_FLAG_IM)) {
660656
next_node = parent;
@@ -673,7 +669,7 @@ static int trie_get_next_key(struct bpf_map *map, void *_key, void *_next_key)
673669
/* Find the leftmost non-intermediate node, all intermediate nodes
674670
* have exact two children, so this function will never return NULL.
675671
*/
676-
for (node = rcu_dereference(*root); node;) {
672+
for (node = search_root; node;) {
677673
if (!(node->flags & LPM_TREE_NODE_FLAG_IM))
678674
next_node = node;
679675
node = rcu_dereference(node->child[0]);

tools/testing/selftests/bpf/Makefile

+1-1
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ ifneq ($(wildcard $(GENHDR)),)
1111
endif
1212

1313
CFLAGS += -Wall -O2 -I$(APIDIR) -I$(LIBDIR) -I$(GENDIR) $(GENFLAGS) -I../../../include
14-
LDLIBS += -lcap -lelf -lrt
14+
LDLIBS += -lcap -lelf -lrt -lpthread
1515

1616
TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \
1717
test_align test_verifier_log test_dev_cgroup test_tcpbpf_user

tools/testing/selftests/bpf/test_lpm_map.c

+95
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#include <errno.h>
1515
#include <inttypes.h>
1616
#include <linux/bpf.h>
17+
#include <pthread.h>
1718
#include <stdio.h>
1819
#include <stdlib.h>
1920
#include <string.h>
@@ -641,6 +642,98 @@ static void test_lpm_get_next_key(void)
641642
close(map_fd);
642643
}
643644

645+
#define MAX_TEST_KEYS 4
646+
struct lpm_mt_test_info {
647+
int cmd; /* 0: update, 1: delete, 2: lookup, 3: get_next_key */
648+
int iter;
649+
int map_fd;
650+
struct {
651+
__u32 prefixlen;
652+
__u32 data;
653+
} key[MAX_TEST_KEYS];
654+
};
655+
656+
static void *lpm_test_command(void *arg)
657+
{
658+
int i, j, ret, iter, key_size;
659+
struct lpm_mt_test_info *info = arg;
660+
struct bpf_lpm_trie_key *key_p;
661+
662+
key_size = sizeof(struct bpf_lpm_trie_key) + sizeof(__u32);
663+
key_p = alloca(key_size);
664+
for (iter = 0; iter < info->iter; iter++)
665+
for (i = 0; i < MAX_TEST_KEYS; i++) {
666+
/* first half of iterations in forward order,
667+
* and second half in backward order.
668+
*/
669+
j = (iter < (info->iter / 2)) ? i : MAX_TEST_KEYS - i - 1;
670+
key_p->prefixlen = info->key[j].prefixlen;
671+
memcpy(key_p->data, &info->key[j].data, sizeof(__u32));
672+
if (info->cmd == 0) {
673+
__u32 value = j;
674+
/* update must succeed */
675+
assert(bpf_map_update_elem(info->map_fd, key_p, &value, 0) == 0);
676+
} else if (info->cmd == 1) {
677+
ret = bpf_map_delete_elem(info->map_fd, key_p);
678+
assert(ret == 0 || errno == ENOENT);
679+
} else if (info->cmd == 2) {
680+
__u32 value;
681+
ret = bpf_map_lookup_elem(info->map_fd, key_p, &value);
682+
assert(ret == 0 || errno == ENOENT);
683+
} else {
684+
struct bpf_lpm_trie_key *next_key_p = alloca(key_size);
685+
ret = bpf_map_get_next_key(info->map_fd, key_p, next_key_p);
686+
assert(ret == 0 || errno == ENOENT || errno == ENOMEM);
687+
}
688+
}
689+
690+
// Pass successful exit info back to the main thread
691+
pthread_exit((void *)info);
692+
}
693+
694+
static void setup_lpm_mt_test_info(struct lpm_mt_test_info *info, int map_fd)
695+
{
696+
info->iter = 2000;
697+
info->map_fd = map_fd;
698+
info->key[0].prefixlen = 16;
699+
inet_pton(AF_INET, "192.168.0.0", &info->key[0].data);
700+
info->key[1].prefixlen = 24;
701+
inet_pton(AF_INET, "192.168.0.0", &info->key[1].data);
702+
info->key[2].prefixlen = 24;
703+
inet_pton(AF_INET, "192.168.128.0", &info->key[2].data);
704+
info->key[3].prefixlen = 24;
705+
inet_pton(AF_INET, "192.168.1.0", &info->key[3].data);
706+
}
707+
708+
static void test_lpm_multi_thread(void)
709+
{
710+
struct lpm_mt_test_info info[4];
711+
size_t key_size, value_size;
712+
pthread_t thread_id[4];
713+
int i, map_fd;
714+
void *ret;
715+
716+
/* create a trie */
717+
value_size = sizeof(__u32);
718+
key_size = sizeof(struct bpf_lpm_trie_key) + value_size;
719+
map_fd = bpf_create_map(BPF_MAP_TYPE_LPM_TRIE, key_size, value_size,
720+
100, BPF_F_NO_PREALLOC);
721+
722+
/* create 4 threads to test update, delete, lookup and get_next_key */
723+
setup_lpm_mt_test_info(&info[0], map_fd);
724+
for (i = 0; i < 4; i++) {
725+
if (i != 0)
726+
memcpy(&info[i], &info[0], sizeof(info[i]));
727+
info[i].cmd = i;
728+
assert(pthread_create(&thread_id[i], NULL, &lpm_test_command, &info[i]) == 0);
729+
}
730+
731+
for (i = 0; i < 4; i++)
732+
assert(pthread_join(thread_id[i], &ret) == 0 && ret == (void *)&info[i]);
733+
734+
close(map_fd);
735+
}
736+
644737
int main(void)
645738
{
646739
struct rlimit limit = { RLIM_INFINITY, RLIM_INFINITY };
@@ -667,6 +760,8 @@ int main(void)
667760

668761
test_lpm_get_next_key();
669762

763+
test_lpm_multi_thread();
764+
670765
printf("test_lpm: OK\n");
671766
return 0;
672767
}

0 commit comments

Comments
 (0)