Skip to content

Commit

Permalink
UCT/IB: UCT/GGA: filter out unsupported resources from component list…
Browse files Browse the repository at this point in the history
…, CR2
  • Loading branch information
evgeny-leksikov committed Dec 18, 2024
1 parent ca7c76e commit cc9a420
Show file tree
Hide file tree
Showing 7 changed files with 176 additions and 128 deletions.
20 changes: 15 additions & 5 deletions src/uct/ib/base/ib_md.c
Original file line number Diff line number Diff line change
Expand Up @@ -817,9 +817,10 @@ int uct_ib_device_is_accessible(struct ibv_device *device)
return uct_ib_device_is_supported(device);
}

ucs_status_t uct_ib_query_md_resources(uct_component_t *component,
uct_md_resource_desc_t **resources_p,
unsigned *num_resources_p)
ucs_status_t
uct_ib_query_md_resources_with_pred(uct_ib_device_pred_t predicate,
uct_md_resource_desc_t **resources_p,
unsigned *num_resources_p)
{
int num_resources = 0;
uct_md_resource_desc_t *resources;
Expand Down Expand Up @@ -854,8 +855,8 @@ ucs_status_t uct_ib_query_md_resources(uct_component_t *component,
}

for (i = 0; i < num_devices; ++i) {
/* Skip non-existent and non-accessible devices */
if (!uct_ib_device_is_accessible(device_list[i])) {
/* Skip devices rejected by the predicate */
if (!predicate(device_list[i])) {
continue;
}

Expand Down Expand Up @@ -1101,6 +1102,15 @@ uct_ib_fork_init(const uct_ib_md_config_t *md_config, int *fork_init_p)
return UCS_OK;
}

/**
 * Query IB memory-domain resources, keeping only the devices accepted by
 * uct_ib_device_is_accessible().
 *
 * @param [in]  component        Not used by this function.
 * @param [out] resources_p      Forwarded to
 *                               uct_ib_query_md_resources_with_pred().
 * @param [out] num_resources_p  Forwarded to
 *                               uct_ib_query_md_resources_with_pred().
 *
 * @return Status returned by uct_ib_query_md_resources_with_pred().
 */
static ucs_status_t
uct_ib_query_md_resources(uct_component_t *component,
                          uct_md_resource_desc_t **resources_p,
                          unsigned *num_resources_p)
{
    ucs_status_t status;

    /* Delegate to the generic query using the default accessibility filter */
    status = uct_ib_query_md_resources_with_pred(uct_ib_device_is_accessible,
                                                 resources_p,
                                                 num_resources_p);
    return status;
}

static ucs_status_t
uct_ib_md_open(uct_component_t *component, const char *md_name,
const uct_md_config_t *uct_md_config, uct_md_h *md_p)
Expand Down
13 changes: 10 additions & 3 deletions src/uct/ib/base/ib_md.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,12 @@
uct_ib_device_name(&(_md)->dev), ## __VA_ARGS__)


/**
 * Predicate used to filter IB devices when querying MD resources.
 *
 * @param [in] device  IB device to examine.
 *
 * @return Nonzero if the device should be kept, zero if it should be skipped.
 */
typedef int (*uct_ib_device_pred_t)(struct ibv_device *device);


/**
* IB MD statistics counters
*/
Expand Down Expand Up @@ -406,9 +412,10 @@ ucs_status_t uct_ib_rkey_unpack(uct_component_t *component,
const void *rkey_buffer, uct_rkey_t *rkey_p,
void **handle_p);

ucs_status_t uct_ib_query_md_resources(uct_component_t *component,
uct_md_resource_desc_t **resources_p,
unsigned *num_resources_p);
ucs_status_t
uct_ib_query_md_resources_with_pred(uct_ib_device_pred_t predicate,
uct_md_resource_desc_t **resources_p,
unsigned *num_resources_p);

ucs_status_t uct_ib_get_device_by_name(struct ibv_device **ib_device_list,
int num_devices, const char *md_name,
Expand Down
2 changes: 1 addition & 1 deletion src/uct/ib/mlx5/dv/ib_mlx5_dv.c
Original file line number Diff line number Diff line change
Expand Up @@ -449,7 +449,7 @@ ucs_status_t uct_ib_mlx5_devx_query_ooo_sl_mask(uct_ib_mlx5_md_t *md,

status = uct_ib_mlx5_devx_general_cmd(md->super.dev.ibv_context, in,
sizeof(in), out, sizeof(out),
"QUERY_HCA_VPORT_CONTEXT", 0);
"QUERY_HCA_VPORT_CONTEXT", UCS_NO);
if (status != UCS_OK) {
return status;
}
Expand Down
131 changes: 68 additions & 63 deletions src/uct/ib/mlx5/dv/ib_mlx5dv_md.c
Original file line number Diff line number Diff line change
Expand Up @@ -1645,7 +1645,7 @@ static void uct_ib_mlx5_devx_check_odp(uct_ib_mlx5_md_t *md,
(UCT_IB_MLX5_CAP_ODP << 1));
status = uct_ib_mlx5_devx_general_cmd(md->super.dev.ibv_context, in,
sizeof(in), out, sizeof(out),
"QUERY_HCA_CAP, ODP", 0);
"QUERY_HCA_CAP, ODP", UCS_NO);
if (status != UCS_OK) {
reason = "failed to query HCA capabilities";
goto no_odp;
Expand Down Expand Up @@ -1730,7 +1730,7 @@ uct_ib_mlx5_devx_query_port_select(uct_ib_mlx5_md_t *md)
UCT_IB_MLX5DV_SET(query_lag_in, in, opcode, UCT_IB_MLX5_CMD_OP_QUERY_LAG);
status = uct_ib_mlx5_devx_general_cmd(md->super.dev.ibv_context, in,
sizeof(in), out, sizeof(out),
"QUERY_LAG", 0);
"QUERY_LAG", UCS_NO);
if (status != UCS_OK) {
return UCT_IB_MLX5_LAG_INVALID_MODE;
}
Expand All @@ -1756,7 +1756,7 @@ uct_ib_mlx5_devx_query_lag(uct_ib_mlx5_md_t *md, uint8_t *state)
UCT_IB_MLX5DV_SET(query_lag_in, in, opcode, UCT_IB_MLX5_CMD_OP_QUERY_LAG);
status = uct_ib_mlx5_devx_general_cmd(md->super.dev.ibv_context, in,
sizeof(in), out, sizeof(out),
"QUERY_LAG", 0);
"QUERY_LAG", UCS_NO);
if (status != UCS_OK) {
return status;
}
Expand All @@ -1765,13 +1765,12 @@ uct_ib_mlx5_devx_query_lag(uct_ib_mlx5_md_t *md, uint8_t *state)
return UCS_OK;
}

static struct ibv_context *
uct_ib_mlx5_devx_open_device(struct ibv_device *ibv_device)
struct ibv_context* uct_ib_mlx5_devx_open_device(struct ibv_device *ibv_device)
{
struct mlx5dv_context_attr dv_attr = {};
struct mlx5dv_devx_event_channel *event_channel;
struct mlx5dv_context_attr dv_attr = {
.flags = MLX5DV_CONTEXT_FLAGS_DEVX
};
struct ibv_context *ctx;
struct ibv_cq *cq;

dv_attr.flags |= MLX5DV_CONTEXT_FLAGS_DEVX;
ctx = mlx5dv_open_device(ibv_device, &dv_attr);
Expand All @@ -1781,11 +1780,19 @@ uct_ib_mlx5_devx_open_device(struct ibv_device *ibv_device)
return NULL;
}

return ctx;
}

static ucs_status_t uct_ib_mlx5_devx_ctx_test_cqec(struct ibv_context *ctx)
{
struct mlx5dv_devx_event_channel *event_channel;
struct ibv_cq *cq;

cq = ibv_create_cq(ctx, 1, NULL, NULL, 0);
if (cq == NULL) {
uct_ib_check_memlock_limit_msg(ctx, UCS_LOG_LEVEL_DEBUG,
"ibv_create_cq()");
goto close_ctx;
return UCS_ERR_UNSUPPORTED;
}

ibv_destroy_cq(cq);
Expand All @@ -1794,17 +1801,12 @@ uct_ib_mlx5_devx_open_device(struct ibv_device *ibv_device)
ctx, MLX5_IB_UAPI_DEVX_CR_EV_CH_FLAGS_OMIT_DATA);
if (event_channel == NULL) {
ucs_diag("mlx5dv_devx_create_event_channel(%s) failed: %m",
ibv_get_device_name(ibv_device));
goto close_ctx;
ibv_get_device_name(ctx->device));
return UCS_ERR_UNSUPPORTED;
}

mlx5dv_devx_destroy_event_channel(event_channel);

return ctx;

close_ctx:
ibv_close_device(ctx);
return NULL;
return UCS_OK;
}

static uct_ib_md_ops_t uct_ib_mlx5_devx_md_ops;
Expand Down Expand Up @@ -1851,8 +1853,21 @@ static void uct_ib_mlx5_devx_init_flush_mr(uct_ib_mlx5_md_t *md)
md->super.flush_rkey = uct_ib_mlx5_flush_rkey_make();
}

static ucs_status_t
uct_ib_mlx5_devx_query_cap_2(struct ibv_context *ctx, void *out, size_t size)
/**
 * Build a QUERY_HCA_CAP command with the requested op_mod and execute it
 * through uct_ib_mlx5_devx_general_cmd().
 *
 * @param [in]  ctx      Device context the command is issued on.
 * @param [in]  opmod    Value written to the op_mod field of the command.
 * @param [out] out      Buffer passed as the command output.
 * @param [in]  size     Length passed along with @a out.
 * @param [in]  msg_arg  String forwarded to uct_ib_mlx5_devx_general_cmd().
 * @param [in]  silent   Ternary value forwarded to
 *                       uct_ib_mlx5_devx_general_cmd().
 *
 * @return Status returned by uct_ib_mlx5_devx_general_cmd().
 */
ucs_status_t uct_ib_mlx5_devx_query_cap(struct ibv_context *ctx, uint32_t opmod,
                                        void *out, size_t size, char *msg_arg,
                                        ucs_ternary_auto_value_t silent)
{
    /* Zero-initialized command input; only opcode and op_mod are filled in */
    char cmd_in[UCT_IB_MLX5DV_ST_SZ_BYTES(query_hca_cap_in)] = {};
    ucs_status_t status;

    UCT_IB_MLX5DV_SET(query_hca_cap_in, cmd_in, opcode,
                      UCT_IB_MLX5_CMD_OP_QUERY_HCA_CAP);
    UCT_IB_MLX5DV_SET(query_hca_cap_in, cmd_in, op_mod, opmod);

    /* cmd_in is a char array, so sizeof() equals its element count */
    status = uct_ib_mlx5_devx_general_cmd(ctx, cmd_in, sizeof(cmd_in), out,
                                          size, msg_arg, silent);
    return status;
}

ucs_status_t uct_ib_mlx5_devx_query_cap_2(struct ibv_context *ctx,
void *out, size_t size)
{
char in[UCT_IB_MLX5DV_ST_SZ_BYTES(query_hca_cap_in)] = {};

Expand All @@ -1863,11 +1878,10 @@ uct_ib_mlx5_devx_query_cap_2(struct ibv_context *ctx, void *out, size_t size)
UCT_IB_MLX5_HCA_CAP_OPMOD_GET_CUR |
(UCT_IB_MLX5_CAP_2_GENERAL << 1));
return uct_ib_mlx5_devx_general_cmd(ctx, in, sizeof(in), out, size,
"QUERY_HCA_CAP, CAP2", 1);
"QUERY_HCA_CAP, CAP2", UCS_YES);
}

static void uct_ib_mlx5_devx_check_xgvmi(uct_ib_mlx5_md_t *md, void *cap_2,
uct_ib_device_t *dev)
int uct_ib_mlx5_devx_check_xgvmi(void *cap_2, const char *dev_name)
{
uint64_t object_for_other_vhca;
uint32_t object_to_object;
Expand All @@ -1881,13 +1895,11 @@ static void uct_ib_mlx5_devx_check_xgvmi(uct_ib_mlx5_md_t *md, void *cap_2,
UCT_IB_MLX5_HCA_CAPS_2_CROSS_VHCA_OBJ_TO_OBJ_LOCAL_MKEY_TO_REMOTE_MKEY) &&
(object_for_other_vhca &
UCT_IB_MLX5_HCA_CAPS_2_ALLOWED_OBJ_FOR_OTHER_VHCA_ACCESS_MKEY)) {
md->flags |= UCT_IB_MLX5_MD_FLAG_INDIRECT_XGVMI;
md->super.cap_flags |= UCT_MD_FLAG_EXPORTED_MKEY;
ucs_debug("%s: cross gvmi alias mkey is supported",
uct_ib_device_name(dev));
ucs_debug("%s: cross gvmi alias mkey is supported", dev_name);
return 1;
} else {
ucs_debug("%s: crossing_vhca_mkey is not supported",
uct_ib_device_name(dev));
ucs_debug("%s: crossing_vhca_mkey is not supported", dev_name);
return 0;
}
}

Expand Down Expand Up @@ -2157,22 +2169,18 @@ ucs_status_t uct_ib_mlx5_devx_md_open(struct ibv_device *ibv_device,
const uct_ib_md_config_t *md_config,
uct_ib_md_t **p_md)
{
size_t out_len = UCT_IB_MLX5DV_ST_SZ_BYTES(query_hca_cap_out);
size_t in_len = UCT_IB_MLX5DV_ST_SZ_BYTES(query_hca_cap_in);
size_t total_len = (2 * out_len) + in_len;
char *buf, *out, *in, *cap_2_out;
uint8_t lag_state = 0;
size_t out_len = UCT_IB_MLX5DV_ST_SZ_BYTES(query_hca_cap_out);
size_t total_len = 2 * out_len;
char *buf, *out, *cap_2_out;
void *cap, *cap_2;
ucs_status_t status;
void *cap_2;
uint8_t lag_state = 0;
uint8_t log_max_qp;
uint16_t vhca_id;
struct ibv_context *ctx;
uct_ib_device_t *dev;
uct_ib_mlx5_md_t *md;
unsigned max_rd_atomic_dc;
void *cap;
int ret;
ucs_log_level_t log_level;
ucs_mpool_params_t mp_params;
int ksm_atomic;

Expand All @@ -2184,8 +2192,7 @@ ucs_status_t uct_ib_mlx5_devx_md_open(struct ibv_device *ibv_device,
}

out = buf;
in = UCS_PTR_BYTE_OFFSET(out, out_len);
cap_2_out = UCS_PTR_BYTE_OFFSET(in, in_len);
cap_2_out = UCS_PTR_BYTE_OFFSET(out, out_len);

if (!mlx5dv_is_supported(ibv_device)) {
status = UCS_ERR_UNSUPPORTED;
Expand All @@ -2203,6 +2210,11 @@ ucs_status_t uct_ib_mlx5_devx_md_open(struct ibv_device *ibv_device,
goto err_free_buffer;
}

status = uct_ib_mlx5_devx_ctx_test_cqec(ctx);
if (status != UCS_OK) {
goto err;
}

md = ucs_derived_of(uct_ib_md_alloc(sizeof(*md), "ib_mlx5_devx_md", ctx),
uct_ib_mlx5_md_t);
if (md == NULL) {
Expand All @@ -2224,24 +2236,12 @@ ucs_status_t uct_ib_mlx5_devx_md_open(struct ibv_device *ibv_device,
goto err_lru_cleanup;
}

cap = UCT_IB_MLX5DV_ADDR_OF(query_hca_cap_out, out, capability);
UCT_IB_MLX5DV_SET(query_hca_cap_in, in, opcode, UCT_IB_MLX5_CMD_OP_QUERY_HCA_CAP);
UCT_IB_MLX5DV_SET(query_hca_cap_in, in, op_mod, UCT_IB_MLX5_HCA_CAP_OPMOD_GET_CUR |
(UCT_IB_MLX5_CAP_GENERAL << 1));
ret = mlx5dv_devx_general_cmd(ctx, in, in_len, out, out_len);
if (ret != 0) {
if ((errno == EPERM) || (errno == EPROTONOSUPPORT) ||
(errno == EOPNOTSUPP)) {
status = UCS_ERR_UNSUPPORTED;
log_level = UCS_LOG_LEVEL_DEBUG;
} else {
status = UCS_ERR_IO_ERROR;
log_level = UCS_LOG_LEVEL_ERROR;
}
ucs_log(log_level,
"mlx5dv_devx_general_cmd(QUERY_HCA_CAP) failed,"
" syndrome 0x%x: %m",
UCT_IB_MLX5DV_GET(query_hca_cap_out, out, syndrome));
cap = UCT_IB_MLX5DV_ADDR_OF(query_hca_cap_out, out, capability);
status = uct_ib_mlx5_devx_query_cap(ctx,
UCT_IB_MLX5_HCA_CAP_OPMOD_GET_CUR |
(UCT_IB_MLX5_CAP_GENERAL << 1),
out, out_len, "QUERY_HCA_CAP", UCS_AUTO);
if (status != UCS_OK) {
goto err_lru_cleanup;
}

Expand Down Expand Up @@ -2350,8 +2350,11 @@ ucs_status_t uct_ib_mlx5_devx_md_open(struct ibv_device *ibv_device,
status = uct_ib_mlx5_devx_query_cap_2(ctx, cap_2_out, out_len);
if (status == UCS_OK) {
cap_2 = UCT_IB_MLX5DV_ADDR_OF(query_hca_cap_out, cap_2_out, capability);
if (uct_ib_mlx5_devx_check_xgvmi(cap_2, uct_ib_device_name(dev))) {
md->flags |= UCT_IB_MLX5_MD_FLAG_INDIRECT_XGVMI;
md->super.cap_flags |= UCT_MD_FLAG_EXPORTED_MKEY;
}

uct_ib_mlx5_devx_check_xgvmi(md, cap_2, dev);
uct_ib_mlx5_devx_check_mkey_by_name(md, cap_2, dev);
} else {
cap_2 = NULL;
Expand All @@ -2367,10 +2370,11 @@ ucs_status_t uct_ib_mlx5_devx_md_open(struct ibv_device *ibv_device,
uint8_t arg_size;
int cap_ops, mode8b;

UCT_IB_MLX5DV_SET(query_hca_cap_in, in, op_mod, UCT_IB_MLX5_HCA_CAP_OPMOD_GET_CUR |
(UCT_IB_MLX5_CAP_ATOMIC << 1));
status = uct_ib_mlx5_devx_general_cmd(ctx, in, in_len, out, out_len,
"QUERY_HCA_CAP, ATOMIC", 0);
status = uct_ib_mlx5_devx_query_cap(ctx,
UCT_IB_MLX5_HCA_CAP_OPMOD_GET_CUR |
(UCT_IB_MLX5_CAP_ATOMIC << 1),
out, out_len,
"QUERY_HCA_CAP, ATOMIC", UCS_NO);
if (status != UCS_OK) {
goto err_lru_cleanup;
}
Expand Down Expand Up @@ -2661,7 +2665,8 @@ uct_ib_mlx5_devx_allow_xgvmi_access(uct_ib_mlx5_md_t *md,

return uct_ib_mlx5_devx_general_cmd(md->super.dev.ibv_context, in,
sizeof(in), out, sizeof(out),
"ALLOW_OTHER_VHCA_ACCESS", silent);
"ALLOW_OTHER_VHCA_ACCESS",
silent ? UCS_YES : UCS_NO);
}

static ucs_status_t uct_ib_mlx5_devx_xgvmi_umem_mr(uct_ib_mlx5_md_t *md,
Expand Down
Loading

0 comments on commit cc9a420

Please sign in to comment.