Skip to content

Commit

Permalink
Add e2e tests for i1-mask attention
Browse files Browse the repository at this point in the history
To test actual i1 handling with attention op.

Signed-off-by: Alan Li <me@alanli.org>
  • Loading branch information
lialan committed Dec 5, 2024
1 parent df34911 commit f2587f5
Show file tree
Hide file tree
Showing 5 changed files with 277 additions and 0 deletions.
26 changes: 26 additions & 0 deletions tests/e2e/linalg_ext_ops/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@ ALL_SRCS = enforce_glob(
"winograd_output.mlir",
],
include = ["*.mlir"],
exclude = [
"attention_i1_mask.mlir",
],
)

iree_check_single_backend_test_suite(
Expand All @@ -39,6 +42,24 @@ iree_check_single_backend_test_suite(
target_backend = "llvm-cpu",
)

# Separate suite for attention tests whose i1 masks are stored bit-packed
# (8 mask values per byte). These require the experimental packed-i1 storage
# flag, so attention_i1_mask.mlir is excluded from the generic suites and run
# only here.
iree_check_single_backend_test_suite(
    name = "check_llvm-cpu_local-task_i1",
    srcs = [
        "attention_i1_mask.mlir",
    ],
    compiler_flags = [
        "--iree-llvmcpu-target-cpu=generic",
        # Store i1 tensors bit-packed instead of one byte per value.
        "--iree-experimental-packed-i1-storage",
    ],
    driver = "local-task",
    tags = [
        # attention fails with a wasm target, just disable the tests there for now
        # error: Yield operand #2 is not equivalent to the corresponding iter bbArg
        "nowasm",
    ],
    target_backend = "llvm-cpu",
)

VMVX_SRCS = enforce_glob(
# keep sorted
[
Expand All @@ -52,6 +73,7 @@ VMVX_SRCS = enforce_glob(
include = ["*.mlir"],
exclude = [
"attention.mlir",
"attention_i1_mask.mlir",
],
)

Expand All @@ -75,6 +97,7 @@ LLVM_GPU_SRCS = enforce_glob(
include = ["*.mlir"],
exclude = [
"attention.mlir",
"attention_i1_mask.mlir",
],
)

Expand Down Expand Up @@ -107,6 +130,7 @@ ROCM_HIP_SRCS = enforce_glob(
exclude = [
"top-k.mlir",
"attention.mlir",
"attention_i1_mask.mlir",
],
)

Expand All @@ -131,6 +155,7 @@ iree_check_single_backend_test_suite(
include = ["*.mlir"],
exclude = [
"attention.mlir",
"attention_i1_mask.mlir",
"top-k.mlir",
],
),
Expand All @@ -152,6 +177,7 @@ iree_check_single_backend_test_suite(
include = ["*.mlir"],
exclude = [
"attention.mlir",
"attention_i1_mask.mlir",
"top-k.mlir",
],
),
Expand Down
16 changes: 16 additions & 0 deletions tests/e2e/linalg_ext_ops/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,22 @@ iree_check_single_backend_test_suite(
"nowasm"
)

# Packed-i1 attention-mask tests. The NAME carries the "_i1" suffix to match
# the Bazel suite ("check_llvm-cpu_local-task_i1" in BUILD.bazel) and to avoid
# colliding with the plain check_llvm-cpu_local-task suite defined above —
# two suites with the same NAME would produce duplicate CMake targets.
iree_check_single_backend_test_suite(
  NAME
    check_llvm-cpu_local-task_i1
  SRCS
    "attention_i1_mask.mlir"
  TARGET_BACKEND
    "llvm-cpu"
  DRIVER
    "local-task"
  COMPILER_FLAGS
    "--iree-llvmcpu-target-cpu=generic"
    "--iree-experimental-packed-i1-storage"
  LABELS
    "nowasm"
)

iree_check_single_backend_test_suite(
NAME
check_vmvx_local-task
Expand Down
42 changes: 42 additions & 0 deletions tests/e2e/linalg_ext_ops/attention.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,48 @@ func.func @causal_attention1x3x4() {
return
}

// Attention over (batch=1, M=4, K1=4, K2=4, N=4) with an all-true boolean
// mask written as an ordinary dense i1 constant — no bit-packed storage here
// (contrast with the flow.tensor.bitcast variants in attention_i1_mask.mlir).
// An all-true mask masks nothing out, so the expected values are those of
// plain scaled attention.
func.func @attention1x4x4_i1_mask_all_ones() {
  %init = tensor.empty() : tensor<1x4x4xf32>
  %query = util.unfoldable_constant dense<[[[0.1, 0.2, 0.3, 0.4],
                                            [0.5, 0.6, 0.7, 0.8],
                                            [0.9, 1.0, 1.1, 1.2],
                                            [1.3, 1.4, 1.5, 1.6]]]> : tensor<1x4x4xf32>

  %key = util.unfoldable_constant dense<[[[0.1, 0.2, 0.3, 0.4],
                                          [0.5, 0.6, 0.7, 0.8],
                                          [0.9, 1.0, 1.1, 1.2],
                                          [1.3, 1.4, 1.5, 1.6]]]> : tensor<1x4x4xf32>
  %value = util.unfoldable_constant dense<[[[0.1, 0.2, 0.3, 0.4],
                                            [0.5, 0.6, 0.7, 0.8],
                                            [0.9, 1.0, 1.1, 1.2],
                                            [1.3, 1.4, 1.5, 1.6]]]> : tensor<1x4x4xf32>

  // Every position enabled: result must equal unmasked attention.
  %mask = util.unfoldable_constant dense<[[[true, true, true, true],
                                           [true, true, true, true],
                                           [true, true, true, true],
                                           [true, true, true, true]]]> : tensor<1x4x4xi1>

  %scale = arith.constant 0.5 : f32
  // Maps over (d0..d4) = (batch, M, K1, K2, N): Q(d0,d1,d2), K(d0,d3,d2),
  // V(d0,d3,d4), scale (scalar), mask(d0,d1,d3), result(d0,d1,d4).
  %1 = iree_linalg_ext.attention {indexing_maps = [affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2)>,
                     affine_map<(d0, d1, d2, d3, d4) -> (d0, d3, d2)>,
                     affine_map<(d0, d1, d2, d3, d4) -> (d0, d3, d4)>,
                     affine_map<(d0, d1, d2, d3, d4) -> ()>,
                     affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d3)>,
                     affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d4)>]}
                     ins(%query, %key, %value, %scale, %mask : tensor<1x4x4xf32>,
                     tensor<1x4x4xf32>, tensor<1x4x4xf32>, f32, tensor<1x4x4xi1>) outs(%init : tensor<1x4x4xf32>) {
                      ^bb0(%arg0: f32):
                        iree_linalg_ext.yield %arg0 : f32
                     } -> tensor<1x4x4xf32>
  // Precomputed reference values for unmasked scaled attention on these
  // operands; compared with tolerance by expect_almost_eq_const.
  check.expect_almost_eq_const(
      %1,
      dense<[[[0.798884, 0.898884, 0.998884, 1.09888],
              [0.941939, 1.04194, 1.14194, 1.24194],
              [1.05371, 1.15371, 1.25371, 1.35371],
              [1.13295, 1.23295, 1.33295, 1.43295]]]> : tensor<1x4x4xf32>
  ) : tensor<1x4x4xf32>
  return
}

func.func @softcap_attention1x3x4() {
%init = tensor.empty() : tensor<1x3x4xf32>
Expand Down
122 changes: 122 additions & 0 deletions tests/e2e/linalg_ext_ops/attention_i1_mask.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
// Attention over (batch=1, M=4, K1=4, K2=4, N=4) with a boolean mask supplied
// in bit-packed i1 storage: the 16 mask bits are materialized as two i8 bytes
// and bitcast to tensor<1x4x4xi1>. This file's suites pass
// --iree-experimental-packed-i1-storage, so the mask really is sub-byte.
func.func @attention1x4x4_i1_mask() {
  %init = tensor.empty() : tensor<1x4x4xf32>
  %query = util.unfoldable_constant dense<[[[0.1, 0.2, 0.3, 0.4],
                                            [0.5, 0.6, 0.7, 0.8],
                                            [0.9, 1.0, 1.1, 1.2],
                                            [1.3, 1.4, 1.5, 1.6]]]> : tensor<1x4x4xf32>

  %key = util.unfoldable_constant dense<[[[0.1, 0.2, 0.3, 0.4],
                                          [0.5, 0.6, 0.7, 0.8],
                                          [0.9, 1.0, 1.1, 1.2],
                                          [1.3, 1.4, 1.5, 1.6]]]> : tensor<1x4x4xf32>
  %value = util.unfoldable_constant dense<[[[0.1, 0.2, 0.3, 0.4],
                                            [0.5, 0.6, 0.7, 0.8],
                                            [0.9, 1.0, 1.1, 1.2],
                                            [1.3, 1.4, 1.5, 1.6]]]> : tensor<1x4x4xf32>

  // 165 = 0b10100101 per byte: an alternating on/off bit pattern once
  // reinterpreted as 1x4x4 i1 (bit order is the packed-i1 storage order;
  // @truncate_i1 in subbyte_types.mlir demonstrates LSB-first packing).
  %i8mask = util.unfoldable_constant dense<[165, 165]> : tensor<2xi8>
  %mask = flow.tensor.bitcast %i8mask : tensor<2xi8> -> tensor<1x4x4xi1>

  %scale = arith.constant 0.5 : f32
  // Maps over (d0..d4) = (batch, M, K1, K2, N): Q(d0,d1,d2), K(d0,d3,d2),
  // V(d0,d3,d4), scale (scalar), mask(d0,d1,d3), result(d0,d1,d4).
  %1 = iree_linalg_ext.attention {indexing_maps = [affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2)>,
                     affine_map<(d0, d1, d2, d3, d4) -> (d0, d3, d2)>,
                     affine_map<(d0, d1, d2, d3, d4) -> (d0, d3, d4)>,
                     affine_map<(d0, d1, d2, d3, d4) -> ()>,
                     affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d3)>,
                     affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d4)>]}
                     ins(%query, %key, %value, %scale, %mask : tensor<1x4x4xf32>,
                     tensor<1x4x4xf32>, tensor<1x4x4xf32>, f32, tensor<1x4x4xi1>) outs(%init : tensor<1x4x4xf32>) {
                      ^bb0(%arg0: f32):
                        iree_linalg_ext.yield %arg0 : f32
                     } -> tensor<1x4x4xf32>
  // Precomputed reference values for this mask pattern.
  check.expect_almost_eq_const(
      %1,
      dense<[[[0.57895, 0.67895, 0.77895, 0.87895],
              [1.09108, 1.19108, 1.29108, 1.39108],
              [0.774324, 0.874324, 0.974324, 1.07432],
              [1.22842, 1.32842, 1.42842, 1.52842]]]> : tensor<1x4x4xf32>
  ) : tensor<1x4x4xf32>
  return
}

// Same operands as @attention1x4x4_i1_mask, but the two packed mask bytes are
// 255 (0xFF), i.e. every one of the 16 mask bits is set regardless of bit
// order. With nothing masked out the result must match plain unmasked
// attention (the unpacked all-true variant in attention.mlir expects the same
// reference values).
func.func @attention1x4x4_i1_mask_all_ones() {
  %init = tensor.empty() : tensor<1x4x4xf32>
  %query = util.unfoldable_constant dense<[[[0.1, 0.2, 0.3, 0.4],
                                            [0.5, 0.6, 0.7, 0.8],
                                            [0.9, 1.0, 1.1, 1.2],
                                            [1.3, 1.4, 1.5, 1.6]]]> : tensor<1x4x4xf32>

  %key = util.unfoldable_constant dense<[[[0.1, 0.2, 0.3, 0.4],
                                          [0.5, 0.6, 0.7, 0.8],
                                          [0.9, 1.0, 1.1, 1.2],
                                          [1.3, 1.4, 1.5, 1.6]]]> : tensor<1x4x4xf32>
  %value = util.unfoldable_constant dense<[[[0.1, 0.2, 0.3, 0.4],
                                            [0.5, 0.6, 0.7, 0.8],
                                            [0.9, 1.0, 1.1, 1.2],
                                            [1.3, 1.4, 1.5, 1.6]]]> : tensor<1x4x4xf32>

  // All 16 bits set: equivalent to no masking at all.
  %i8mask = util.unfoldable_constant dense<[255, 255]> : tensor<2xi8>
  %mask = flow.tensor.bitcast %i8mask : tensor<2xi8> -> tensor<1x4x4xi1>

  %scale = arith.constant 0.5 : f32
  // Maps over (d0..d4) = (batch, M, K1, K2, N): Q(d0,d1,d2), K(d0,d3,d2),
  // V(d0,d3,d4), scale (scalar), mask(d0,d1,d3), result(d0,d1,d4).
  %1 = iree_linalg_ext.attention {indexing_maps = [affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2)>,
                     affine_map<(d0, d1, d2, d3, d4) -> (d0, d3, d2)>,
                     affine_map<(d0, d1, d2, d3, d4) -> (d0, d3, d4)>,
                     affine_map<(d0, d1, d2, d3, d4) -> ()>,
                     affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d3)>,
                     affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d4)>]}
                     ins(%query, %key, %value, %scale, %mask : tensor<1x4x4xf32>,
                     tensor<1x4x4xf32>, tensor<1x4x4xf32>, f32, tensor<1x4x4xi1>) outs(%init : tensor<1x4x4xf32>) {
                      ^bb0(%arg0: f32):
                        iree_linalg_ext.yield %arg0 : f32
                     } -> tensor<1x4x4xf32>
  // Precomputed reference values for unmasked scaled attention.
  check.expect_almost_eq_const(
      %1,
      dense<[[[0.798884, 0.898884, 0.998884, 1.09888],
              [0.941939, 1.04194, 1.14194, 1.24194],
              [1.05371, 1.15371, 1.25371, 1.35371],
              [1.13295, 1.23295, 1.33295, 1.43295]]]> : tensor<1x4x4xf32>
  ) : tensor<1x4x4xf32>
  return
}

// Attention with a packed-i1 mask intended to be lower-triangular (causal).
// NOTE(review): bytes [140, 239] decode to the lower-triangular 4x4 pattern
// only under MSB-first bit order within each byte; under the LSB-first order
// demonstrated by @truncate_i1 in subbyte_types.mlir they do not form a
// strict tril mask — confirm the intended bit order matches the name.
func.func @attention1x4x4_i1_mask_tril() {
  %init = tensor.empty() : tensor<1x4x4xf32>
  %query = util.unfoldable_constant dense<[[[0.1, 0.2, 0.3, 0.4],
                                            [0.5, 0.6, 0.7, 0.8],
                                            [0.9, 1.0, 1.1, 1.2],
                                            [1.3, 1.4, 1.5, 1.6]]]> : tensor<1x4x4xf32>

  %key = util.unfoldable_constant dense<[[[0.1, 0.2, 0.3, 0.4],
                                          [0.5, 0.6, 0.7, 0.8],
                                          [0.9, 1.0, 1.1, 1.2],
                                          [1.3, 1.4, 1.5, 1.6]]]> : tensor<1x4x4xf32>
  %value = util.unfoldable_constant dense<[[[0.1, 0.2, 0.3, 0.4],
                                            [0.5, 0.6, 0.7, 0.8],
                                            [0.9, 1.0, 1.1, 1.2],
                                            [1.3, 1.4, 1.5, 1.6]]]> : tensor<1x4x4xf32>

  // 140 = 0b10001100, 239 = 0b11101111; bitcast reinterprets the 16 bits as
  // a 1x4x4 boolean mask in the packed-i1 storage order.
  %i8mask = util.unfoldable_constant dense<[140, 239]> : tensor<2xi8>
  %mask = flow.tensor.bitcast %i8mask : tensor<2xi8> -> tensor<1x4x4xi1>

  %scale = arith.constant 0.5 : f32
  // Maps over (d0..d4) = (batch, M, K1, K2, N): Q(d0,d1,d2), K(d0,d3,d2),
  // V(d0,d3,d4), scale (scalar), mask(d0,d1,d3), result(d0,d1,d4).
  %1 = iree_linalg_ext.attention {indexing_maps = [affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2)>,
                     affine_map<(d0, d1, d2, d3, d4) -> (d0, d3, d2)>,
                     affine_map<(d0, d1, d2, d3, d4) -> (d0, d3, d4)>,
                     affine_map<(d0, d1, d2, d3, d4) -> ()>,
                     affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d3)>,
                     affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d4)>]}
                     ins(%query, %key, %value, %scale, %mask : tensor<1x4x4xf32>,
                     tensor<1x4x4xf32>, tensor<1x4x4xf32>, f32, tensor<1x4x4xi1>) outs(%init : tensor<1x4x4xf32>) {
                      ^bb0(%arg0: f32):
                        iree_linalg_ext.yield %arg0 : f32
                     } -> tensor<1x4x4xf32>
  // Precomputed reference values for this mask pattern.
  check.expect_almost_eq_const(
      %1,
      dense<[[[1.11993, 1.21993, 1.31993, 1.41993],
              [1.3, 1.4, 1.5, 1.6],
              [1.05371, 1.15371, 1.25371, 1.35371],
              [1.15549, 1.25549, 1.35549, 1.45549]]]> : tensor<1x4x4xf32>
  ) : tensor<1x4x4xf32>
  return
}
71 changes: 71 additions & 0 deletions tests/e2e/subbyte_types/subbyte_types.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,74 @@ func.func @i1_type_slice() {
check.expect_eq_const(%tensor_res, dense<[255]> : tensor<1xi8>) : tensor<1xi8>
return
}

// Round-trips one packed byte (140) through a 2x4 i1 view and back, checking
// that packed-i1 storage preserves the raw bits exactly.
func.func @i1_representation() {
  %bytes = util.unfoldable_constant dense<[140]> : tensor<1xi8>
  %bits = flow.tensor.bitcast %bytes : tensor<1xi8> -> tensor<2x4xi1>
  // Barrier keeps the two bitcasts from folding into a no-op.
  %kept = util.optimization_barrier %bits : tensor<2x4xi1>
  %roundtrip = flow.tensor.bitcast %kept : tensor<2x4xi1> -> tensor<1xi8>
  check.expect_eq_const(%roundtrip, dense<[140]> : tensor<1xi8>) : tensor<1xi8>
  return
}

// Round-trips two packed bytes through a 2x8 i1 view: each byte corresponds
// to one row of eight mask bits, and the raw bytes must survive unchanged.
func.func @i1_representation_2() {
  %bytes = util.unfoldable_constant dense<[140, 77]> : tensor<2xi8>
  %bits = flow.tensor.bitcast %bytes : tensor<2xi8> -> tensor<2x8xi1>
  // Barrier keeps the two bitcasts from folding into a no-op.
  %kept = util.optimization_barrier %bits : tensor<2x8xi1>
  %roundtrip = flow.tensor.bitcast %kept : tensor<2x8xi1> -> tensor<2xi8>
  check.expect_eq_const(%roundtrip, dense<[140, 77]> : tensor<2xi8>) : tensor<2xi8>
  return
}

// Round-trips the same two bytes through a 4x4 i1 view, where each byte spans
// two rows of four bits; the raw bytes must come back unchanged.
func.func @i1_representation_3() {
  %bytes = util.unfoldable_constant dense<[140, 77]> : tensor<2xi8>
  %bits = flow.tensor.bitcast %bytes : tensor<2xi8> -> tensor<4x4xi1>
  // Barrier keeps the two bitcasts from folding into a no-op.
  %kept = util.optimization_barrier %bits : tensor<4x4xi1>
  %roundtrip = flow.tensor.bitcast %kept : tensor<4x4xi1> -> tensor<2xi8>
  check.expect_eq_const(%roundtrip, dense<[140, 77]> : tensor<2xi8>) : tensor<2xi8>
  return
}

// Truncates i8 0/1 values to i1 with a greater-than-zero compare and checks
// the packed result byte: [1,1,0,0,0,0,1,1] packs LSB-first to
// 0b11000011 = 195.
func.func @truncate_i1() {
  %input = util.unfoldable_constant dense<[1, 1, 0, 0,
                                           0, 0, 1, 1]> : tensor<8xi8>
  %dest = tensor.empty() : tensor<8xi1>
  %bits = linalg.generic
      {indexing_maps = [affine_map<(d0) -> (d0)>,
                        affine_map<(d0) -> (d0)>],
       iterator_types = ["parallel"]}
      ins(%input : tensor<8xi8>)
      outs(%dest : tensor<8xi1>) {
    ^bb0(%elem: i8, %unused: i1):
      %c0 = arith.constant 0 : i8
      %nonzero = arith.cmpi "sgt", %elem, %c0 : i8
      linalg.yield %nonzero : i1
  } -> tensor<8xi1>
  %packed = flow.tensor.bitcast %bits : tensor<8xi1> -> tensor<1xi8>
  check.expect_eq_const(%packed, dense<[195]> : tensor<1xi8>) : tensor<1xi8>
  return
}

// Truncates a 4x4 i8 0/1 matrix to i1 with a greater-than-zero compare, then
// checks the two packed bytes: rows pack LSB-first into [60, 195].
func.func @truncate_i1_2() {
  %input = util.unfoldable_constant dense<[[0, 0, 1, 1],
                                           [1, 1, 0, 0],
                                           [1, 1, 0, 0],
                                           [0, 0, 1, 1]]> : tensor<4x4xi8>
  %dest = tensor.empty() : tensor<4x4xi1>
  %bits = linalg.generic
      {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
                        affine_map<(d0, d1) -> (d0, d1)>],
       iterator_types = ["parallel", "parallel"]}
      ins(%input : tensor<4x4xi8>)
      outs(%dest : tensor<4x4xi1>) {
    ^bb0(%elem: i8, %unused: i1):
      %c0 = arith.constant 0 : i8
      %nonzero = arith.cmpi "sgt", %elem, %c0 : i8
      linalg.yield %nonzero : i1
  } -> tensor<4x4xi1>
  %packed = flow.tensor.bitcast %bits : tensor<4x4xi1> -> tensor<2xi8>
  check.expect_eq_const(%packed, dense<[60, 195]> : tensor<2xi8>) : tensor<2xi8>
  return
}

0 comments on commit f2587f5

Please sign in to comment.