@@ -559,15 +559,13 @@ struct simd16uint16 {
559
559
}
560
560
561
561
// Checks whether the other holds exactly the same bytes.
562
- bool is_same_as (simd16uint16 other) const {
563
- const bool equal0 =
564
- (vminvq_u16 (vceqq_u16 (data.val [0 ], other.data .val [0 ])) ==
565
- 0xffff );
566
- const bool equal1 =
567
- (vminvq_u16 (vceqq_u16 (data.val [1 ], other.data .val [1 ])) ==
568
- 0xffff );
569
-
570
- return equal0 && equal1;
562
+ template <typename T>
563
+ bool is_same_as (T other) const {
564
+ const auto o = detail::simdlib::reinterpret_u16 (other.data );
565
+ const auto equals = detail::simdlib::binary_func (data, o)
566
+ .template call <&vceqq_u16>();
567
+ const auto equal = vandq_u16 (equals.val [0 ], equals.val [1 ]);
568
+ return vminvq_u16 (equal) == 0xffffu ;
571
569
}
572
570
573
571
simd16uint16 operator ~() const {
@@ -689,13 +687,12 @@ inline void cmplt_min_max_fast(
689
687
simd16uint16& minIndices,
690
688
simd16uint16& maxValues,
691
689
simd16uint16& maxIndices) {
692
- const uint16x8x2_t comparison = uint16x8x2_t {
693
- vcltq_u16 (candidateValues.data .val [0 ], currentValues.data .val [0 ]),
694
- vcltq_u16 (candidateValues.data .val [1 ], currentValues.data .val [1 ])};
690
+ const uint16x8x2_t comparison =
691
+ detail::simdlib::binary_func (
692
+ candidateValues.data , currentValues.data )
693
+ .call <&vcltq_u16>();
695
694
696
- minValues.data = uint16x8x2_t {
697
- vminq_u16 (candidateValues.data .val [0 ], currentValues.data .val [0 ]),
698
- vminq_u16 (candidateValues.data .val [1 ], currentValues.data .val [1 ])};
695
+ minValues = min (candidateValues, currentValues);
699
696
minIndices.data = uint16x8x2_t {
700
697
vbslq_u16 (
701
698
comparison.val [0 ],
@@ -706,9 +703,7 @@ inline void cmplt_min_max_fast(
706
703
candidateIndices.data .val [1 ],
707
704
currentIndices.data .val [1 ])};
708
705
709
- maxValues.data = uint16x8x2_t {
710
- vmaxq_u16 (candidateValues.data .val [0 ], currentValues.data .val [0 ]),
711
- vmaxq_u16 (candidateValues.data .val [1 ], currentValues.data .val [1 ])};
706
+ maxValues = max (candidateValues, currentValues);
712
707
maxIndices.data = uint16x8x2_t {
713
708
vbslq_u16 (
714
709
comparison.val [0 ],
@@ -869,13 +864,13 @@ struct simd32uint8 {
869
864
}
870
865
871
866
// Checks whether the other holds exactly the same bytes.
872
- bool is_same_as (simd32uint8 other) const {
873
- const bool equal0 =
874
- ( vminvq_u8 ( vceqq_u8 (data. val [ 0 ], other.data . val [ 0 ])) == 0xff );
875
- const bool equal1 =
876
- ( vminvq_u8 ( vceqq_u8 (data. val [ 1 ], other. data . val [ 1 ])) == 0xff );
877
-
878
- return equal0 && equal1 ;
867
+ template < typename T>
868
+ bool is_same_as (T other) const {
869
+ const auto o = detail::simdlib::reinterpret_u8 ( other.data );
870
+ const auto equals = detail::simdlib::binary_func (data, o)
871
+ . template call <&vceqq_u8>( );
872
+ const auto equal = vandq_u8 (equals. val [ 0 ], equals. val [ 1 ]);
873
+ return vminvq_u8 (equal) == 0xffu ;
879
874
}
880
875
};
881
876
@@ -960,27 +955,28 @@ struct simd8uint32 {
960
955
return *this ;
961
956
}
962
957
963
- bool operator ==(simd8uint32 other) const {
964
- const auto equals = detail::simdlib::binary_func (data, other.data )
965
- .call <&vceqq_u32>();
966
- const auto equal = vandq_u32 (equals.val [0 ], equals.val [1 ]);
967
- return vminvq_u32 (equal) == 0xffffffff ;
958
+ simd8uint32 operator ==(simd8uint32 other) const {
959
+ return simd8uint32{detail::simdlib::binary_func (data, other.data )
960
+ .call <&vceqq_u32>()};
968
961
}
969
962
970
- bool operator !=(simd8uint32 other) const {
971
- return !(*this == other);
963
+ simd8uint32 operator ~() const {
964
+ return simd8uint32{
965
+ detail::simdlib::unary_func (data).call <&vmvnq_u32>()};
972
966
}
973
967
974
- // Checks whether the other holds exactly the same bytes.
975
- bool is_same_as (simd8uint32 other) const {
976
- const bool equal0 =
977
- (vminvq_u32 (vceqq_u32 (data.val [0 ], other.data .val [0 ])) ==
978
- 0xffffffff );
979
- const bool equal1 =
980
- (vminvq_u32 (vceqq_u32 (data.val [1 ], other.data .val [1 ])) ==
981
- 0xffffffff );
968
+ simd8uint32 operator !=(simd8uint32 other) const {
969
+ return ~(*this == other);
970
+ }
982
971
983
- return equal0 && equal1;
972
+ // Checks whether the other holds exactly the same bytes.
973
+ template <typename T>
974
+ bool is_same_as (T other) const {
975
+ const auto o = detail::simdlib::reinterpret_u32 (other.data );
976
+ const auto equals = detail::simdlib::binary_func (data, o)
977
+ .template call <&vceqq_u32>();
978
+ const auto equal = vandq_u32 (equals.val [0 ], equals.val [1 ]);
979
+ return vminvq_u32 (equal) == 0xffffffffu ;
984
980
}
985
981
986
982
void clear () {
@@ -1053,13 +1049,14 @@ inline void cmplt_min_max_fast(
1053
1049
simd8uint32& minIndices,
1054
1050
simd8uint32& maxValues,
1055
1051
simd8uint32& maxIndices) {
1056
- const uint32x4x2_t comparison = uint32x4x2_t {
1057
- vcltq_u32 (candidateValues.data .val [0 ], currentValues.data .val [0 ]),
1058
- vcltq_u32 (candidateValues.data .val [1 ], currentValues.data .val [1 ])};
1059
-
1060
- minValues.data = uint32x4x2_t {
1061
- vminq_u32 (candidateValues.data .val [0 ], currentValues.data .val [0 ]),
1062
- vminq_u32 (candidateValues.data .val [1 ], currentValues.data .val [1 ])};
1052
+ const uint32x4x2_t comparison =
1053
+ detail::simdlib::binary_func (
1054
+ candidateValues.data , currentValues.data )
1055
+ .call <&vcltq_u32>();
1056
+
1057
+ minValues.data = detail::simdlib::binary_func (
1058
+ candidateValues.data , currentValues.data )
1059
+ .call <&vminq_u32>();
1063
1060
minIndices.data = uint32x4x2_t {
1064
1061
vbslq_u32 (
1065
1062
comparison.val [0 ],
@@ -1070,9 +1067,9 @@ inline void cmplt_min_max_fast(
1070
1067
candidateIndices.data .val [1 ],
1071
1068
currentIndices.data .val [1 ])};
1072
1069
1073
- maxValues.data = uint32x4x2_t {
1074
- vmaxq_u32 ( candidateValues.data . val [ 0 ] , currentValues.data . val [ 0 ]),
1075
- vmaxq_u32 (candidateValues. data . val [ 1 ], currentValues. data . val [ 1 ])} ;
1070
+ maxValues.data = detail::simdlib::binary_func (
1071
+ candidateValues.data , currentValues.data )
1072
+ . call <&vmaxq_u32>() ;
1076
1073
maxIndices.data = uint32x4x2_t {
1077
1074
vbslq_u32 (
1078
1075
comparison.val [0 ],
@@ -1167,28 +1164,25 @@ struct simd8float32 {
1167
1164
return *this ;
1168
1165
}
1169
1166
1170
- bool operator ==(simd8float32 other) const {
1171
- const auto equals =
1167
+ simd8uint32 operator ==(simd8float32 other) const {
1168
+ return simd8uint32{
1172
1169
detail::simdlib::binary_func<::uint32x4x2_t >(data, other.data )
1173
- .call <&vceqq_f32>();
1174
- const auto equal = vandq_u32 (equals.val [0 ], equals.val [1 ]);
1175
- return vminvq_u32 (equal) == 0xffffffff ;
1170
+ .call <&vceqq_f32>()};
1176
1171
}
1177
1172
1178
- bool operator !=(simd8float32 other) const {
1179
- return ! (*this == other);
1173
+ simd8uint32 operator !=(simd8float32 other) const {
1174
+ return ~ (*this == other);
1180
1175
}
1181
1176
1182
1177
// Checks whether the other holds exactly the same bytes.
1183
- bool is_same_as (simd8float32 other) const {
1184
- const bool equal0 =
1185
- (vminvq_u32 (vceqq_f32 (data.val [0 ], other.data .val [0 ])) ==
1186
- 0xffffffff );
1187
- const bool equal1 =
1188
- (vminvq_u32 (vceqq_f32 (data.val [1 ], other.data .val [1 ])) ==
1189
- 0xffffffff );
1190
-
1191
- return equal0 && equal1;
1178
+ template <typename T>
1179
+ bool is_same_as (T other) const {
1180
+ const auto o = detail::simdlib::reinterpret_f32 (other.data );
1181
+ const auto equals =
1182
+ detail::simdlib::binary_func<::uint32x4x2_t >(data, o)
1183
+ .template call <&vceqq_f32>();
1184
+ const auto equal = vandq_u32 (equals.val [0 ], equals.val [1 ]);
1185
+ return vminvq_u32 (equal) == 0xffffffffu ;
1192
1186
}
1193
1187
1194
1188
std::string tostring () const {
@@ -1302,13 +1296,14 @@ inline void cmplt_min_max_fast(
1302
1296
simd8uint32& minIndices,
1303
1297
simd8float32& maxValues,
1304
1298
simd8uint32& maxIndices) {
1305
- const uint32x4x2_t comparison = uint32x4x2_t {
1306
- vcltq_f32 (candidateValues.data .val [0 ], currentValues.data .val [0 ]),
1307
- vcltq_f32 (candidateValues.data .val [1 ], currentValues.data .val [1 ])};
1308
-
1309
- minValues.data = float32x4x2_t {
1310
- vminq_f32 (candidateValues.data .val [0 ], currentValues.data .val [0 ]),
1311
- vminq_f32 (candidateValues.data .val [1 ], currentValues.data .val [1 ])};
1299
+ const uint32x4x2_t comparison =
1300
+ detail::simdlib::binary_func<::uint32x4x2_t >(
1301
+ candidateValues.data , currentValues.data )
1302
+ .call <&vcltq_f32>();
1303
+
1304
+ minValues.data = detail::simdlib::binary_func (
1305
+ candidateValues.data , currentValues.data )
1306
+ .call <&vminq_f32>();
1312
1307
minIndices.data = uint32x4x2_t {
1313
1308
vbslq_u32 (
1314
1309
comparison.val [0 ],
@@ -1319,9 +1314,9 @@ inline void cmplt_min_max_fast(
1319
1314
candidateIndices.data .val [1 ],
1320
1315
currentIndices.data .val [1 ])};
1321
1316
1322
- maxValues.data = float32x4x2_t {
1323
- vmaxq_f32 ( candidateValues.data . val [ 0 ] , currentValues.data . val [ 0 ]),
1324
- vmaxq_f32 (candidateValues. data . val [ 1 ], currentValues. data . val [ 1 ])} ;
1317
+ maxValues.data = detail::simdlib::binary_func (
1318
+ candidateValues.data , currentValues.data )
1319
+ . call <&vmaxq_f32>() ;
1325
1320
maxIndices.data = uint32x4x2_t {
1326
1321
vbslq_u32 (
1327
1322
comparison.val [0 ],
0 commit comments