You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
The following line in the LLVM loop vectorizer makes it report that vectorization is not beneficial. The debug output also says that it is not beneficial.
However, sometimes this message is not correct. So, the cost model predicts it is not beneficial, but when you actually run the program with vectorization it turns out to be beneficial. For example, when we run the following program (this program is based on example4b in https://github.com/llvm/llvm-test-suite/blob/master/SingleSource/UnitTests/Vectorizer/gcc-loops.cpp):
// example.c
#include "header.h"
/* Global work arrays for the kernel: example4b() writes `result` and reads
 * `in1` and `in2`. Each holds 64 ints; ALIGNED16 (from header.h) requests
 * 16-byte alignment for the array. */
int result[64] ALIGNED16;
int in1[64] ALIGNED16;
int in2[64] ALIGNED16;
/*
 * Benchmark kernel under study (reproducer for the vectorizer cost-model
 * report; keep the loop shape exactly as written).
 *
 * Steps i over 0, 2, ..., 58 and on each step stores two adjacent elements:
 *   result[i]   = in1[i+1] + in2[i+3]
 *   result[i+1] = in1[i]   + in2[i+2]
 * so it writes result[0..59] and reads in1[0..59] and in2[2..61].
 * Within each pair the loads are swapped relative to the stores, and the
 * in2 loads are offset by two elements from the in1 loads.
 *
 * noinline asks the compiler not to inline the function into its caller --
 * presumably so every benchmark iteration performs a real call (confirm).
 */
__attribute__((noinline))
void example4b () {
int i;
/* feature: support for read accesses with a compile time known misalignment */
/* The pragma below explicitly requests vectorization and interleaving of
 * the loop that follows it. */
#pragma clang loop vectorize(enable) interleave(enable)
for (i=0; i<64-4; i+=2){
result[i] = in1[i+1] +in2[i+3];
result[i+1]=in1[i] +in2[i+2];
}
}
/*
 * Benchmark driver: fills the three global arrays with the deterministic
 * byte pattern produced by init_memory(), then hands example4b() to BENCH
 * together with the iteration count and a digest of `result`.
 * Always returns 0.
 */
int main(int argc,char* argv[]){
    /* Each array has 64 elements; `array + 64` is its one-past-the-end pointer. */
    init_memory(result, result + 64);
    init_memory(in1, in1 + 64);
    init_memory(in2, in2 + 64);

    BENCH("Example4b", example4b(), Mi*2/64*512, digest_memory(result, result + 64));

    return 0;
}
// header.h
#include <sys/time.h>
#include <stdio.h>
#include <stdbool.h>
/* Size constants. None of them is referenced by the code shown alongside
 * this header -- presumably kept from the original test this was derived
 * from (verify before removing). */
#define N 512
#define M 32
#define K 4
#define L 32
/* Declaration suffix that requests 16-byte alignment for the declared object. */
#define ALIGNED16 __attribute__((aligned(16)))
/* `int` typedef carrying a 16-byte alignment attribute. */
typedef int aint __attribute__ ((__aligned__(16)));
/*
 * BENCH(NAME, RUN_LINE, ITER, DIGEST_LINE)
 *
 * Runs RUN_LINE once untimed, then ITER more times between two
 * gettimeofday() calls. Afterwards DIGEST_LINE is evaluated once and stored
 * in the local `r`, which is not read again. The elapsed time is converted
 * to milliseconds (0.5 is added before the value is truncated to long) and,
 * when print_times is true, printed with "%ld" and no trailing newline.
 *
 * NAME is accepted but does not appear in the expansion.
 *
 * The expansion is a plain brace-enclosed block, and its locals (Start, End,
 * r, mtime, s, us, and the loop index i) are in scope where RUN_LINE and
 * DIGEST_LINE are expanded -- avoid those names in the arguments.
 */
// Warmup and then measure.
#define BENCH(NAME, RUN_LINE, ITER, DIGEST_LINE) {\
struct timeval Start, End;\
RUN_LINE;\
gettimeofday(&Start, 0);for (int i=0; i < (ITER); ++i) RUN_LINE;\
gettimeofday(&End, 0);\
unsigned r = DIGEST_LINE;\
long mtime, s,us;\
s = End.tv_sec - Start.tv_sec;\
us = End.tv_usec - Start.tv_usec;\
mtime = (s*1000 + us/1000.0)+0.5;\
if (print_times)\
printf("%ld",mtime);\
}
/* 2^20; used as the base of the benchmark iteration count. */
#define Mi 1048576 //1<<20
/* Controls whether BENCH prints the measured time. Currently always true;
 * the trailing comment records a previous argc-based condition. */
#define print_times true//argc > 1;
/* Fills the bytes in [start, end) with a deterministic pseudo-random pattern. */
void init_memory(void *start, void* end);
/* Fills the floats in [start, end) with a geometric sequence (ratio 1.1). */
void init_memory_float(float *start, float* end);
/* Returns a checksum of the bytes in [start, end). */
unsigned digest_memory(void *start, void* end);
/* Takes two consecutive timestamps and, when `print` is true, prints their
 * difference in milliseconds. */
void atimer(const char* title, bool print);
// header.c
#include "header.h"
/*
 * Fill the byte range [start, end) with a deterministic pseudo-random
 * pattern.
 *
 * An 8-bit generator seeded with 1 is advanced once per byte as
 *   seed = ((seed * 7) ^ 0x27) + 1   (mod 256)
 * and the new value is stored, so the same range length always receives
 * the same contents. `end` must be reachable from `start` by stepping one
 * byte at a time; an empty range (start == end) writes nothing.
 */
void init_memory(void *start, void* end) {
    unsigned char *cursor = start;
    unsigned char *const limit = end;
    unsigned char seed = 1;

    for (; cursor != limit; ++cursor) {
        /* Arithmetic happens in int; the cast keeps only the low 8 bits. */
        seed = (unsigned char)(((seed * 7) ^ 0x27) + 1);
        *cursor = seed;
    }
}
/*
 * Fill the float range [start, end) with a geometric sequence.
 *
 * A running value starts at 1.0 and is multiplied by 1.1 before each store,
 * so the first element receives 1.0 * 1.1, the second that value * 1.1, and
 * so on. The multiplication is done in double (1.1 is a double constant)
 * and the product is rounded back to float on assignment.
 * An empty range (start == end) writes nothing.
 */
void init_memory_float(float *start, float* end) {
    float value = 1.0;

    for (float *slot = start; slot != end; ++slot) {
        value = value * 1.1;
        *slot = value;
    }
}
/*
 * Compute a checksum over the bytes in [start, end).
 *
 * The accumulator starts at 1. For every byte it is multiplied by 3,
 * XORed with the byte, and then replaced by
 * (value >> 8) ^ (value << 8). The final accumulator is returned;
 * an empty range yields 1. The memory is only read, never modified.
 */
unsigned digest_memory(void *start, void* end) {
    const unsigned char *byte = start;
    const unsigned char *const stop = end;
    unsigned hash = 1;

    for (; byte != stop; ++byte) {
        hash = (hash * 3) ^ *byte;
        hash = (hash >> 8) ^ (hash << 8);
    }
    return hash;
}
/*
 * Take two timestamps back-to-back with gettimeofday() and, when `print`
 * is true, print the difference between them in milliseconds using "%ld"
 * (no trailing newline).
 *
 * The millisecond value is computed in floating point and 0.5 is added
 * before it is truncated to long. `title` is accepted but not used.
 */
void atimer(const char* title, bool print) {
    struct timeval begin, finish;

    gettimeofday(&begin, 0);
    gettimeofday(&finish, 0);

    long sec_delta = finish.tv_sec - begin.tv_sec;
    long usec_delta = finish.tv_usec - begin.tv_usec;
    long elapsed_ms = (sec_delta*1000 + usec_delta/1000.0)+0.5;

    if (print) {
        printf("%ld", elapsed_ms);
    }
}
// clang -O3 header.c example.c
you get a better execution time than without the pragma, yet the vectorizer reports that vectorization is not beneficial when the pragma is present. The behavior also differs depending on whether the pragma that enables vectorization is present or not. This makes the behavior of the vectorizer somewhat ambiguous. I think it would be nice to print a clear error message that explains this behavior, and perhaps to have this behavior documented somewhere. It would also be great to improve the cost model so that, when it predicts that vectorization is not beneficial, it actually turns out not to be beneficial when the code is executed.
Thanks!
The text was updated successfully, but these errors were encountered:
The following line in the LLVM loop vectorizer makes it report that vectorization is not beneficial. The debug output also says that it is not beneficial.
However, sometimes this message is not correct. So, the cost model predicts it is not beneficial, but when you actually run the program with vectorization it turns out to be beneficial. For example, when we run the following program (this program is based on example4b in
https://github.com/llvm/llvm-test-suite/blob/master/SingleSource/UnitTests/Vectorizer/gcc-loops.cpp
):// example.c
// header.h
// header.c
// clang -O3 header.c example.c
you get a better execution time than without the pragma, yet the vectorizer reports that vectorization is not beneficial when the pragma is present. The behavior also differs depending on whether the pragma that enables vectorization is present or not. This makes the behavior of the vectorizer somewhat ambiguous. I think it would be nice to print a clear error message that explains this behavior, and perhaps to have this behavior documented somewhere. It would also be great to improve the cost model so that, when it predicts that vectorization is not beneficial, it actually turns out not to be beneficial when the code is executed.
Thanks!
The text was updated successfully, but these errors were encountered: