@@ -67,7 +67,7 @@ void varMangle(g1p_t *target, size_t size, unsigned step);
67
67
* @brief Executes many-row tests on FK20. Behavior is similar to fk20test.cu
68
68
* but using many GPU blocks, each one executing one known-answer test. All tests
69
69
* are different. KATS are statically linked in the binary.
70
- *
70
+ *
71
71
* @param argc Command line argument count
72
72
* @param argv Command line argument values
73
73
* @return int 0
@@ -113,21 +113,21 @@ int main(int argc, char **argv) {
113
113
114
114
/* *
115
115
* NOTE ON DEPRECATED FUNCTIONS
116
- *
116
+ *
117
117
* In the main call, some tests are commented out, namely:
118
118
* -hext_fft2h_fft_512
119
119
* -fk20_poly2toeplitz_coefficients_fft_test
120
120
* Those tests concern fk20 functions that execute more than one step in
121
121
* a single kernel. They cover an unimplemented (possible) future optimization.
122
- *
122
+ *
123
123
*/
124
124
/* *****************************************************************************/
125
125
126
126
/* *
127
- * @brief Executes many FK20 computations on a single row, with a check on
127
+ * @brief Executes many FK20 computations on a single row, with a check on
128
128
* each step. A computation failure will not cause a cascade effect, eliminating
129
129
* false-fails due to data dependencies.
130
- *
130
+ *
131
131
* @param rows number of blocks in the range [1,512]
132
132
*/
133
133
void fullTest_512 (unsigned rows){
@@ -137,7 +137,7 @@ void fullTest_512(unsigned rows){
137
137
138
138
// Setup
139
139
140
- // SET_SHAREDMEM(fr_sharedmem, fr_fft_wrapper);
140
+ SET_SHAREDMEM (fr_sharedmem, fr_fft_wrapper);
141
141
SET_SHAREDMEM (g1p_sharedmem, g1p_fft_wrapper);
142
142
SET_SHAREDMEM (g1p_sharedmem, g1p_ift_wrapper);
143
143
@@ -227,9 +227,9 @@ void fullTest_512(unsigned rows){
227
227
/* *
228
228
* @brief Similar to fullTest, but the polynomial has changes made to it. The
229
229
* function checks for false positives in the tests.
230
- *
230
+ *
231
231
* The polynomial is restored after execution.
232
- *
232
+ *
233
233
* @param rows number of blocks in the range [1,512]
234
234
*/
235
235
void fullTestFalseability_512 (unsigned rows){
@@ -239,7 +239,7 @@ void fullTestFalseability_512(unsigned rows){
239
239
240
240
// Setup
241
241
242
- // SET_SHAREDMEM(fr_sharedmem, fr_fft_wrapper);
242
+ SET_SHAREDMEM (fr_sharedmem, fr_fft_wrapper);
243
243
SET_SHAREDMEM (g1p_sharedmem, g1p_fft_wrapper);
244
244
SET_SHAREDMEM (g1p_sharedmem, g1p_ift_wrapper);
245
245
@@ -332,7 +332,7 @@ The testing functions follow a common template, described in ./doc/fk20test.md
332
332
333
333
/* *
334
334
* @brief Test for fr_fft: toeplitz_coefficients -> toeplitz_coefficients_fft
335
- *
335
+ *
336
336
* @param rows number of blocks in the range [1,512]
337
337
*/
338
338
void toeplitz_coefficients2toeplitz_coefficients_fft_512 (unsigned rows){
@@ -370,7 +370,7 @@ void toeplitz_coefficients2toeplitz_coefficients_fft_512(unsigned rows){
370
370
371
371
/* *
372
372
* @brief Test for g1p_fft: h -> h_fft
373
- *
373
+ *
374
374
* @param rows number of blocks in the range [1,512]
375
375
*/
376
376
void h2h_fft_512 (unsigned rows){
@@ -410,7 +410,7 @@ void h2h_fft_512(unsigned rows){
410
410
411
411
/* *
412
412
* @brief Test for g1p_ift: h_fft -> h
413
- *
413
+ *
414
414
* @param rows number of blocks in the range [1,512]
415
415
*/
416
416
void h_fft2h_512 (unsigned rows){
@@ -451,7 +451,7 @@ void h_fft2h_512(unsigned rows){
451
451
452
452
/* *
453
453
* @brief Test for g1p_ift: hext_fft -> h
454
- *
454
+ *
455
455
* @param rows number of blocks in the range [1,512]
456
456
*/
457
457
void hext_fft2h_512 (unsigned rows){
@@ -467,9 +467,9 @@ void hext_fft2h_512(unsigned rows){
467
467
468
468
CLOCKSTART;
469
469
g1p_ift_wrapper<<<rows, 256 , g1p_sharedmem>>> (g1p_tmp, hext_fft);
470
- CUDASYNC (" g1p_ift_wrapper" );
470
+ CUDASYNC (" g1p_ift_wrapper" );
471
471
fk20_hext2h<<<rows, 256 >>> (g1p_tmp);
472
- CUDASYNC (" fk20_hext2h" );
472
+ CUDASYNC (" fk20_hext2h" );
473
473
CLOCKEND;
474
474
475
475
clearRes;
@@ -491,7 +491,7 @@ void hext_fft2h_512(unsigned rows){
491
491
492
492
/* *
493
493
* @brief Test for fk20_poly2toeplitz_coefficients: polynomial -> toeplitz_coefficients
494
- *
494
+ *
495
495
* @param rows number of blocks in the range [1,512]
496
496
*/
497
497
void fk20_poly2toeplitz_coefficients_512 (unsigned rows) {
@@ -529,7 +529,7 @@ void fk20_poly2toeplitz_coefficients_512(unsigned rows) {
529
529
530
530
/* *
531
531
* @brief Test for fk20_poly2hext_fft: polynomial -> hext_fft
532
- *
532
+ *
533
533
* @param rows number of blocks in the range [1,512]
534
534
*/
535
535
void fk20_poly2hext_fft_512 (unsigned rows){
@@ -540,7 +540,7 @@ void fk20_poly2hext_fft_512(unsigned rows){
540
540
541
541
pass = true ;
542
542
543
- // SET_SHAREDMEM(g1p_sharedmem, fk20_poly2hext_fft);
543
+ SET_SHAREDMEM (g1p_sharedmem, fk20_poly2hext_fft);
544
544
545
545
printf (" === RUN %s\n " , " fk20_poly2hext_fft: polynomial -> hext_fft" );
546
546
for (int testIDX=0 ; testIDX<=1 ; testIDX++){
@@ -569,7 +569,7 @@ void fk20_poly2hext_fft_512(unsigned rows){
569
569
570
570
/* *
571
571
* @brief Test for fk20_poly2h_fft: polynomial -> h_fft
572
- *
572
+ *
573
573
* @param rows number of blocks in the range [1,512]
574
574
*/
575
575
void fk20_poly2h_fft_512 (unsigned rows){
@@ -606,7 +606,7 @@ void fk20_poly2h_fft_512(unsigned rows){
606
606
607
607
/* *
608
608
* @brief Test for hext_fft2h_fft_512: hext_fft -> h_fft
609
- *
609
+ *
610
610
* @param rows number of blocks in the range [1,512]
611
611
*/
612
612
void hext_fft2h_fft_512 (unsigned rows){
@@ -644,7 +644,7 @@ void hext_fft2h_fft_512(unsigned rows){
644
644
645
645
/* *
646
646
* @brief Test for fk20_msm: Toeplitz_coefficients+xext_fft -> hext_fft
647
- *
647
+ *
648
648
* @param rows number of blocks in the range [1,512]
649
649
*/
650
650
void fk20_msmloop_512 (unsigned rows){
@@ -693,7 +693,7 @@ void fk20_msmloop_512(unsigned rows){
693
693
CLOCKSTART;
694
694
fk20_poly2toeplitz_coefficients_fft<<<rows, 256>>>(fr_tmp_, polynomial);
695
695
err = cudaDeviceSynchronize();
696
- CUDASYNC("fk20_poly2toeplitz_coefficients_fft");
696
+ CUDASYNC("fk20_poly2toeplitz_coefficients_fft");
697
697
CLOCKEND;
698
698
clearRes;
699
699
fr_eq_wrapper<<<16, 256>>>(cmp, rows*16*512, fr_tmp_, (fr_t *)toeplitz_coefficients_fft);
@@ -722,7 +722,7 @@ void fk20_msmloop_512(unsigned rows){
722
722
/* *
723
723
* @brief Swaps elements at positions that are multiples of step. Nondestructive: call
724
724
* a second time to undo the changes
725
- *
725
+ *
726
726
* @param[out] target Pointer to array
727
727
* @param[in] size length of the array
728
728
* @param[in] step distance between elements swapped.
@@ -744,7 +744,7 @@ void varMangle(fr_t *target, size_t size, unsigned step){
744
744
/* *
745
745
* @brief Swaps elements at positions that are multiples of step. Nondestructive: call
746
746
* a second time to undo the changes
747
- *
747
+ *
748
748
* @param[out] target Pointer to array
749
749
* @param[in] size length of the array
750
750
* @param[in] step distance between elements swapped.
0 commit comments