Skip to content

Commit

Permalink
Merge pull request #420 from hpc/fix-mdtest-verify
Browse files Browse the repository at this point in the history
Bugfix: mdtest-verification, posix tolerate short read
  • Loading branch information
JulianKunkel authored Jun 5, 2022
2 parents c4465be + 5f6bf7b commit 07e2feb
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 9 deletions.
21 changes: 13 additions & 8 deletions src/aiori-POSIX.c
Original file line number Diff line number Diff line change
Expand Up @@ -720,21 +720,26 @@ static IOR_offset_t POSIX_Xfer(int access, aiori_fd_t *file, IOR_size_t * buffer
#ifdef HAVE_GPU_DIRECT
}
#endif
- if (rc == 0)
- ERRF("read(%d, %p, %lld) returned EOF prematurely",
- fd, (void*)ptr, remaining);
- if (rc == -1)
- ERRF("read(%d, %p, %lld) failed",
- fd, (void*)ptr, remaining);
+ if (rc == 0){
+ WARNF("read(%d, %p, %lld) returned EOF prematurely", fd, (void*)ptr, remaining);
+ return length - remaining;
+ }
+
+ if (rc == -1){
+ WARNF("read(%d, %p, %lld) failed", fd, (void*)ptr, remaining);
+ return length - remaining;
+ }
}
if (rc < remaining) {
WARNF("task %d, partial %s, %lld of %lld bytes at offset %lld\n",
rank,
access == WRITE ? "write()" : "read()",
rc, remaining,
offset + length - remaining);
- if (xferRetries > MAX_RETRY || hints->singleXferAttempt)
- ERR("too many retries -- aborting");
+ if (xferRetries > MAX_RETRY || hints->singleXferAttempt){
+ WARN("too many retries -- aborting");
+ return length - remaining;
+ }
}
assert(rc >= 0);
assert(rc <= remaining);
Expand Down
3 changes: 2 additions & 1 deletion src/mdtest.c
Original file line number Diff line number Diff line change
Expand Up @@ -733,6 +733,7 @@ void mdtest_read(int random, int dirs, const long dir_iter, char *path) {
read_buffer[0] = 42;
if (o.read_bytes != (size_t) o.backend->xfer(READ, aiori_fh, (IOR_size_t *) read_buffer, o.read_bytes, 0, o.backend_options)) {
WARNF("unable to read file %s", item);
+ o.verification_error += 1;
continue;
}
int pretend_rank = (2 * o.nstride + rank) % o.size;
Expand Down Expand Up @@ -2578,7 +2579,7 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE *
if(rank == 0 && total_errors){
VERBOSE(0, -1, "\nERROR: verifying the data on read (%lld errors)! Take the performance values with care!\n", total_errors);
}

+ aggregated_results->total_errors += total_errors;
MPI_Comm_free(&testComm);
}

Expand Down
1 change: 1 addition & 0 deletions src/mdtest.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ typedef struct
double time[MDTEST_LAST_NUM]; /* Time */
double time_before_barrier[MDTEST_TREE_CREATE_NUM]; /* individual time before executing the barrier */
uint64_t items[MDTEST_LAST_NUM]; /* Number of operations done in this process*/
+ uint64_t total_errors;

/* Statistics when hitting the stonewall */
double stonewall_time[MDTEST_LAST_NUM]; /* Max runtime of any process until completion / hit of the stonewall */
Expand Down

0 comments on commit 07e2feb

Please sign in to comment.