Skip to content

Commit

Permalink
Handle cases where a segment reaches timeout without being in either …
Browse files Browse the repository at this point in the history
…RUNNING or DONE state.

Until now, such a segment would hang the repair until Reaper was restarted.
With this change, the segment is aborted and rescheduled instead.
  • Loading branch information
adejanovski committed Feb 6, 2018
1 parent 0091a4b commit 35694c3
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -361,6 +361,16 @@ private void processTriggeredSegment(final RepairSegment segment, final JmxProxy
Seconds.secondsBetween(resultingSegment.getStartTime(), resultingSegment.getEndTime()).getSeconds());

SEGMENT_RUNNERS.remove(resultingSegment.getId());
} else {
// Something went wrong on the coordinator node and we never got the RUNNING notification
// or we are in an undetermined state.
// Let's just abort and reschedule the segment.
LOG.info(
"Repair command {} on segment {} never managed to start within timeout.",
commandId,
segmentId);
segmentFailed.set(true);
abort(resultingSegment, coordinator);
}
// Repair is still running, we'll renew lead on the segment when using Cassandra as storage backend
renewLead();
Expand Down Expand Up @@ -835,7 +845,9 @@ private boolean handleJmxNotificationForCassandra21(
// This gets called through the JMX proxy at the end
// regardless of succeeded or failed sessions.
LOG.debug(
"repair session finished for segment with id '{}' and repair number '{}'", segmentId, repairNumber);
"repair session finished for segment with id '{}' and repair number '{}'",
segmentId,
repairNumber);
condition.signalAll();
break;
default:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,6 @@
import io.cassandrareaper.jmx.JmxConnectionFactory;
import io.cassandrareaper.jmx.JmxProxy;
import io.cassandrareaper.jmx.RepairStatusHandler;
import io.cassandrareaper.service.RepairRunner;
import io.cassandrareaper.service.RingRange;
import io.cassandrareaper.service.SegmentRunner;
import io.cassandrareaper.storage.IStorage;
import io.cassandrareaper.storage.MemoryStorage;

Expand Down Expand Up @@ -388,7 +385,7 @@ protected JmxProxy connect(final Optional<RepairStatusHandler> handler, String h
executor.shutdown();

assertEquals(RepairSegment.State.NOT_STARTED, storage.getRepairSegment(runId, segmentId).get().getState());
assertEquals(1, storage.getRepairSegment(runId, segmentId).get().getFailCount());
assertEquals(2, storage.getRepairSegment(runId, segmentId).get().getFailCount());
}

@Test
Expand Down

0 comments on commit 35694c3

Please sign in to comment.