From 52cda6bb7a484453292ff64eef2bb5c3c3ecc6f5 Mon Sep 17 00:00:00 2001 From: Alexander Dejanovski Date: Thu, 1 Jun 2017 15:35:37 +0200 Subject: [PATCH] Cassandra backend improvements (#109) * Cassandra performance: Replace sequence ids with time-based UUIDs Makes the schema changes in a separate migration step, so that data in the repair_unit and repair_schedule tables can be migrated over. ref: - https://github.com/thelastpickle/cassandra-reaper/pull/99 - https://github.com/thelastpickle/cassandra-reaper/issues/94 - https://github.com/thelastpickle/cassandra-reaper/pull/99#discussion_r116239594 * Simplify the creation of repair runs and their segments. Repair runs and their segments are one unit of work in concept and the persistence layer should be designed accordingly. Previously they were separated because the concern of sequence generation for IDs was exposed in the code. This is now encapsulated within storage implementations. This work allows the CassandraStorage to implement segments as clustering keys within the repair_run table. ref: - https://github.com/thelastpickle/cassandra-reaper/issues/94 - https://github.com/thelastpickle/cassandra-reaper/pull/101 * In CassandraStorage implement segments as clustering keys within the repair_run table. Change required in IStorage so as to identify a segment both by runId and segmentId. ref: - https://github.com/thelastpickle/cassandra-reaper/issues/94 - https://github.com/thelastpickle/cassandra-reaper/pull/102 * Fix number of parallel repair computation Downgrade to Dropwizard 1.0.7 and Guava 19.0 to fix dependency issues Make repair manager schedule cycle configurable (was 30s hardcoded) ref: https://github.com/thelastpickle/cassandra-reaper/pull/108 * In CassandraStorage replace the table scan on `repair_run` with an async break-down of per cluster run-throughs of known run IDs. 
ref: https://github.com/thelastpickle/cassandra-reaper/pull/105 --- pom.xml | 2 +- resource/cassandra-reaper-cassandra-ssl.yaml | 1 + resource/cassandra-reaper-cassandra.yaml | 1 + resource/cassandra-reaper-h2.yaml | 1 + resource/cassandra-reaper-memory.yaml | 1 + resource/cassandra-reaper-postgres.yaml | 1 + resource/cassandra-reaper.yaml | 1 + .../com/spotify/reaper/ReaperApplication.java | 2 +- .../ReaperApplicationConfiguration.java | 48 ++- .../spotify/reaper/cassandra/JmxProxy.java | 64 +-- .../com/spotify/reaper/core/RepairRun.java | 17 +- .../spotify/reaper/core/RepairSchedule.java | 27 +- .../spotify/reaper/core/RepairSegment.java | 30 +- .../com/spotify/reaper/core/RepairUnit.java | 9 +- .../spotify/reaper/resources/CommonTools.java | 157 +++---- .../reaper/resources/RepairRunResource.java | 9 +- .../resources/RepairScheduleResource.java | 7 +- .../resources/view/RepairRunStatus.java | 9 +- .../resources/view/RepairScheduleStatus.java | 9 +- .../spotify/reaper/service/RepairManager.java | 7 +- .../spotify/reaper/service/RepairRunner.java | 47 +- .../reaper/service/SchedulingManager.java | 5 +- .../spotify/reaper/service/SegmentRunner.java | 27 +- .../reaper/storage/CassandraStorage.java | 404 ++++++++---------- .../com/spotify/reaper/storage/IStorage.java | 32 +- .../spotify/reaper/storage/MemoryStorage.java | 94 ++-- .../reaper/storage/PostgresStorage.java | 111 ++--- .../storage/cassandra/Migration003.java | 91 ++++ .../postgresql/LongCollectionSQLType.java | 7 +- .../storage/postgresql/RepairRunMapper.java | 9 +- .../postgresql/RepairRunStatusMapper.java | 8 +- .../postgresql/RepairScheduleMapper.java | 17 +- .../RepairScheduleStatusMapper.java | 8 +- .../postgresql/RepairSegmentMapper.java | 24 +- .../storage/postgresql/RepairUnitMapper.java | 8 +- .../postgresql/UuidArgumentFactory.java | 27 ++ .../db/cassandra/003_switch_to_uuids.cql | 112 +++++ .../com/spotify/reaper/AssertionTest.java | 22 + .../reaper/acceptance/ReaperCassandraIT.java | 9 +- 
.../reaper/acceptance/TestContext.java | 3 +- .../view/RepairScheduleStatusTest.java | 3 +- .../unit/resources/RepairRunResourceTest.java | 16 +- .../reaper/unit/service/RepairRunnerTest.java | 130 ++++-- .../unit/service/SegmentRunnerTest.java | 64 ++- .../cassandra-reaper-cassandra-at.yaml | 1 + 45 files changed, 1002 insertions(+), 680 deletions(-) create mode 100644 src/main/java/com/spotify/reaper/storage/cassandra/Migration003.java create mode 100644 src/main/java/com/spotify/reaper/storage/postgresql/UuidArgumentFactory.java create mode 100644 src/main/resources/db/cassandra/003_switch_to_uuids.cql create mode 100644 src/test/java/com/spotify/reaper/AssertionTest.java diff --git a/pom.xml b/pom.xml index 3c2faa29b..4758f8034 100755 --- a/pom.xml +++ b/pom.xml @@ -11,7 +11,7 @@ UTF-8 - 1.1.0 + 1.0.7 4.1.0 2.2.7 1.2.5 diff --git a/resource/cassandra-reaper-cassandra-ssl.yaml b/resource/cassandra-reaper-cassandra-ssl.yaml index 56c983c8e..70833a6f7 100644 --- a/resource/cassandra-reaper-cassandra-ssl.yaml +++ b/resource/cassandra-reaper-cassandra-ssl.yaml @@ -12,6 +12,7 @@ enableCrossOrigin: true incrementalRepair: false allowUnreachableNodes: false enableDynamicSeedList: true +repairManagerSchedulingIntervalSeconds: 30 jmxPorts: 127.0.0.1: 7198 diff --git a/resource/cassandra-reaper-cassandra.yaml b/resource/cassandra-reaper-cassandra.yaml index 115301742..b2bd95ade 100644 --- a/resource/cassandra-reaper-cassandra.yaml +++ b/resource/cassandra-reaper-cassandra.yaml @@ -12,6 +12,7 @@ enableCrossOrigin: true incrementalRepair: false allowUnreachableNodes: false enableDynamicSeedList: true +repairManagerSchedulingIntervalSeconds: 30 jmxPorts: 127.0.0.1: 7100 diff --git a/resource/cassandra-reaper-h2.yaml b/resource/cassandra-reaper-h2.yaml index 4979ec7d9..2bce73ac3 100644 --- a/resource/cassandra-reaper-h2.yaml +++ b/resource/cassandra-reaper-h2.yaml @@ -12,6 +12,7 @@ enableCrossOrigin: true incrementalRepair: false allowUnreachableNodes: false 
enableDynamicSeedList: true +repairManagerSchedulingIntervalSeconds: 30 jmxPorts: 127.0.0.1: 7100 diff --git a/resource/cassandra-reaper-memory.yaml b/resource/cassandra-reaper-memory.yaml index dd465c39d..f5af93ef6 100644 --- a/resource/cassandra-reaper-memory.yaml +++ b/resource/cassandra-reaper-memory.yaml @@ -12,6 +12,7 @@ enableCrossOrigin: true incrementalRepair: false allowUnreachableNodes: false enableDynamicSeedList: true +repairManagerSchedulingIntervalSeconds: 30 jmxPorts: 127.0.0.1: 7100 diff --git a/resource/cassandra-reaper-postgres.yaml b/resource/cassandra-reaper-postgres.yaml index aa0fb6969..cab780048 100644 --- a/resource/cassandra-reaper-postgres.yaml +++ b/resource/cassandra-reaper-postgres.yaml @@ -12,6 +12,7 @@ enableCrossOrigin: true incrementalRepair: false allowUnreachableNodes: false enableDynamicSeedList: true +repairManagerSchedulingIntervalSeconds: 30 jmxPorts: 127.0.0.1: 7100 diff --git a/resource/cassandra-reaper.yaml b/resource/cassandra-reaper.yaml index b068ce031..d37c1b97f 100644 --- a/resource/cassandra-reaper.yaml +++ b/resource/cassandra-reaper.yaml @@ -12,6 +12,7 @@ enableCrossOrigin: true incrementalRepair: false allowUnreachableNodes: false enableDynamicSeedList: true +repairManagerSchedulingIntervalSeconds: 30 jmxPorts: 127.0.0.1: 7100 diff --git a/src/main/java/com/spotify/reaper/ReaperApplication.java b/src/main/java/com/spotify/reaper/ReaperApplication.java index 6a047a4a6..f13d3ccff 100644 --- a/src/main/java/com/spotify/reaper/ReaperApplication.java +++ b/src/main/java/com/spotify/reaper/ReaperApplication.java @@ -122,7 +122,7 @@ public void run(ReaperApplicationConfiguration config, context.repairManager.initializeThreadPool( config.getRepairRunThreadCount(), config.getHangingRepairTimeoutMins(), TimeUnit.MINUTES, - 30, TimeUnit.SECONDS); + config.getRepairManagerSchedulingIntervalSeconds(), TimeUnit.SECONDS); if (context.storage == null) { LOG.info("initializing storage of type: {}", config.getStorageType()); diff 
--git a/src/main/java/com/spotify/reaper/ReaperApplicationConfiguration.java b/src/main/java/com/spotify/reaper/ReaperApplicationConfiguration.java index 0fdeec709..46ddc18d8 100644 --- a/src/main/java/com/spotify/reaper/ReaperApplicationConfiguration.java +++ b/src/main/java/com/spotify/reaper/ReaperApplicationConfiguration.java @@ -73,7 +73,7 @@ public class ReaperApplicationConfiguration extends Configuration { private String enableCrossOrigin; - + @JsonProperty private DataSourceFactory database = new DataSourceFactory(); @@ -86,14 +86,17 @@ public class ReaperApplicationConfiguration extends Configuration { @JsonProperty @DefaultValue("false") private Boolean allowUnreachableNodes; - + @JsonProperty private AutoSchedulingConfiguration autoScheduling; - + @JsonProperty @DefaultValue("true") private Boolean enableDynamicSeedList; + @JsonProperty + private Integer repairManagerSchedulingIntervalSeconds; + public int getSegmentCount() { return segmentCount; } @@ -117,7 +120,7 @@ public double getRepairIntensity() { public void setRepairIntensity(double repairIntensity) { this.repairIntensity = repairIntensity; } - + public Boolean getIncrementalRepair() { return incrementalRepair; } @@ -125,7 +128,7 @@ public Boolean getIncrementalRepair() { public void setIncrementalRepair(Boolean incrementalRepair) { this.incrementalRepair = incrementalRepair; } - + public Integer getScheduleDaysBetween() { return scheduleDaysBetween; } @@ -170,13 +173,13 @@ public void setDataSourceFactory(DataSourceFactory database) { this.database = database; } - public int getHangingRepairTimeoutMins() { - return hangingRepairTimeoutMins; + public int getRepairManagerSchedulingIntervalSeconds() { + return this.repairManagerSchedulingIntervalSeconds==null?30:this.repairManagerSchedulingIntervalSeconds; } @JsonProperty - public void setHangingRepairTimeoutMins(int hangingRepairTimeoutMins) { - this.hangingRepairTimeoutMins = hangingRepairTimeoutMins; + public void 
setRepairManagerSchedulingIntervalSeconds(int repairManagerSchedulingIntervalSeconds) { + this.repairManagerSchedulingIntervalSeconds = repairManagerSchedulingIntervalSeconds; } public Map getJmxPorts() { @@ -206,11 +209,11 @@ public AutoSchedulingConfiguration getAutoScheduling() { public void setAutoScheduling(AutoSchedulingConfiguration autoRepairScheduling) { this.autoScheduling = autoRepairScheduling; } - + public void setEnableDynamicSeedList(Boolean enableDynamicSeedList) { this.enableDynamicSeedList = enableDynamicSeedList; } - + public Boolean getEnableDynamicSeedList() { return this.enableDynamicSeedList==null?Boolean.TRUE:this.enableDynamicSeedList; } @@ -239,8 +242,8 @@ public String getPassword() { } } - - + + private CassandraFactory cassandra = new CassandraFactory(); @JsonProperty("cassandra") @@ -252,15 +255,24 @@ public CassandraFactory getCassandraFactory() { public void setCassandraFactory(CassandraFactory cassandra) { this.cassandra = cassandra; } - + public Boolean getAllowUnreachableNodes() { - return allowUnreachableNodes; + return allowUnreachableNodes != null ? 
allowUnreachableNodes : false; } public void setAllowUnreachableNodes(Boolean allow) { this.allowUnreachableNodes = allow; } - + + public int getHangingRepairTimeoutMins() { + return hangingRepairTimeoutMins; + } + + @JsonProperty + public void setHangingRepairTimeoutMins(int hangingRepairTimeoutMins) { + this.hangingRepairTimeoutMins = hangingRepairTimeoutMins; + } + public static class AutoSchedulingConfiguration { @JsonProperty @@ -277,7 +289,7 @@ public static class AutoSchedulingConfiguration { @JsonProperty private Duration scheduleSpreadPeriod; - + @JsonProperty private List excludedKeyspaces = Collections.emptyList(); @@ -324,7 +336,7 @@ public void setScheduleSpreadPeriod(Duration scheduleSpreadPeriod) { public boolean hasScheduleSpreadPeriod() { return scheduleSpreadPeriod != null; } - + public void setExcludedKeyspaces(List excludedKeyspaces) { this.excludedKeyspaces = excludedKeyspaces; } diff --git a/src/main/java/com/spotify/reaper/cassandra/JmxProxy.java b/src/main/java/com/spotify/reaper/cassandra/JmxProxy.java index b474ac52a..6cd17ab4b 100644 --- a/src/main/java/com/spotify/reaper/cassandra/JmxProxy.java +++ b/src/main/java/com/spotify/reaper/cassandra/JmxProxy.java @@ -170,7 +170,7 @@ static JmxProxy connect(Optional handler, String host, int if(cassandraVersion.startsWith("2.0") || cassandraVersion.startsWith("1.")){ ssProxy = JMX.newMBeanProxy(mbeanServerConn, ssMbeanName, StorageServiceMBean20.class); } - + CompactionManagerMBean cmProxy = JMX.newMBeanProxy(mbeanServerConn, cmMbeanName, CompactionManagerMBean.class); JmxProxy proxy = new JmxProxy(handler, host, jmxUrl, jmxConn, ssProxy, ssMbeanName, @@ -235,6 +235,18 @@ public List tokenRangeToEndpoint(String keyspace, RingRange tokenRange) return Lists.newArrayList(); } + /** + * @return all hosts in the ring with their host id + */ + @NotNull + public Map getEndpointToHostId() { + checkNotNull(ssProxy, "Looks like the proxy is not connected"); + Map hosts = + ((StorageServiceMBean) 
ssProxy).getEndpointToHostId(); + + return hosts; + } + /** * @return full class name of Cassandra's partitioner. */ @@ -309,8 +321,8 @@ public int getPendingCompactions() { public boolean isRepairRunning() { return isRepairRunningPre22() || isRepairRunningPost22() || isValidationCompactionRunning(); } - - + + /** * @return true if any repairs are running on the node. */ @@ -336,7 +348,7 @@ public boolean isRepairRunningPre22() { // If uncertain, assume it's running return true; } - + /** * @return true if any repairs are running on the node. */ @@ -345,7 +357,7 @@ public boolean isValidationCompactionRunning() { try { int activeCount = (Integer) mbeanServer.getAttribute(new ObjectName(VALIDATION_ACTIVE_OBJECT_NAME), VALUE_ATTRIBUTE); long pendingCount = (Long) mbeanServer.getAttribute(new ObjectName(VALIDATION_PENDING_OBJECT_NAME), VALUE_ATTRIBUTE); - + return activeCount + pendingCount != 0; } catch (IOException ignored) { LOG.warn(FAILED_TO_CONNECT_TO_USING_JMX, host, ignored); @@ -360,15 +372,15 @@ public boolean isValidationCompactionRunning() { // If uncertain, assume it's not running return false; } - + /** * New way of determining if a repair is running after C* 2.2 - * + * * @return true if any repairs are running on the node. */ public boolean isRepairRunningPost22() { try { - // list all mbeans in search of one with the name Repair#?? + // list all mbeans in search of one with the name Repair#?? // This is the replacement for AntiEntropySessions since Cassandra 2.2 Set beanSet = mbeanServer.queryNames(new ObjectName("org.apache.cassandra.internal:*"), null); for(Object bean:beanSet) { @@ -425,7 +437,7 @@ public boolean tableExists(String ks, String cf) { } return true; } - + public String getCassandraVersion(){ return ((StorageServiceMBean) ssProxy).getReleaseVersion(); } @@ -436,7 +448,7 @@ public String getCassandraVersion(){ * For time being, we don't allow local nor snapshot repairs. 
* * @return Repair command number, or 0 if nothing to repair - * @throws ReaperException + * @throws ReaperException */ public int triggerRepair(BigInteger beginToken, BigInteger endToken, String keyspace, RepairParallelism repairParallelism, Collection columnFamilies, boolean fullRepair) throws ReaperException { @@ -474,11 +486,11 @@ public int triggerRepair(BigInteger beginToken, BigInteger endToken, String keys throw new ReaperException(e); } } - - + + public int triggerRepairPost2dot2(boolean fullRepair, RepairParallelism repairParallelism, String keyspace, Collection columnFamilies, BigInteger beginToken, BigInteger endToken, String cassandraVersion) { Map options = new HashMap<>(); - + options.put(RepairOption.PARALLELISM_KEY, repairParallelism.getName()); //options.put(RepairOption.PRIMARY_RANGE_KEY, Boolean.toString(primaryRange)); options.put(RepairOption.INCREMENTAL_KEY, Boolean.toString(!fullRepair)); @@ -489,22 +501,22 @@ public int triggerRepairPost2dot2(boolean fullRepair, RepairParallelism repairPa if (fullRepair) { options.put(RepairOption.RANGES_KEY, beginToken.toString() + ":" + endToken.toString()); } - + //options.put(RepairOption.DATACENTERS_KEY, StringUtils.join(specificDataCenters, ",")); //options.put(RepairOption.HOSTS_KEY, StringUtils.join(specificHosts, ",")); - + return ((StorageServiceMBean) ssProxy).repairAsync(keyspace, options); } - + public int triggerRepair2dot1(boolean fullRepair, RepairParallelism repairParallelism, String keyspace, Collection columnFamilies, BigInteger beginToken, BigInteger endToken, String cassandraVersion) { if (fullRepair) { // full repair if (repairParallelism.equals(RepairParallelism.DATACENTER_AWARE)) { return ((StorageServiceMBean) ssProxy).forceRepairRangeAsync(beginToken.toString(), endToken.toString(), keyspace, repairParallelism.ordinal(), cassandraVersion.startsWith("2.2")?new HashSet():null, cassandraVersion.startsWith("2.2")?new HashSet():null, fullRepair, - columnFamilies.toArray(new 
String[columnFamilies.size()])); + columnFamilies.toArray(new String[columnFamilies.size()])); } - + boolean snapshotRepair = repairParallelism.equals(RepairParallelism.SEQUENTIAL); return ((StorageServiceMBean) ssProxy).forceRepairRangeAsync(beginToken.toString(), endToken.toString(), @@ -512,24 +524,24 @@ public int triggerRepair2dot1(boolean fullRepair, RepairParallelism repairParall cassandraVersion.startsWith("2.2")?new HashSet():null, cassandraVersion.startsWith("2.2")?new HashSet():null, fullRepair, columnFamilies.toArray(new String[columnFamilies.size()])); - } + } // incremental repair return ((StorageServiceMBean) ssProxy).forceRepairAsync(keyspace, Boolean.FALSE, Boolean.FALSE, Boolean.FALSE, fullRepair, columnFamilies.toArray(new String[columnFamilies.size()])); } - + public int triggerRepairPre2dot1(RepairParallelism repairParallelism, String keyspace, Collection columnFamilies, BigInteger beginToken, BigInteger endToken) { // Cassandra 1.2 and 2.0 compatibility if (repairParallelism.equals(RepairParallelism.DATACENTER_AWARE)) { return ((StorageServiceMBean20) ssProxy).forceRepairRangeAsync(beginToken.toString(), endToken.toString(), keyspace, repairParallelism.ordinal(), null, null, - columnFamilies.toArray(new String[columnFamilies.size()])); + columnFamilies.toArray(new String[columnFamilies.size()])); } boolean snapshotRepair = repairParallelism.equals(RepairParallelism.SEQUENTIAL); return ((StorageServiceMBean20) ssProxy).forceRepairRangeAsync(beginToken.toString(), endToken.toString(), keyspace, snapshotRepair, false, columnFamilies.toArray(new String[columnFamilies.size()])); - + } @@ -549,12 +561,12 @@ public void handleNotification(Notification notification, Object handback) { if (repairStatusHandler.isPresent() && ("repair").equals(type)) { processOldApiNotification(notification); } - + if (repairStatusHandler.isPresent() && ("progress").equals(type)) { processNewApiNotification(notification); } } - + /** * Handles notifications from the old 
repair API (forceRepairAsync) */ @@ -574,7 +586,7 @@ private void processOldApiNotification(Notification notification) { LOG.error("Error while processing JMX notification", e); } } - + /** * Handles notifications from the new repair API (repairAsync) */ @@ -696,7 +708,7 @@ public void clearSnapshot(String repairId, String keyspaceName) throws ReaperExc throw new ReaperException(e); } } - + public List getLiveNodes() throws ReaperException { checkNotNull(ssProxy, "Looks like the proxy is not connected"); diff --git a/src/main/java/com/spotify/reaper/core/RepairRun.java b/src/main/java/com/spotify/reaper/core/RepairRun.java index 39b6007a8..6ab04b3b1 100644 --- a/src/main/java/com/spotify/reaper/core/RepairRun.java +++ b/src/main/java/com/spotify/reaper/core/RepairRun.java @@ -14,6 +14,7 @@ package com.spotify.reaper.core; import java.util.Objects; +import java.util.UUID; import org.apache.cassandra.repair.RepairParallelism; import org.joda.time.DateTime; @@ -21,7 +22,7 @@ public class RepairRun implements Comparable { - private final long id; + private final UUID id; // IDEA: maybe we want to have start and stop token for parallel runners on same repair run? 
//private final long startToken; @@ -30,7 +31,7 @@ public class RepairRun implements Comparable { private final String cause; private final String owner; private final String clusterName; - private final long repairUnitId; + private final UUID repairUnitId; private final RunState runState; private final DateTime creationTime; private final DateTime startTime; @@ -41,7 +42,7 @@ public class RepairRun implements Comparable { private final int segmentCount; private final RepairParallelism repairParallelism; - private RepairRun(Builder builder, long id) { + private RepairRun(Builder builder, UUID id) { this.id = id; this.clusterName = builder.clusterName; this.repairUnitId = builder.repairUnitId; @@ -58,11 +59,11 @@ private RepairRun(Builder builder, long id) { this.repairParallelism = builder.repairParallelism; } - public long getId() { + public UUID getId() { return id; } - public long getRepairUnitId() { + public UUID getRepairUnitId() { return repairUnitId; } @@ -174,7 +175,7 @@ public boolean isTerminated() { public static class Builder { public final String clusterName; - public final long repairUnitId; + public final UUID repairUnitId; private RunState runState; private DateTime creationTime; private double intensity; @@ -188,7 +189,7 @@ public static class Builder { private int segmentCount; private RepairParallelism repairParallelism; - public Builder(String clusterName, long repairUnitId, DateTime creationTime, + public Builder(String clusterName, UUID repairUnitId, DateTime creationTime, double intensity, int segmentCount, RepairParallelism repairParallelism) { this.clusterName = clusterName; this.repairUnitId = repairUnitId; @@ -270,7 +271,7 @@ public Builder repairParallelism(RepairParallelism repairParallelism) { return this; } - public RepairRun build(long id) { + public RepairRun build(UUID id) { return new RepairRun(this, id); } } diff --git a/src/main/java/com/spotify/reaper/core/RepairSchedule.java 
b/src/main/java/com/spotify/reaper/core/RepairSchedule.java index 654644289..5de15928e 100644 --- a/src/main/java/com/spotify/reaper/core/RepairSchedule.java +++ b/src/main/java/com/spotify/reaper/core/RepairSchedule.java @@ -16,19 +16,20 @@ import com.google.common.collect.ImmutableList; import com.spotify.reaper.core.RepairSegment.State; import com.spotify.reaper.storage.postgresql.LongCollectionSQLType; +import java.util.UUID; import org.apache.cassandra.repair.RepairParallelism; import org.joda.time.DateTime; public class RepairSchedule { - private final long id; + private final UUID id; - private final long repairUnitId; + private final UUID repairUnitId; private final State state; private final int daysBetween; private final DateTime nextActivation; - private final ImmutableList runHistory; + private final ImmutableList runHistory; private final int segmentCount; private final RepairParallelism repairParallelism; private final double intensity; @@ -36,7 +37,7 @@ public class RepairSchedule { private final String owner; private final DateTime pauseTime; - private RepairSchedule(Builder builder, long id) { + private RepairSchedule(Builder builder, UUID id) { this.id = id; this.repairUnitId = builder.repairUnitId; this.state = builder.state; @@ -51,11 +52,11 @@ private RepairSchedule(Builder builder, long id) { this.pauseTime = builder.pauseTime; } - public long getId() { + public UUID getId() { return id; } - public long getRepairUnitId() { + public UUID getRepairUnitId() { return repairUnitId; } @@ -75,7 +76,7 @@ public DateTime getNextActivation() { return nextActivation; } - public ImmutableList getRunHistory() { + public ImmutableList getRunHistory() { return runHistory; } @@ -123,11 +124,11 @@ public enum State { public static class Builder { - public final long repairUnitId; + public final UUID repairUnitId; private State state; private int daysBetween; private DateTime nextActivation; - private ImmutableList runHistory; + private ImmutableList 
runHistory; private int segmentCount; private RepairParallelism repairParallelism; private double intensity; @@ -135,8 +136,8 @@ public static class Builder { private String owner; private DateTime pauseTime; - public Builder(long repairUnitId, State state, int daysBetween, DateTime nextActivation, - ImmutableList runHistory, int segmentCount, + public Builder(UUID repairUnitId, State state, int daysBetween, DateTime nextActivation, + ImmutableList runHistory, int segmentCount, RepairParallelism repairParallelism, double intensity, DateTime creationTime) { this.repairUnitId = repairUnitId; @@ -181,7 +182,7 @@ public Builder nextActivation(DateTime nextActivation) { return this; } - public Builder runHistory(ImmutableList runHistory) { + public Builder runHistory(ImmutableList runHistory) { this.runHistory = runHistory; return this; } @@ -216,7 +217,7 @@ public Builder pauseTime(DateTime pauseTime) { return this; } - public RepairSchedule build(long id) { + public RepairSchedule build(UUID id) { return new RepairSchedule(this, id); } } diff --git a/src/main/java/com/spotify/reaper/core/RepairSegment.java b/src/main/java/com/spotify/reaper/core/RepairSegment.java index 6643782e0..2188af594 100644 --- a/src/main/java/com/spotify/reaper/core/RepairSegment.java +++ b/src/main/java/com/spotify/reaper/core/RepairSegment.java @@ -18,12 +18,13 @@ import org.joda.time.DateTime; import java.math.BigInteger; +import java.util.UUID; public class RepairSegment { - private final long id; - private final long runId; - private final long repairUnitId; + private final UUID id; + private final UUID runId; + private final UUID repairUnitId; private final RingRange tokenRange; private final int failCount; private final State state; @@ -32,7 +33,7 @@ public class RepairSegment { private final DateTime startTime; private final DateTime endTime; - private RepairSegment(Builder builder, long id) { + private RepairSegment(Builder builder, UUID id) { this.id = id; this.runId = builder.runId; 
this.repairUnitId = builder.repairUnitId; @@ -45,15 +46,15 @@ private RepairSegment(Builder builder, long id) { this.endTime = builder.endTime; } - public long getId() { + public UUID getId() { return id; } - public long getRunId() { + public UUID getRunId() { return runId; } - public long getRepairUnitId() { + public UUID getRepairUnitId() { return repairUnitId; } @@ -105,9 +106,10 @@ public enum State { public static class Builder { - public final long runId; + public final RingRange tokenRange; - private final long repairUnitId; + private final UUID repairUnitId; + private UUID runId; private int failCount; private State state; private String coordinatorHost; @@ -115,8 +117,7 @@ public static class Builder { private DateTime startTime; private DateTime endTime; - public Builder(long runId, RingRange tokenRange, long repairUnitId) { - this.runId = runId; + public Builder(RingRange tokenRange, UUID repairUnitId) { this.repairUnitId = repairUnitId; this.tokenRange = tokenRange; this.failCount = 0; @@ -135,6 +136,11 @@ private Builder(RepairSegment original) { endTime = original.endTime; } + public Builder withRunId(UUID runId){ + this.runId = runId; + return this; + } + public Builder failCount(int failCount) { this.failCount = failCount; return this; @@ -165,7 +171,7 @@ public Builder endTime(DateTime endTime) { return this; } - public RepairSegment build(long id) { + public RepairSegment build(UUID id) { return new RepairSegment(this, id); } } diff --git a/src/main/java/com/spotify/reaper/core/RepairUnit.java b/src/main/java/com/spotify/reaper/core/RepairUnit.java index f0feed47d..f5a0e1019 100644 --- a/src/main/java/com/spotify/reaper/core/RepairUnit.java +++ b/src/main/java/com/spotify/reaper/core/RepairUnit.java @@ -14,16 +14,17 @@ package com.spotify.reaper.core; import java.util.Set; +import java.util.UUID; public class RepairUnit { - private final long id; + private final UUID id; private final String clusterName; private final String keyspaceName; private 
final Set columnFamilies; private final Boolean incrementalRepair; - private RepairUnit(Builder builder, long id) { + private RepairUnit(Builder builder, UUID id) { this.id = id; this.clusterName = builder.clusterName; this.keyspaceName = builder.keyspaceName; @@ -31,7 +32,7 @@ private RepairUnit(Builder builder, long id) { this.incrementalRepair = builder.incrementalRepair; } - public long getId() { + public UUID getId() { return id; } @@ -76,7 +77,7 @@ private Builder(RepairUnit original) { incrementalRepair = original.incrementalRepair; } - public RepairUnit build(long id) { + public RepairUnit build(UUID id) { return new RepairUnit(this, id); } } diff --git a/src/main/java/com/spotify/reaper/resources/CommonTools.java b/src/main/java/com/spotify/reaper/resources/CommonTools.java index e4b64aa8b..a167cc214 100644 --- a/src/main/java/com/spotify/reaper/resources/CommonTools.java +++ b/src/main/java/com/spotify/reaper/resources/CommonTools.java @@ -39,8 +39,9 @@ import com.spotify.reaper.core.RepairUnit; import com.spotify.reaper.service.RingRange; import com.spotify.reaper.service.SegmentGenerator; +import java.util.UUID; -public class CommonTools { +public final class CommonTools { private static final Logger LOG = LoggerFactory.getLogger(CommonTools.class); @@ -69,25 +70,34 @@ public static RepairRun registerRepairRun(AppContext context, Cluster cluster, Map nodes = getClusterNodes(context, cluster, repairUnit); // the next step is to prepare a repair run object - RepairRun repairRun = storeNewRepairRun(context, cluster, repairUnit, cause, owner, nodes.keySet().size(), - repairParallelism, intensity); - checkNotNull(repairRun, "failed preparing repair run"); + segments = repairUnit.getIncrementalRepair() ? nodes.keySet().size() : tokenSegments.size(); - // Notice that our RepairRun core object doesn't contain pointer to - // the set of RepairSegments in the run, as they are accessed separately. 
- // However, RepairSegment has a pointer to the RepairRun it lives in + RepairRun.Builder runBuilder + = createNewRepairRun(cluster, repairUnit, cause, owner, segments, repairParallelism, intensity); // the last preparation step is to generate actual repair segments - if(!repairUnit.getIncrementalRepair()) { - return storeNewRepairSegments(context, tokenSegments, repairRun, repairUnit); - } else { - return storeNewRepairSegmentsForIncrementalRepair(context, nodes, repairRun, repairUnit); + List segmentBuilders = repairUnit.getIncrementalRepair() + ? createRepairSegmentsForIncrementalRepair(nodes, repairUnit) + : createRepairSegments(tokenSegments, repairUnit); + + RepairRun repairRun = context.storage.addRepairRun(runBuilder, segmentBuilders); + + if (null == repairRun){ + String errMsg = String.format( + "failed storing repair run for cluster \"%s\", keyspace \"%s\", and column families: %s", + cluster.getName(), + repairUnit.getKeyspaceName(), + repairUnit.getColumnFamilies()); + + LOG.error(errMsg); + throw new ReaperException(errMsg); } + return repairRun; } /** * Splits a token range for given table into segments - * @param incrementalRepair + * @param incrementalRepair * * @return the created segments * @throws ReaperException when fails to discover seeds for the cluster or fails to connect to @@ -97,7 +107,7 @@ private static List generateSegments(AppContext context, Cluster targ int segmentCount, Boolean incrementalRepair) throws ReaperException { List segments = null; - Preconditions.checkState(targetCluster.getPartitioner() != null, + Preconditions.checkState(targetCluster.getPartitioner() != null, "no partitioner for cluster: " + targetCluster.getName()); SegmentGenerator sg = new SegmentGenerator(targetCluster.getPartitioner()); Set seedHosts = targetCluster.getSeedHosts(); @@ -116,7 +126,7 @@ private static List generateSegments(AppContext context, Cluster targ LOG.warn("couldn't connect to host: {}, will try next one", host, e); } } - + if (segments 
== null) { String errMsg = String.format("failed to generate repair segments for cluster \"%s\"", targetCluster.getName()); @@ -132,80 +142,49 @@ private static List generateSegments(AppContext context, Cluster targ * @return the new, just stored RepairRun instance * @throws ReaperException when fails to store the RepairRun. */ - private static RepairRun storeNewRepairRun(AppContext context, Cluster cluster, - RepairUnit repairUnit, Optional cause, - String owner, int segments, - RepairParallelism repairParallelism, Double intensity) - throws ReaperException { - RepairRun.Builder runBuilder = new RepairRun.Builder(cluster.getName(), repairUnit.getId(), - DateTime.now(), intensity, - segments, repairParallelism); - runBuilder.cause(cause.isPresent() ? cause.get() : "no cause specified"); - runBuilder.owner(owner); - RepairRun newRepairRun = context.storage.addRepairRun(runBuilder); - if (newRepairRun == null) { - String errMsg = String.format("failed storing repair run for cluster \"%s\", " - + "keyspace \"%s\", and column families: %s", - cluster.getName(), repairUnit.getKeyspaceName(), - repairUnit.getColumnFamilies()); - LOG.error(errMsg); - throw new ReaperException(errMsg); - } - return newRepairRun; + private static RepairRun.Builder createNewRepairRun( + Cluster cluster, + RepairUnit repairUnit, + Optional cause, + String owner, + int segments, + RepairParallelism repairParallelism, + Double intensity) throws ReaperException { + + return new RepairRun.Builder(cluster.getName(), repairUnit.getId(), DateTime.now(), intensity, segments, repairParallelism) + .cause(cause.isPresent() ? cause.get() : "no cause specified") + .owner(owner); } /** * Creates the repair runs linked to given RepairRun and stores them directly in the storage * backend. 
*/ - private static RepairRun storeNewRepairSegments(AppContext context, List tokenSegments, - RepairRun repairRun, RepairUnit repairUnit) { + private static List createRepairSegments(List tokenSegments, RepairUnit repairUnit){ + List repairSegmentBuilders = Lists.newArrayList(); - for (RingRange range : tokenSegments) { - RepairSegment.Builder repairSegment = new RepairSegment.Builder(repairRun.getId(), range, - repairUnit.getId()); - repairSegmentBuilders.add(repairSegment); - } - context.storage.addRepairSegments(repairSegmentBuilders, repairRun.getId()); - if (repairRun.getSegmentCount() != tokenSegments.size()) { - LOG.debug("created segment amount differs from expected default {} != {}", - repairRun.getSegmentCount(), tokenSegments.size()); - RepairRun newRepairRun = repairRun.with().segmentCount(tokenSegments.size()).build(repairRun.getId()); - context.storage.updateRepairRun(newRepairRun); - - return newRepairRun; - } - - return repairRun; + tokenSegments.forEach(range -> repairSegmentBuilders.add(new RepairSegment.Builder(range, repairUnit.getId()))); + return repairSegmentBuilders; } - - + + /** * Creates the repair runs linked to given RepairRun and stores them directly in the storage * backend in case of incrementalRepair */ - private static RepairRun storeNewRepairSegmentsForIncrementalRepair(AppContext context, Map nodes, - RepairRun repairRun, RepairUnit repairUnit) { + private static List createRepairSegmentsForIncrementalRepair( + Map nodes, + RepairUnit repairUnit) { + List repairSegmentBuilders = Lists.newArrayList(); - for (Entry range : nodes.entrySet()) { - RepairSegment.Builder repairSegment = new RepairSegment.Builder(repairRun.getId(), range.getValue(), - repairUnit.getId()); - repairSegment.coordinatorHost(range.getKey()); - repairSegmentBuilders.add(repairSegment); - } - context.storage.addRepairSegments(repairSegmentBuilders, repairRun.getId()); - if (repairRun.getSegmentCount() != nodes.keySet().size()) { - LOG.debug("created segment 
amount differs from expected default {} != {}", - repairRun.getSegmentCount(), nodes.keySet().size()); - RepairRun newRepairRun = repairRun.with().segmentCount(nodes.keySet().size()).build(repairRun.getId()); - context.storage.updateRepairRun(newRepairRun); - - return newRepairRun; - } - - return repairRun; + + nodes.entrySet().forEach(range + -> repairSegmentBuilders.add( + new RepairSegment.Builder(range.getValue(), repairUnit.getId()).coordinatorHost(range.getKey()))); + + return repairSegmentBuilders; } - + private static Map getClusterNodes(AppContext context, Cluster targetCluster, RepairUnit repairUnit) throws ReaperException { Set nodes = Sets.newHashSet(); ConcurrentHashMap nodesWithRanges = new ConcurrentHashMap(); @@ -216,27 +195,27 @@ private static Map getClusterNodes(AppContext context, Clust LOG.error(errMsg); throw new ReaperException(errMsg); } - - + + Map, List> rangeToEndpoint = Maps.newHashMap(); for (String host : seedHosts) { try (JmxProxy jmxProxy = context.jmxConnectionFactory.connect(host)) { - rangeToEndpoint = jmxProxy.getRangeToEndpointMap(repairUnit.getKeyspaceName()); + rangeToEndpoint = jmxProxy.getRangeToEndpointMap(repairUnit.getKeyspaceName()); break; } catch (ReaperException e) { LOG.warn("couldn't connect to host: {}, will try next one", host, e); } } - + for(Entry, List> tokenRangeToEndpoint:rangeToEndpoint.entrySet()) { String node = tokenRangeToEndpoint.getValue().get(0); RingRange range = new RingRange(tokenRangeToEndpoint.getKey().get(0), tokenRangeToEndpoint.getKey().get(1)); - RingRange added = nodesWithRanges.putIfAbsent(node, range); + RingRange added = nodesWithRanges.putIfAbsent(node, range); } - + return nodesWithRanges; } - + /** * Instantiates a RepairSchedule and stores it in the storage backend. 
@@ -257,11 +236,11 @@ public static RepairSchedule storeNewRepairSchedule( throws ReaperException { RepairSchedule.Builder scheduleBuilder = new RepairSchedule.Builder(repairUnit.getId(), RepairSchedule.State.ACTIVE, daysBetween, - nextActivation, ImmutableList.of(), segments, + nextActivation, ImmutableList.of(), segments, repairParallelism, intensity, DateTime.now()); scheduleBuilder.owner(owner); - + Collection repairSchedules = context.storage.getRepairSchedulesForClusterAndKeyspace(repairUnit.getClusterName(), repairUnit.getKeyspaceName()); for(RepairSchedule sched:repairSchedules){ Optional repairUnitForSched = context.storage.getRepairUnit(sched.getRepairUnitId()); @@ -276,7 +255,7 @@ public static RepairSchedule storeNewRepairSchedule( } } } - + RepairSchedule newRepairSchedule = context.storage.addRepairSchedule(scheduleBuilder); if (newRepairSchedule == null) { String errMsg = String.format("failed storing repair schedule for cluster \"%s\", " @@ -288,13 +267,13 @@ public static RepairSchedule storeNewRepairSchedule( } return newRepairSchedule; } - + private static final boolean aConflictingScheduleAlreadyExists(RepairUnit newRepairUnit, RepairUnit existingRepairUnit){ return (newRepairUnit.getColumnFamilies().isEmpty() && existingRepairUnit.getColumnFamilies().isEmpty()) || newRepairUnit.getColumnFamilies().isEmpty() && !existingRepairUnit.getColumnFamilies().isEmpty() || !newRepairUnit.getColumnFamilies().isEmpty() && existingRepairUnit.getColumnFamilies().isEmpty() || !Sets.intersection(existingRepairUnit.getColumnFamilies(),newRepairUnit.getColumnFamilies()).isEmpty(); - + } public static final Splitter COMMA_SEPARATED_LIST_SPLITTER = @@ -331,7 +310,7 @@ public static RepairUnit getNewOrExistingRepairUnit(AppContext context, Cluster Optional storedRepairUnit = context.storage.getRepairUnit(cluster.getName(), keyspace, tableNames); RepairUnit theRepairUnit; - + Optional cassandraVersion = Optional.absent(); for (String host : cluster.getSeedHosts()) { 
try (JmxProxy jmxProxy = context.jmxConnectionFactory.connect(host)) { @@ -347,7 +326,7 @@ public static RepairUnit getNewOrExistingRepairUnit(AppContext context, Cluster LOG.error(errMsg); throw new ReaperException(errMsg); } - + if (storedRepairUnit.isPresent() && storedRepairUnit.get().getIncrementalRepair().equals(incrementalRepair)) { LOG.info("use existing repair unit for cluster '{}', keyspace '{}', and column families: {}", cluster.getName(), keyspace, tableNames); @@ -377,4 +356,6 @@ public static Set parseSeedHosts(String seedHost) { return Arrays.stream(seedHost.split(",")).map(String::trim).collect(Collectors.toSet()); } + private CommonTools(){} + } diff --git a/src/main/java/com/spotify/reaper/resources/RepairRunResource.java b/src/main/java/com/spotify/reaper/resources/RepairRunResource.java index 1afa08f62..88ea2ccc5 100644 --- a/src/main/java/com/spotify/reaper/resources/RepairRunResource.java +++ b/src/main/java/com/spotify/reaper/resources/RepairRunResource.java @@ -56,6 +56,7 @@ import com.spotify.reaper.core.RepairSegment; import com.spotify.reaper.core.RepairUnit; import com.spotify.reaper.resources.view.RepairRunStatus; +import java.util.UUID; @Path("/repair_run") @Produces(MediaType.APPLICATION_JSON) @@ -253,7 +254,7 @@ public static Response checkRequestForAddRepair( @Path("/{id}") public Response modifyRunState( @Context UriInfo uriInfo, - @PathParam("id") Long repairRunId, + @PathParam("id") UUID repairRunId, @QueryParam("state") Optional state) throws ReaperException { LOG.info("modify repair run state called with: id = {}, state = {}", repairRunId, state); @@ -282,7 +283,7 @@ public Response modifyRunState( Collection repairRuns = context.storage.getRepairRunsForUnit(repairRun.get().getRepairUnitId()); for(RepairRun run:repairRuns){ - if(run.getId()!=repairRunId && run.getRunState().equals(RunState.RUNNING)){ + if(!run.getId().equals(repairRunId) && run.getRunState().equals(RunState.RUNNING)){ String errMsg = "repair unit already has 
run " + run.getId() + " in RUNNING state"; LOG.error(errMsg); return Response.status(Response.Status.CONFLICT).entity(errMsg).build(); @@ -373,7 +374,7 @@ private Response abortRun(RepairRun repairRun, RepairUnit repairUnit, int segmen */ @GET @Path("/{id}") - public Response getRepairRun(@PathParam("id") Long repairRunId) { + public Response getRepairRun(@PathParam("id") UUID repairRunId) { LOG.debug("get repair_run called with: id = {}", repairRunId); Optional repairRun = context.storage.getRepairRun(repairRunId); if (repairRun.isPresent()) { @@ -497,7 +498,7 @@ public Set splitStateParam(Optional state) { */ @DELETE @Path("/{id}") - public Response deleteRepairRun(@PathParam("id") Long runId, + public Response deleteRepairRun(@PathParam("id") UUID runId, @QueryParam("owner") Optional owner) { LOG.info("delete repair run called with runId: {}, and owner: {}", runId, owner); if (!owner.isPresent()) { diff --git a/src/main/java/com/spotify/reaper/resources/RepairScheduleResource.java b/src/main/java/com/spotify/reaper/resources/RepairScheduleResource.java index 21b943642..cc8f42671 100644 --- a/src/main/java/com/spotify/reaper/resources/RepairScheduleResource.java +++ b/src/main/java/com/spotify/reaper/resources/RepairScheduleResource.java @@ -52,6 +52,7 @@ import com.spotify.reaper.core.RepairUnit; import com.spotify.reaper.resources.view.RepairScheduleStatus; import com.spotify.reaper.service.SchedulingManager; +import java.util.UUID; @Path("/repair_schedule") @Produces(MediaType.APPLICATION_JSON) @@ -212,7 +213,7 @@ public Response addRepairSchedule( @Path("/{id}") public Response modifyState( @Context UriInfo uriInfo, - @PathParam("id") Long repairScheduleId, + @PathParam("id") UUID repairScheduleId, @QueryParam("state") Optional state) { LOG.info("modify repair schedule state called with: id = {}, state = {}", @@ -290,7 +291,7 @@ private Response resumeSchedule(RepairSchedule repairSchedule, RepairUnit repair */ @GET @Path("/{id}") - public Response 
getRepairSchedule(@PathParam("id") Long repairScheduleId) { + public Response getRepairSchedule(@PathParam("id") UUID repairScheduleId) { LOG.debug("get repair_schedule called with: id = {}", repairScheduleId); Optional repairSchedule = context.storage.getRepairSchedule(repairScheduleId); if (repairSchedule.isPresent()) { @@ -399,7 +400,7 @@ private Collection getScheduleList(Optional clusterName, */ @DELETE @Path("/{id}") - public Response deleteRepairSchedule(@PathParam("id") Long repairScheduleId, + public Response deleteRepairSchedule(@PathParam("id") UUID repairScheduleId, @QueryParam("owner") Optional owner) { LOG.info("delete repair schedule called with repairScheduleId: {}, and owner: {}", repairScheduleId, owner); diff --git a/src/main/java/com/spotify/reaper/resources/view/RepairRunStatus.java b/src/main/java/com/spotify/reaper/resources/view/RepairRunStatus.java index 2171e25d0..dc1e1872b 100644 --- a/src/main/java/com/spotify/reaper/resources/view/RepairRunStatus.java +++ b/src/main/java/com/spotify/reaper/resources/view/RepairRunStatus.java @@ -26,6 +26,7 @@ import org.joda.time.format.ISODateTimeFormat; import java.util.Collection; +import java.util.UUID; /** * Contains the data to be shown when querying repair run status. 
@@ -39,7 +40,7 @@ public class RepairRunStatus { private String owner; @JsonProperty - private long id; + private UUID id; @JsonProperty("cluster_name") private String clusterName; @@ -95,7 +96,7 @@ public class RepairRunStatus { public RepairRunStatus() { } - public RepairRunStatus(long runId, String clusterName, String keyspaceName, + public RepairRunStatus(UUID runId, String clusterName, String keyspaceName, Collection columnFamilies, int segmentsRepaired, int totalSegments, RepairRun.RunState state, DateTime startTime, DateTime endTime, String cause, String owner, String lastEvent, DateTime creationTime, DateTime pauseTime, double intensity, boolean incrementalRepair, @@ -229,11 +230,11 @@ public void setOwner(String owner) { this.owner = owner; } - public long getId() { + public UUID getId() { return id; } - public void setId(long id) { + public void setId(UUID id) { this.id = id; } diff --git a/src/main/java/com/spotify/reaper/resources/view/RepairScheduleStatus.java b/src/main/java/com/spotify/reaper/resources/view/RepairScheduleStatus.java index 6fa4faff0..7055882ec 100644 --- a/src/main/java/com/spotify/reaper/resources/view/RepairScheduleStatus.java +++ b/src/main/java/com/spotify/reaper/resources/view/RepairScheduleStatus.java @@ -24,11 +24,12 @@ import org.joda.time.format.ISODateTimeFormat; import java.util.Collection; +import java.util.UUID; public class RepairScheduleStatus { @JsonProperty - private long id; + private UUID id; @JsonProperty private String owner; @@ -75,7 +76,7 @@ public class RepairScheduleStatus { public RepairScheduleStatus() { } - public RepairScheduleStatus(long id, String owner, String clusterName, String keyspaceName, + public RepairScheduleStatus(UUID id, String owner, String clusterName, String keyspaceName, Collection columnFamilies, RepairSchedule.State state, DateTime creationTime, DateTime nextActivation, DateTime pauseTime, double intensity, boolean incrementalRepair, int segmentCount, RepairParallelism 
repairParallelism, @@ -115,11 +116,11 @@ public RepairScheduleStatus(RepairSchedule repairSchedule, RepairUnit repairUnit ); } - public long getId() { + public UUID getId() { return id; } - public void setId(long id) { + public void setId(UUID id) { this.id = id; } diff --git a/src/main/java/com/spotify/reaper/service/RepairManager.java b/src/main/java/com/spotify/reaper/service/RepairManager.java index c32af3f3f..3fc327be5 100644 --- a/src/main/java/com/spotify/reaper/service/RepairManager.java +++ b/src/main/java/com/spotify/reaper/service/RepairManager.java @@ -21,6 +21,7 @@ import com.spotify.reaper.cassandra.JmxProxy; import com.spotify.reaper.core.RepairRun; import com.spotify.reaper.core.RepairSegment; +import java.util.UUID; public class RepairManager { @@ -36,7 +37,7 @@ public long getRepairTimeoutMillis() { // Caching all active RepairRunners. @VisibleForTesting - public Map repairRunners = Maps.newConcurrentMap(); + public Map repairRunners = Maps.newConcurrentMap(); public void initializeThreadPool(int threadAmount, long repairTimeout, TimeUnit repairTimeoutTimeUnit, long retryDelay, @@ -81,7 +82,7 @@ public void resumeRunningRepairRuns(AppContext context) throws ReaperException { public RepairRun startRepairRun(AppContext context, RepairRun runToBeStarted) throws ReaperException { assert null != executor : "you need to initialize the thread pool first"; - long runId = runToBeStarted.getId(); + UUID runId = runToBeStarted.getId(); LOG.info("Starting a run with id #{} with current state '{}'", runId, runToBeStarted.getRunState()); switch (runToBeStarted.getRunState()) { @@ -128,7 +129,7 @@ public RepairRun startRepairRun(AppContext context, RepairRun runToBeStarted) th } } - private void startRunner(AppContext context, long runId) { + private void startRunner(AppContext context, UUID runId) { if (!repairRunners.containsKey(runId)) { LOG.info("scheduling repair for repair run #{}", runId); try { diff --git 
a/src/main/java/com/spotify/reaper/service/RepairRunner.java b/src/main/java/com/spotify/reaper/service/RepairRunner.java index 5c3dd9f96..09478a71b 100644 --- a/src/main/java/com/spotify/reaper/service/RepairRunner.java +++ b/src/main/java/com/spotify/reaper/service/RepairRunner.java @@ -17,7 +17,7 @@ import java.util.Collections; import java.util.List; import java.util.Map; -import java.util.concurrent.atomic.AtomicLongArray; +import java.util.concurrent.atomic.AtomicReferenceArray; import org.apache.cassandra.repair.RepairParallelism; import org.joda.time.DateTime; @@ -39,19 +39,20 @@ import com.spotify.reaper.core.RepairRun; import com.spotify.reaper.core.RepairSegment; import com.spotify.reaper.core.RepairUnit; +import java.util.UUID; public class RepairRunner implements Runnable { private static final Logger LOG = LoggerFactory.getLogger(RepairRunner.class); private final AppContext context; - private final long repairRunId; + private final UUID repairRunId; private final String clusterName; private JmxProxy jmxConnection; - private final AtomicLongArray currentlyRunningSegments; + private final AtomicReferenceArray currentlyRunningSegments; private final List parallelRanges; - public RepairRunner(AppContext context, long repairRunId) + public RepairRunner(AppContext context, UUID repairRunId) throws ReaperException { LOG.debug("Creating RepairRunner for run with ID {}", repairRunId); this.context = context; @@ -69,14 +70,14 @@ public RepairRunner(AppContext context, long repairRunId) JmxProxy jmx = this.context.jmxConnectionFactory.connectAny(cluster.get()); String keyspace = repairUnitOpt.get().getKeyspaceName(); - int parallelRepairs = getPossibleParallelRepairsCount(jmx.getRangeToEndpointMap(keyspace)); + int parallelRepairs = getPossibleParallelRepairsCount(jmx.getRangeToEndpointMap(keyspace), jmx.getEndpointToHostId()); if(repairUnitOpt.isPresent() && repairUnitOpt.get().getIncrementalRepair()) { // with incremental repair, can't have more parallel 
repairs than nodes parallelRepairs = 1; } - currentlyRunningSegments = new AtomicLongArray(parallelRepairs); + currentlyRunningSegments = new AtomicReferenceArray(parallelRepairs); for (int i = 0; i < parallelRepairs; i++) { - currentlyRunningSegments.set(i, -1); + currentlyRunningSegments.set(i, null); } parallelRanges = getParallelRanges( @@ -91,19 +92,20 @@ public RingRange apply(RepairSegment input) { }))); } - public long getRepairRunId() { + public UUID getRepairRunId() { return repairRunId; } @VisibleForTesting - public static int getPossibleParallelRepairsCount(Map, List> ranges) + public static int getPossibleParallelRepairsCount(Map, List> ranges, Map hostsInRing) throws ReaperException { if (ranges.isEmpty()) { String msg = "Repairing 0-sized cluster."; LOG.error(msg); throw new ReaperException(msg); } - return ranges.size() / ranges.values().iterator().next().size(); + + return Math.min(ranges.size() / ranges.values().iterator().next().size(), Math.max(1, hostsInRing.keySet().size()/ranges.values().iterator().next().size())); } @VisibleForTesting @@ -223,12 +225,12 @@ private void startNextSegment() throws ReaperException { for (int rangeIndex = 0; rangeIndex < currentlyRunningSegments.length(); rangeIndex++) { - if (currentlyRunningSegments.get(rangeIndex) != -1L) { + if (currentlyRunningSegments.get(rangeIndex) != null) { anythingRunningStill = true; // Just checking that no currently running segment runner is stuck. RepairSegment supposedlyRunningSegment = - context.storage.getRepairSegment(currentlyRunningSegments.get(rangeIndex)).get(); + context.storage.getRepairSegment(repairRunId, currentlyRunningSegments.get(rangeIndex)).get(); DateTime startTime = supposedlyRunningSegment.getStartTime(); if (startTime != null && startTime.isBefore(DateTime.now().minusDays(1))) { LOG.warn("Looks like segment #{} has been running more than a day. 
Start time: {}", @@ -253,8 +255,8 @@ private void startNextSegment() throws ReaperException { } else { LOG.info("Next segment to run : {}", nextRepairSegment.get().getId()); - long segmentId = nextRepairSegment.get().getId(); - boolean wasSet = currentlyRunningSegments.compareAndSet(rangeIndex, -1, segmentId); + UUID segmentId = nextRepairSegment.get().getId(); + boolean wasSet = currentlyRunningSegments.compareAndSet(rangeIndex, null, segmentId); if (!wasSet) { LOG.debug("Didn't set segment id `{}` to slot {} because it was busy", segmentId, rangeIndex); @@ -292,8 +294,8 @@ private void startNextSegment() throws ReaperException { * @param tokenRange token range of the segment to repair. * @return Boolean indicating whether rescheduling next run is needed. */ - private boolean repairSegment(final int rangeIndex, final long segmentId, RingRange tokenRange) { - final long unitId; + private boolean repairSegment(final int rangeIndex, final UUID segmentId, RingRange tokenRange) { + final UUID unitId; final double intensity; final RepairParallelism validationParallelism; { @@ -311,7 +313,7 @@ private boolean repairSegment(final int rangeIndex, final long segmentId, RingRa confirmJMXConnectionIsOpen(); } catch (ReaperException e) { LOG.warn("Failed to reestablish JMX connection in runner {}, retrying", repairRunId, e); - currentlyRunningSegments.set(rangeIndex, -1); + currentlyRunningSegments.set(rangeIndex, null); return true; } @@ -342,7 +344,8 @@ private boolean repairSegment(final int rangeIndex, final long segmentId, RingRa } } else { - potentialCoordinators = Arrays.asList(context.storage.getRepairSegment(segmentId).get().getCoordinatorHost()); + potentialCoordinators + = Arrays.asList(context.storage.getRepairSegment(repairRunId, segmentId).get().getCoordinatorHost()); } SegmentRunner segmentRunner = new SegmentRunner(context, segmentId, potentialCoordinators, @@ -353,13 +356,13 @@ private boolean repairSegment(final int rangeIndex, final long segmentId, RingRa 
Futures.addCallback(segmentResult, new FutureCallback() { @Override public void onSuccess(Object ignored) { - currentlyRunningSegments.set(rangeIndex, -1); + currentlyRunningSegments.set(rangeIndex, null); handleResult(segmentId); } @Override public void onFailure(Throwable t) { - currentlyRunningSegments.set(rangeIndex, -1); + currentlyRunningSegments.set(rangeIndex, null); LOG.error("Executing SegmentRunner failed: {}", t.getMessage()); } }); @@ -367,8 +370,8 @@ public void onFailure(Throwable t) { return true; } - private void handleResult(long segmentId) { - RepairSegment segment = context.storage.getRepairSegment(segmentId).get(); + private void handleResult(UUID segmentId) { + RepairSegment segment = context.storage.getRepairSegment(repairRunId, segmentId).get(); RepairSegment.State segmentState = segment.getState(); LOG.debug("In repair run #{}, triggerRepair on segment {} ended with state {}", repairRunId, segmentId, segmentState); diff --git a/src/main/java/com/spotify/reaper/service/SchedulingManager.java b/src/main/java/com/spotify/reaper/service/SchedulingManager.java index a2197d58d..001932468 100644 --- a/src/main/java/com/spotify/reaper/service/SchedulingManager.java +++ b/src/main/java/com/spotify/reaper/service/SchedulingManager.java @@ -18,6 +18,7 @@ import java.util.Collection; import java.util.Timer; import java.util.TimerTask; +import java.util.UUID; public class SchedulingManager extends TimerTask { @@ -73,7 +74,7 @@ private SchedulingManager(AppContext context) { @Override public void run() { LOG.debug("Checking for repair schedules..."); - long lastId = -1; + UUID lastId = null; try { Collection schedules = context.storage.getAllRepairSchedules(); boolean anyRunStarted = false; @@ -130,7 +131,7 @@ private boolean manageSchedule(RepairSchedule schedule) { if (startNewRun) { try { RepairRun startedRun = startNewRunForUnit(schedule, repairUnit); - ImmutableList newRunHistory = new ImmutableList.Builder() + ImmutableList newRunHistory = new 
ImmutableList.Builder() .addAll(schedule.getRunHistory()).add(startedRun.getId()).build(); context.storage.updateRepairSchedule(schedule.with() .runHistory(newRunHistory) diff --git a/src/main/java/com/spotify/reaper/service/SegmentRunner.java b/src/main/java/com/spotify/reaper/service/SegmentRunner.java index a4c2140dd..e642a6021 100644 --- a/src/main/java/com/spotify/reaper/service/SegmentRunner.java +++ b/src/main/java/com/spotify/reaper/service/SegmentRunner.java @@ -48,6 +48,7 @@ import com.spotify.reaper.core.RepairUnit; import com.spotify.reaper.utils.SimpleCondition; import com.sun.management.UnixOperatingSystemMXBean; +import java.util.UUID; public final class SegmentRunner implements RepairStatusHandler, Runnable { @@ -59,7 +60,7 @@ public final class SegmentRunner implements RepairStatusHandler, Runnable { Pattern.compile("[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}"); private final AppContext context; - private final long segmentId; + private final UUID segmentId; private final Condition condition = new SimpleCondition(); private final Collection potentialCoordinators; private final long timeoutMillis; @@ -73,11 +74,13 @@ public final class SegmentRunner implements RepairStatusHandler, Runnable { // Caching all active SegmentRunners. 
@VisibleForTesting - public static Map segmentRunners = Maps.newConcurrentMap(); + public static Map segmentRunners = Maps.newConcurrentMap(); - public SegmentRunner(AppContext context, long segmentId, Collection potentialCoordinators, + public SegmentRunner(AppContext context, UUID segmentId, Collection potentialCoordinators, long timeoutMillis, double intensity, RepairParallelism validationParallelism, String clusterName, RepairUnit repairUnit, RepairRunner repairRunner) { + + assert !segmentRunners.containsKey(segmentId) : "SegmentRunner already exists for segment with ID: " + segmentId; this.context = context; this.segmentId = segmentId; this.potentialCoordinators = potentialCoordinators; @@ -92,7 +95,7 @@ public SegmentRunner(AppContext context, long segmentId, Collection pote @Override public void run() { - final RepairSegment segment = context.storage.getRepairSegment(segmentId).get(); + final RepairSegment segment = context.storage.getRepairSegment(repairRunner.getRepairRunId(), segmentId).get(); Thread.currentThread().setName(clusterName + ":" + segment.getRunId() + ":" + segmentId); runRepair(); @@ -128,7 +131,7 @@ public static void abort(AppContext context, RepairSegment segment, JmxProxy jmx */ public void postponeCurrentSegment() { synchronized (condition) { - RepairSegment segment = context.storage.getRepairSegment(segmentId).get(); + RepairSegment segment = context.storage.getRepairSegment(repairRunner.getRepairRunId(), segmentId).get(); postpone(context, segment, context.storage.getRepairUnit(segment.getRepairUnitId())); } } @@ -148,7 +151,7 @@ private long getOpenFilesAmount() { private void runRepair() { LOG.debug("Run repair for segment #{}", segmentId); - final RepairSegment segment = context.storage.getRepairSegment(segmentId).get(); + final RepairSegment segment = context.storage.getRepairSegment(repairRunner.getRepairRunId(), segmentId).get(); try (JmxProxy coordinator = context.jmxConnectionFactory .connectAny(Optional.fromNullable(this), 
potentialCoordinators)) { @@ -209,7 +212,7 @@ protected Set initialize() { .coordinatorHost(coordinator.getHost()) .repairCommandId(commandId) .build(segmentId)); - String eventMsg = String.format("Triggered repair of segment %d via host %s", + String eventMsg = String.format("Triggered repair of segment %s via host %s", segment.getId(), coordinator.getHost()); repairRunner.updateLastEvent(eventMsg); LOG.info("Repair for segment {} started, status wait will timeout in {} millis", segmentId, @@ -219,7 +222,7 @@ protected Set initialize() { } catch (InterruptedException e) { LOG.warn("Repair command {} on segment {} interrupted", commandId, segmentId, e); } finally { - RepairSegment resultingSegment = context.storage.getRepairSegment(segmentId).get(); + RepairSegment resultingSegment = context.storage.getRepairSegment(repairRunner.getRepairRunId(), segmentId).get(); LOG.info("Repair command {} on segment {} returned with state {}", commandId, segmentId, resultingSegment.getState()); if (resultingSegment.getState() == RepairSegment.State.RUNNING) { @@ -354,7 +357,7 @@ private boolean IsRepairRunningOnOneNode(RepairSegment segment) { } - private boolean repairHasSegmentRunning(long repairRunId) { + private boolean repairHasSegmentRunning(UUID repairRunId) { Collection segments = context.storage.getRepairSegmentsForRun(repairRunId); for(RepairSegment segment:segments) { if(segment.getState() == RepairSegment.State.RUNNING) { @@ -381,7 +384,7 @@ private void abort(RepairSegment segment, JmxProxy jmxConnection) { */ @Override public void handle(int repairNumber, Optional status, Optional progress, String message) { - final RepairSegment segment = context.storage.getRepairSegment(segmentId).get(); + final RepairSegment segment = context.storage.getRepairSegment(repairRunner.getRepairRunId(), segmentId).get(); Thread.currentThread().setName(clusterName + ":" + segment.getRunId() + ":" + segmentId); LOG.debug( "handle called for repairCommandId {}, outcome {} / {} and 
message: {}", @@ -395,7 +398,7 @@ public void handle(int repairNumber, Optional status boolean failOutsideSynchronizedBlock = false; // DO NOT ADD EXTERNAL CALLS INSIDE THIS SYNCHRONIZED BLOCK (JMX PROXY ETC) synchronized (condition) { - RepairSegment currentSegment = context.storage.getRepairSegment(segmentId).get(); + RepairSegment currentSegment = context.storage.getRepairSegment(repairRunner.getRepairRunId(), segmentId).get(); // See status explanations at: https://wiki.apache.org/cassandra/RepairAsyncAPI // Old repair API if(status.isPresent()) { @@ -523,7 +526,7 @@ public static String parseRepairId(String message) { * @return the delay in milliseconds. */ long intensityBasedDelayMillis(double intensity) { - RepairSegment repairSegment = context.storage.getRepairSegment(segmentId).get(); + RepairSegment repairSegment = context.storage.getRepairSegment(repairRunner.getRepairRunId(), segmentId).get(); if (repairSegment.getEndTime() == null && repairSegment.getStartTime() == null) { return 0; } else if (repairSegment.getEndTime() != null && repairSegment.getStartTime() != null) { diff --git a/src/main/java/com/spotify/reaper/storage/CassandraStorage.java b/src/main/java/com/spotify/reaper/storage/CassandraStorage.java index fc1a952d3..031ff9026 100644 --- a/src/main/java/com/spotify/reaper/storage/CassandraStorage.java +++ b/src/main/java/com/spotify/reaper/storage/CassandraStorage.java @@ -1,7 +1,6 @@ package com.spotify.reaper.storage; import java.math.BigInteger; -import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.Comparator; @@ -9,8 +8,6 @@ import java.util.List; import java.util.Set; import java.util.UUID; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.ConcurrentMap; import java.util.stream.Collectors; import org.slf4j.Logger; @@ -24,8 +21,8 @@ import com.datastax.driver.core.ResultSetFuture; import com.datastax.driver.core.Row; import com.datastax.driver.core.Session; 
+import com.datastax.driver.core.utils.UUIDs; import com.google.common.base.Optional; -import com.google.common.base.Preconditions; import com.google.common.collect.ComparisonChain; import com.google.common.collect.ImmutableList; import com.google.common.collect.Lists; @@ -44,6 +41,7 @@ import com.spotify.reaper.service.RepairParameters; import com.spotify.reaper.service.RingRange; import com.spotify.reaper.storage.cassandra.DateTimeCodec; +import com.spotify.reaper.storage.cassandra.Migration003; import org.apache.cassandra.repair.RepairParallelism; import org.cognitor.cassandra.migration.Database; @@ -53,15 +51,11 @@ import io.dropwizard.setup.Environment; -public class CassandraStorage implements IStorage { +public final class CassandraStorage implements IStorage { private static final Logger LOG = LoggerFactory.getLogger(CassandraStorage.class); com.datastax.driver.core.Cluster cassandra = null; Session session; - /** simple cache of repair_id. - * not accurate, only provides a floor value to shortcut looking for next appropriate id */ - private final ConcurrentMap repairIds = new ConcurrentHashMap<>(); - /* Simple statements */ private final String getClustersStmt = "SELECT * FROM cluster"; @@ -82,19 +76,13 @@ public class CassandraStorage implements IStorage { private PreparedStatement getRepairUnitPrepStmt; private PreparedStatement insertRepairSegmentPrepStmt; private PreparedStatement getRepairSegmentPrepStmt; - private PreparedStatement insertRepairSegmentByRunPrepStmt; - private PreparedStatement getRepairSegmentByRunIdPrepStmt; + private PreparedStatement getRepairSegmentsByRunIdPrepStmt; private PreparedStatement insertRepairSchedulePrepStmt; private PreparedStatement getRepairSchedulePrepStmt; private PreparedStatement getRepairScheduleByClusterAndKsPrepStmt; private PreparedStatement insertRepairScheduleByClusterAndKsPrepStmt; private PreparedStatement deleteRepairSchedulePrepStmt; private PreparedStatement deleteRepairScheduleByClusterAndKsPrepStmt; 
- private PreparedStatement deleteRepairSegmentPrepStmt; - private PreparedStatement deleteRepairSegmentByRunId; - private PreparedStatement insertRepairId; - private PreparedStatement selectRepairId; - private PreparedStatement updateRepairId; public CassandraStorage(ReaperApplicationConfiguration config, Environment environment) { cassandra = config.getCassandraFactory().build(environment).register(QueryLogger.builder().build()); @@ -106,7 +94,7 @@ public CassandraStorage(ReaperApplicationConfiguration config, Environment envir Database database = new Database(cassandra, config.getCassandraFactory().getKeyspace()); MigrationTask migration = new MigrationTask(database, new MigrationRepository("db/cassandra")); migration.migrate(); - + Migration003.migrate(session); prepareStatements(); } @@ -117,29 +105,23 @@ private void prepareStatements(){ insertRepairRunPrepStmt = session.prepare("INSERT INTO repair_run(id, cluster_name, repair_unit_id, cause, owner, state, creation_time, start_time, end_time, pause_time, intensity, last_event, segment_count, repair_parallelism) VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"); insertRepairRunClusterIndexPrepStmt = session.prepare("INSERT INTO repair_run_by_cluster(cluster_name, id) values(?, ?)"); insertRepairRunUnitIndexPrepStmt = session.prepare("INSERT INTO repair_run_by_unit(repair_unit_id, id) values(?, ?)"); - getRepairRunPrepStmt = session.prepare("SELECT * FROM repair_run WHERE id = ?"); + getRepairRunPrepStmt = session.prepare("SELECT id,cluster_name,repair_unit_id,cause,owner,state,creation_time,start_time,end_time,pause_time,intensity,last_event,segment_count,repair_parallelism FROM repair_run WHERE id = ? 
LIMIT 1"); getRepairRunForClusterPrepStmt = session.prepare("SELECT * FROM repair_run_by_cluster WHERE cluster_name = ?"); getRepairRunForUnitPrepStmt = session.prepare("SELECT * FROM repair_run_by_unit WHERE repair_unit_id = ?"); deleteRepairRunPrepStmt = session.prepare("DELETE FROM repair_run WHERE id = ?"); deleteRepairRunByClusterPrepStmt = session.prepare("DELETE FROM repair_run_by_cluster WHERE id = ? and cluster_name = ?"); deleteRepairRunByUnitPrepStmt = session.prepare("DELETE FROM repair_run_by_unit WHERE id = ? and repair_unit_id= ?"); - deleteRepairSegmentPrepStmt = session.prepare("DELETE FROM repair_segment WHERE id = ?"); - deleteRepairSegmentByRunId = session.prepare("DELETE FROM repair_segment_by_run_id WHERE run_id = ?"); - insertRepairUnitPrepStmt = session.prepare("INSERT INTO repair_unit(id, cluster_name, keyspace_name, column_families, incremental_repair) VALUES(?, ?, ?, ?, ?)"); - getRepairUnitPrepStmt = session.prepare("SELECT * FROM repair_unit WHERE id = ?"); - insertRepairSegmentPrepStmt = session.prepare("INSERT INTO repair_segment(id, repair_unit_id, run_id, start_token, end_token, state, coordinator_host, start_time, end_time, fail_count) VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"); - getRepairSegmentPrepStmt = session.prepare("SELECT * FROM repair_segment WHERE id = ?"); - insertRepairSegmentByRunPrepStmt = session.prepare("INSERT INTO repair_segment_by_run_id(run_id, segment_id) VALUES(?, ?)"); - getRepairSegmentByRunIdPrepStmt = session.prepare("SELECT * FROM repair_segment_by_run_id WHERE run_id = ?"); - insertRepairSchedulePrepStmt = session.prepare("INSERT INTO repair_schedule(id, repair_unit_id, state, days_between, next_activation, run_history, segment_count, repair_parallelism, intensity, creation_time, owner, pause_time) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"); - getRepairSchedulePrepStmt = session.prepare("SELECT * FROM repair_schedule WHERE id = ?"); - insertRepairScheduleByClusterAndKsPrepStmt = session.prepare("INSERT 
INTO repair_schedule_by_cluster_and_keyspace(cluster_name, keyspace_name, repair_schedule_id) VALUES(?, ?, ?)"); + insertRepairUnitPrepStmt = session.prepare("INSERT INTO repair_unit_v1(id, cluster_name, keyspace_name, column_families, incremental_repair) VALUES(?, ?, ?, ?, ?)"); + getRepairUnitPrepStmt = session.prepare("SELECT * FROM repair_unit_v1 WHERE id = ?"); + insertRepairSegmentPrepStmt = session.prepare("INSERT INTO repair_run(id, segment_id, repair_unit_id, start_token, end_token, segment_state, coordinator_host, segment_start_time, segment_end_time, fail_count) VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"); + getRepairSegmentPrepStmt = session.prepare("SELECT id,repair_unit_id,segment_id,start_token,end_token,segment_state,coordinator_host,segment_start_time,segment_end_time,fail_count FROM repair_run WHERE id = ? and segment_id = ?"); + getRepairSegmentsByRunIdPrepStmt = session.prepare("SELECT id,repair_unit_id,segment_id,start_token,end_token,segment_state,coordinator_host,segment_start_time,segment_end_time,fail_count FROM repair_run WHERE id = ?"); + insertRepairSchedulePrepStmt = session.prepare("INSERT INTO repair_schedule_v1(id, repair_unit_id, state, days_between, next_activation, run_history, segment_count, repair_parallelism, intensity, creation_time, owner, pause_time) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"); + getRepairSchedulePrepStmt = session.prepare("SELECT * FROM repair_schedule_v1 WHERE id = ?"); + insertRepairScheduleByClusterAndKsPrepStmt = session.prepare("INSERT INTO repair_schedule_by_cluster_and_keyspace(cluster_name, keyspace_name, repair_schedule_id) VALUES(?, ?, ?)"); getRepairScheduleByClusterAndKsPrepStmt = session.prepare("SELECT repair_schedule_id FROM repair_schedule_by_cluster_and_keyspace WHERE cluster_name = ? 
and keyspace_name = ?"); - deleteRepairSchedulePrepStmt = session.prepare("DELETE FROM repair_schedule WHERE id = ?"); + deleteRepairSchedulePrepStmt = session.prepare("DELETE FROM repair_schedule_v1 WHERE id = ?"); deleteRepairScheduleByClusterAndKsPrepStmt = session.prepare("DELETE FROM repair_schedule_by_cluster_and_keyspace WHERE cluster_name = ? and keyspace_name = ? and repair_schedule_id = ?"); - insertRepairId = session.prepare("INSERT INTO repair_id (id_type, id) VALUES(?, 0) IF NOT EXISTS"); - selectRepairId = session.prepare("SELECT id FROM repair_id WHERE id_type = ?"); - updateRepairId = session.prepare("UPDATE repair_id SET id=? WHERE id_type =? IF id = ?"); } @Override @@ -191,27 +173,52 @@ public Optional deleteCluster(String clusterName) { } @Override - public RepairRun addRepairRun(Builder repairRun) { - RepairRun newRepairRun = repairRun.build(getNewRepairId("repair_run")); + public RepairRun addRepairRun(Builder repairRun, Collection newSegments) { + RepairRun newRepairRun = repairRun.build(UUIDs.timeBased()); BatchStatement batch = new BatchStatement(); - batch.add(insertRepairRunPrepStmt.bind(newRepairRun.getId(), - newRepairRun.getClusterName(), - newRepairRun.getRepairUnitId(), - newRepairRun.getCause(), - newRepairRun.getOwner(), - newRepairRun.getRunState().toString(), - newRepairRun.getCreationTime()==null?null:newRepairRun.getCreationTime(), - newRepairRun.getStartTime()==null?null:newRepairRun.getStartTime(), - newRepairRun.getEndTime()==null?null:newRepairRun.getEndTime(), - newRepairRun.getPauseTime()==null?null:newRepairRun.getPauseTime(), - newRepairRun.getIntensity(), - newRepairRun.getLastEvent(), - newRepairRun.getSegmentCount(), - newRepairRun.getRepairParallelism().toString()) - ); + BatchStatement repairRunBatch = new BatchStatement(BatchStatement.Type.UNLOGGED); + + repairRunBatch.add(insertRepairRunPrepStmt.bind( + newRepairRun.getId(), + newRepairRun.getClusterName(), + newRepairRun.getRepairUnitId(), + 
newRepairRun.getCause(), + newRepairRun.getOwner(), + newRepairRun.getRunState().toString(), + newRepairRun.getCreationTime()==null?null:newRepairRun.getCreationTime(), + newRepairRun.getStartTime()==null?null:newRepairRun.getStartTime(), + newRepairRun.getEndTime()==null?null:newRepairRun.getEndTime(), + newRepairRun.getPauseTime()==null?null:newRepairRun.getPauseTime(), + newRepairRun.getIntensity(), + newRepairRun.getLastEvent(), + newRepairRun.getSegmentCount(), + newRepairRun.getRepairParallelism().toString())); + batch.add(insertRepairRunClusterIndexPrepStmt.bind(newRepairRun.getClusterName(), newRepairRun.getId())); batch.add(insertRepairRunUnitIndexPrepStmt.bind(newRepairRun.getRepairUnitId(), newRepairRun.getId())); session.execute(batch); + + for(RepairSegment.Builder builder:newSegments){ + RepairSegment segment = builder.withRunId(newRepairRun.getId()).build(UUIDs.timeBased()); + + repairRunBatch.add(insertRepairSegmentPrepStmt.bind( + segment.getRunId(), + segment.getId(), + segment.getRepairUnitId(), + segment.getStartToken(), + segment.getEndToken(), + segment.getState().ordinal(), + segment.getCoordinatorHost(), + segment.getStartTime(), + segment.getEndTime(), + segment.getFailCount())); + + if(100 == repairRunBatch.size()){ + session.execute(repairRunBatch); + repairRunBatch = new BatchStatement(BatchStatement.Type.UNLOGGED); + } + } + session.execute(repairRunBatch); return newRepairRun; } @@ -222,7 +229,7 @@ public boolean updateRepairRun(RepairRun repairRun) { } @Override - public Optional getRepairRun(long id) { + public Optional getRepairRun(UUID id) { RepairRun repairRun = null; Row repairRunResult = session.execute(getRepairRunPrepStmt.bind(id)).one(); if(repairRunResult != null){ @@ -237,9 +244,9 @@ public Collection getRepairRunsForCluster(String clusterName) { List repairRunFutures = Lists.newArrayList(); // Grab all ids for the given cluster name - Collection repairRunIds = getRepairRunIdsForCluster(clusterName); + Collection 
repairRunIds = getRepairRunIdsForCluster(clusterName); // Grab repair runs asynchronously for all the ids returned by the index table - for(Long repairRunId:repairRunIds){ + for(UUID repairRunId:repairRunIds){ repairRunFutures.add(session.executeAsync(getRepairRunPrepStmt.bind(repairRunId))); } @@ -247,7 +254,7 @@ public Collection getRepairRunsForCluster(String clusterName) { } @Override - public Collection getRepairRunsForUnit(long repairUnitId) { + public Collection getRepairRunsForUnit(UUID repairUnitId) { Collection repairRuns = Lists.newArrayList(); List repairRunFutures = Lists.newArrayList(); @@ -256,7 +263,7 @@ public Collection getRepairRunsForUnit(long repairUnitId) { // Grab repair runs asynchronously for all the ids returned by the index table for(Row repairRunId:repairRunIds){ - repairRunFutures.add(session.executeAsync(getRepairRunPrepStmt.bind(repairRunId.getLong("id")))); + repairRunFutures.add(session.executeAsync(getRepairRunPrepStmt.bind(repairRunId.getUUID("id")))); } repairRuns = getRepairRunsAsync(repairRunFutures); @@ -268,7 +275,7 @@ public Collection getRepairRunsForUnit(long repairUnitId) { /** * Create a collection of RepairRun objects out of a list of ResultSetFuture. * Used to handle async queries on the repair_run table with a list of ids. 
- * + * * @param repairRunFutures * @return */ @@ -278,7 +285,7 @@ private Collection getRepairRunsAsync(List repairRun for(ResultSetFuture repairRunFuture:repairRunFutures){ Row repairRunResult = repairRunFuture.getUninterruptibly().one(); if(repairRunResult != null){ - RepairRun repairRun = buildRepairRunFromRow(repairRunResult, repairRunResult.getLong("id")); + RepairRun repairRun = buildRepairRunFromRow(repairRunResult, repairRunResult.getUUID("id")); repairRuns.add(repairRun); } } @@ -288,21 +295,25 @@ private Collection getRepairRunsAsync(List repairRun @Override public Collection getRepairRunsWithState(RunState runState) { - // There shouldn't be many repair runs, so we'll brute force this one - // We'll switch to 2i if performance sucks IRL - Collection repairRuns = Lists.newArrayList(); - ResultSet repairRunResults = session.execute("SELECT * FROM repair_run"); - for(Row repairRun:repairRunResults){ - if(RunState.valueOf(repairRun.getString("state")).equals(runState)){ - repairRuns.add(buildRepairRunFromRow(repairRun, repairRun.getLong("id"))); - } - } - return repairRuns; + return getClusters().stream() + // Grab all ids for the given cluster name + .map(cluster -> getRepairRunIdsForCluster(cluster.getName())) + // Grab repair runs asynchronously for all the ids returned by the index table + .flatMap(repairRunIds + -> repairRunIds.stream() + .map(repairRunId -> session.executeAsync(getRepairRunPrepStmt.bind(repairRunId)))) + // wait for results + .map((ResultSetFuture future) -> { + Row repairRunResult = future.getUninterruptibly().one(); + return buildRepairRunFromRow(repairRunResult, repairRunResult.getUUID("id"));}) + // filter on runState + .filter(repairRun -> repairRun.getRunState() == runState) + .collect(Collectors.toSet()); } @Override - public Optional deleteRepairRun(long id) { + public Optional deleteRepairRun(UUID id) { Optional repairRun = getRepairRun(id); if(repairRun.isPresent()){ BatchStatement batch = new BatchStatement(); @@ -311,33 
+322,18 @@ public Optional deleteRepairRun(long id) { batch.add(deleteRepairRunByUnitPrepStmt.bind(id, repairRun.get().getRepairUnitId())); session.execute(batch); } - - // Delete all segments for the run we've deleted - List futures= Lists.newArrayList(); - Collection segments = getRepairSegmentsForRun(id); - int i=0; - final int nbSegments = segments.size(); - futures.add(session.executeAsync(deleteRepairSegmentByRunId.bind(id))); - for(RepairSegment segment:segments){ - futures.add(session.executeAsync(deleteRepairSegmentPrepStmt.bind(segment.getId()))); - i++; - if(i%100==0 || i==nbSegments-1){ - futures.stream().forEach(f -> f.getUninterruptibly()); - } - } - return repairRun; } @Override - public RepairUnit addRepairUnit(com.spotify.reaper.core.RepairUnit.Builder newRepairUnit) { - RepairUnit repairUnit = newRepairUnit.build(getNewRepairId("repair_unit")); + public RepairUnit addRepairUnit(RepairUnit.Builder newRepairUnit) { + RepairUnit repairUnit = newRepairUnit.build(UUIDs.timeBased()); session.execute(insertRepairUnitPrepStmt.bind(repairUnit.getId(), repairUnit.getClusterName(), repairUnit.getKeyspaceName(), repairUnit.getColumnFamilies(), repairUnit.getIncrementalRepair())); return repairUnit; } @Override - public Optional getRepairUnit(long id) { + public Optional getRepairUnit(UUID id) { RepairUnit repairUnit = null; Row repairUnitRow = session.execute(getRepairUnitPrepStmt.bind(id)).one(); if(repairUnitRow!=null){ @@ -350,12 +346,12 @@ public Optional getRepairUnit(long id) { public Optional getRepairUnit(String cluster, String keyspace, Set columnFamilyNames) { // brute force again RepairUnit repairUnit=null; - ResultSet results = session.execute("SELECT * FROM repair_unit"); + ResultSet results = session.execute("SELECT * FROM repair_unit_v1"); for(Row repairUnitRow:results){ if(repairUnitRow.getString("cluster_name").equals(cluster) && repairUnitRow.getString("keyspace_name").equals(keyspace) && repairUnitRow.getSet("column_families", 
String.class).equals(columnFamilyNames)){ - repairUnit = new RepairUnit.Builder(repairUnitRow.getString("cluster_name"), repairUnitRow.getString("keyspace_name"), repairUnitRow.getSet("column_families", String.class), repairUnitRow.getBool("incremental_repair")).build(repairUnitRow.getLong("id")); + repairUnit = new RepairUnit.Builder(repairUnitRow.getString("cluster_name"), repairUnitRow.getString("keyspace_name"), repairUnitRow.getSet("column_families", String.class), repairUnitRow.getBool("incremental_repair")).build(repairUnitRow.getUUID("id")); // exit the loop once we find a match break; } @@ -364,49 +360,31 @@ public Optional getRepairUnit(String cluster, String keyspace, Set newSegments, long runId) { - List insertFutures = Lists.newArrayList(); - BatchStatement batch = new BatchStatement(); - for(com.spotify.reaper.core.RepairSegment.Builder builder:newSegments){ - RepairSegment segment = builder.build(getNewRepairId("repair_segment")); - insertFutures.add(session.executeAsync(insertRepairSegmentPrepStmt.bind(segment.getId(), segment.getRepairUnitId(), segment.getRunId(), segment.getStartToken(), segment.getEndToken(), segment.getState().ordinal(), segment.getCoordinatorHost(), segment.getStartTime(), segment.getEndTime(), segment.getFailCount()))); - batch.add(insertRepairSegmentByRunPrepStmt.bind(segment.getRunId(), segment.getId())); - if(insertFutures.size()%100==0){ - // cluster ddos protection - session.execute(batch); - batch.clear(); - for(ResultSetFuture insertFuture:insertFutures){ - insertFuture.getUninterruptibly(); - } - insertFutures = Lists.newArrayList(); - } - } - - // Wait for last queries to ack - if(batch.size()>0) { - session.execute(batch); - } - - for(ResultSetFuture insertFuture:insertFutures){ - insertFuture.getUninterruptibly(); - } - } - @Override public boolean updateRepairSegment(RepairSegment newRepairSegment) { Date startTime = null; if (newRepairSegment.getStartTime() != null) { startTime = 
newRepairSegment.getStartTime().toDate(); } - session.executeAsync(insertRepairSegmentPrepStmt.bind(newRepairSegment.getId(), newRepairSegment.getRepairUnitId(), newRepairSegment.getRunId(), newRepairSegment.getStartToken(), newRepairSegment.getEndToken(), newRepairSegment.getState().ordinal(), newRepairSegment.getCoordinatorHost(), startTime, newRepairSegment.getEndTime().toDate(), newRepairSegment.getFailCount())); + session.executeAsync(insertRepairSegmentPrepStmt.bind( + newRepairSegment.getRunId(), + newRepairSegment.getId(), + newRepairSegment.getRepairUnitId(), + newRepairSegment.getStartToken(), + newRepairSegment.getEndToken(), + newRepairSegment.getState().ordinal(), + newRepairSegment.getCoordinatorHost(), + startTime, + newRepairSegment.getEndTime().toDate(), + newRepairSegment.getFailCount())); + return true; } @Override - public Optional getRepairSegment(long id) { + public Optional getRepairSegment(UUID runId, UUID segmentId) { RepairSegment segment = null; - Row segmentRow = session.execute(getRepairSegmentPrepStmt.bind(id)).one(); + Row segmentRow = session.execute(getRepairSegmentPrepStmt.bind(runId, segmentId)).one(); if(segmentRow != null){ segment = createRepairSegmentFromRow(segmentRow); } @@ -415,55 +393,39 @@ public Optional getRepairSegment(long id) { } @Override - public Collection getRepairSegmentsForRun(long runId) { - List segmentsFuture = Lists.newArrayList(); + public Collection getRepairSegmentsForRun(UUID runId) { Collection segments = Lists.newArrayList(); - // First gather segments ids - ResultSet segmentsIdResultSet = session.execute(getRepairSegmentByRunIdPrepStmt.bind(runId)); - int i=0; - for(Row segmentIdResult:segmentsIdResultSet) { - // Then get segments by id - segmentsFuture.add(session.executeAsync(getRepairSegmentPrepStmt.bind(segmentIdResult.getLong("segment_id")))); - i++; - if(i%100==0 || segmentsIdResultSet.isFullyFetched()) { - segments.addAll(fetchRepairSegmentFromFutures(segmentsFuture)); - segmentsFuture = 
Lists.newArrayList(); - } + ResultSet segmentsIdResultSet = session.execute(getRepairSegmentsByRunIdPrepStmt.bind(runId)); + for(Row segmentRow : segmentsIdResultSet) { + segments.add(createRepairSegmentFromRow(segmentRow)); } return segments; } - - private Collection fetchRepairSegmentFromFutures(List segmentsFuture){ - Collection segments = Lists.newArrayList(); - - for(ResultSetFuture segmentResult:segmentsFuture) { - Row segmentRow = segmentResult.getUninterruptibly().one(); - if(segmentRow!=null){ - segments.add(createRepairSegmentFromRow(segmentRow)); - } - } - - return segments; - - } - private RepairSegment createRepairSegmentFromRow(Row segmentRow){ - return createRepairSegmentFromRow(segmentRow, segmentRow.getLong("id")); + private boolean segmentIsWithinRange(RepairSegment segment, RingRange range) { + return range.encloses(new RingRange(segment.getStartToken(), segment.getEndToken())); + } - private RepairSegment createRepairSegmentFromRow(Row segmentRow, long segmentId){ - return new RepairSegment.Builder(segmentRow.getLong("run_id"), new RingRange(new BigInteger(segmentRow.getVarint("start_token") +""), new BigInteger(segmentRow.getVarint("end_token")+"")), segmentRow.getLong("repair_unit_id")) + + private static RepairSegment createRepairSegmentFromRow(Row segmentRow){ + return new RepairSegment.Builder( + new RingRange( + new BigInteger(segmentRow.getVarint("start_token") +""), + new BigInteger(segmentRow.getVarint("end_token")+"")), + segmentRow.getUUID("repair_unit_id")) + .withRunId(segmentRow.getUUID("id")) .coordinatorHost(segmentRow.getString("coordinator_host")) - .endTime(new DateTime(segmentRow.getTimestamp("end_time"))) + .endTime(new DateTime(segmentRow.getTimestamp("segment_end_time"))) .failCount(segmentRow.getInt("fail_count")) - .startTime(new DateTime(segmentRow.getTimestamp("start_time"))) - .state(State.values()[segmentRow.getInt("state")]) - .build(segmentRow.getLong("id")); + .startTime(new 
DateTime(segmentRow.getTimestamp("segment_start_time"))) + .state(State.values()[segmentRow.getInt("segment_state")]) + .build(segmentRow.getUUID("segment_id")); } - public Optional getSegment(long runId, Optional range){ + public Optional getSegment(UUID runId, Optional range){ RepairSegment segment = null; List segments = Lists.newArrayList(); segments.addAll(getRepairSegmentsForRun(runId)); @@ -480,8 +442,8 @@ public int compare(RepairSegment seg1, RepairSegment seg2) { for(RepairSegment seg:segments){ if(seg.getState().equals(State.NOT_STARTED) // State condition - && ((range.isPresent() && - (range.get().getStart().compareTo(seg.getStartToken())>=0 || range.get().getEnd().compareTo(seg.getEndToken())<=0) + && ((range.isPresent() && + (segmentIsWithinRange(seg, range.get())) ) || !range.isPresent()) // Token range condition ){ segment = seg; @@ -491,18 +453,19 @@ public int compare(RepairSegment seg1, RepairSegment seg2) { return Optional.fromNullable(segment); } + @Override - public Optional getNextFreeSegment(long runId) { + public Optional getNextFreeSegment(UUID runId) { return getSegment(runId, Optional.absent()); } @Override - public Optional getNextFreeSegmentInRange(long runId, RingRange range) { + public Optional getNextFreeSegmentInRange(UUID runId, RingRange range) { return getSegment(runId, Optional.fromNullable(range)); } @Override - public Collection getSegmentsWithState(long runId, State segmentState) { + public Collection getSegmentsWithState(UUID runId, State segmentState) { Collection foundSegments = Lists.newArrayList(); List segments = Lists.newArrayList(); @@ -537,29 +500,29 @@ public Collection getOngoingRepairsInCluster(String clusterNam } @Override - public Collection getRepairRunIdsForCluster(String clusterName) { - Collection repairRunIds = Lists.newArrayList(); + public Collection getRepairRunIdsForCluster(String clusterName) { + Collection repairRunIds = Lists.newArrayList(); ResultSet results = 
session.execute(getRepairRunForClusterPrepStmt.bind(clusterName)); for(Row result:results){ - repairRunIds.add(result.getLong("id")); + repairRunIds.add(result.getUUID("id")); } return repairRunIds; } @Override - public int getSegmentAmountForRepairRun(long runId) { + public int getSegmentAmountForRepairRun(UUID runId) { return getRepairSegmentsForRun(runId).size(); } @Override - public int getSegmentAmountForRepairRunWithState(long runId, State state) { + public int getSegmentAmountForRepairRunWithState(UUID runId, State state) { return getSegmentsWithState(runId, state).size(); } @Override public RepairSchedule addRepairSchedule(com.spotify.reaper.core.RepairSchedule.Builder repairSchedule) { - RepairSchedule schedule = repairSchedule.build(getNewRepairId("repairSchedule")); + RepairSchedule schedule = repairSchedule.build(UUIDs.timeBased()); updateRepairSchedule(schedule); return schedule; @@ -568,7 +531,7 @@ public RepairSchedule addRepairSchedule(com.spotify.reaper.core.RepairSchedule.B @Override - public Optional getRepairSchedule(long repairScheduleId) { + public Optional getRepairSchedule(UUID repairScheduleId) { RepairSchedule schedule = null; Row sched = session.execute(getRepairSchedulePrepStmt.bind(repairScheduleId)).one(); if(sched!=null){ @@ -578,17 +541,17 @@ public Optional getRepairSchedule(long repairScheduleId) { } private RepairSchedule createRepairScheduleFromRow(Row repairScheduleRow){ - return new RepairSchedule.Builder(repairScheduleRow.getLong("repair_unit_id"), - RepairSchedule.State.valueOf(repairScheduleRow.getString("state")), - repairScheduleRow.getInt("days_between"), - new DateTime(repairScheduleRow.getTimestamp("next_activation")), - ImmutableList.copyOf(repairScheduleRow.getSet("run_history", Long.class)), - repairScheduleRow.getInt("segment_count"), - RepairParallelism.fromName(repairScheduleRow.getString("repair_parallelism")), - repairScheduleRow.getDouble("intensity"), + return new 
RepairSchedule.Builder(repairScheduleRow.getUUID("repair_unit_id"), + RepairSchedule.State.valueOf(repairScheduleRow.getString("state")), + repairScheduleRow.getInt("days_between"), + new DateTime(repairScheduleRow.getTimestamp("next_activation")), + ImmutableList.copyOf(repairScheduleRow.getSet("run_history", UUID.class)), + repairScheduleRow.getInt("segment_count"), + RepairParallelism.fromName(repairScheduleRow.getString("repair_parallelism")), + repairScheduleRow.getDouble("intensity"), new DateTime(repairScheduleRow.getTimestamp("creation_time"))) .owner(repairScheduleRow.getString("owner")) - .pauseTime(new DateTime(repairScheduleRow.getTimestamp("pause_time"))).build(repairScheduleRow.getLong("id")); + .pauseTime(new DateTime(repairScheduleRow.getTimestamp("pause_time"))).build(repairScheduleRow.getUUID("id")); } @@ -598,7 +561,7 @@ public Collection getRepairSchedulesForCluster(String clusterNam Collection schedules = Lists.newArrayList(); ResultSet scheduleIds = session.execute(getRepairScheduleByClusterAndKsPrepStmt.bind(clusterName, " ")); for(Row scheduleId:scheduleIds){ - Optional schedule = getRepairSchedule(scheduleId.getLong("repair_schedule_id")); + Optional schedule = getRepairSchedule(scheduleId.getUUID("repair_schedule_id")); if(schedule.isPresent()){ schedules.add(schedule.get()); } @@ -612,7 +575,7 @@ public Collection getRepairSchedulesForKeyspace(String keyspaceN Collection schedules = Lists.newArrayList(); ResultSet scheduleIds = session.execute(getRepairScheduleByClusterAndKsPrepStmt.bind(" ", keyspaceName)); for(Row scheduleId:scheduleIds){ - Optional schedule = getRepairSchedule(scheduleId.getLong("repair_schedule_id")); + Optional schedule = getRepairSchedule(scheduleId.getUUID("repair_schedule_id")); if(schedule.isPresent()){ schedules.add(schedule.get()); } @@ -626,7 +589,7 @@ public Collection getRepairSchedulesForClusterAndKeyspace(String Collection schedules = Lists.newArrayList(); ResultSet scheduleIds = 
session.execute(getRepairScheduleByClusterAndKsPrepStmt.bind(clusterName, keyspaceName)); for(Row scheduleId:scheduleIds){ - Optional schedule = getRepairSchedule(scheduleId.getLong("repair_schedule_id")); + Optional schedule = getRepairSchedule(scheduleId.getUUID("repair_schedule_id")); if(schedule.isPresent()){ schedules.add(schedule.get()); } @@ -638,7 +601,7 @@ public Collection getRepairSchedulesForClusterAndKeyspace(String @Override public Collection getAllRepairSchedules() { Collection schedules = Lists.newArrayList(); - ResultSet scheduleResults = session.execute("SELECT * FROM repair_schedule"); + ResultSet scheduleResults = session.execute("SELECT * FROM repair_schedule_v1"); for(Row scheduleRow:scheduleResults){ schedules.add(createRepairScheduleFromRow(scheduleRow)); @@ -650,20 +613,20 @@ public Collection getAllRepairSchedules() { @Override public boolean updateRepairSchedule(RepairSchedule newRepairSchedule) { BatchStatement batch = new BatchStatement(); - final Set repairHistory = Sets.newHashSet(); + final Set repairHistory = Sets.newHashSet(); repairHistory.addAll(newRepairSchedule.getRunHistory()); - batch.add(insertRepairSchedulePrepStmt.bind(newRepairSchedule.getId(), - newRepairSchedule.getRepairUnitId(), - newRepairSchedule.getState().toString(), - newRepairSchedule.getDaysBetween(), - newRepairSchedule.getNextActivation(), - repairHistory, + batch.add(insertRepairSchedulePrepStmt.bind(newRepairSchedule.getId(), + newRepairSchedule.getRepairUnitId(), + newRepairSchedule.getState().toString(), + newRepairSchedule.getDaysBetween(), + newRepairSchedule.getNextActivation(), + repairHistory, newRepairSchedule.getSegmentCount(), - newRepairSchedule.getRepairParallelism().toString(), - newRepairSchedule.getIntensity(), - newRepairSchedule.getCreationTime(), - newRepairSchedule.getOwner(), + newRepairSchedule.getRepairParallelism().toString(), + newRepairSchedule.getIntensity(), + newRepairSchedule.getCreationTime(), + newRepairSchedule.getOwner(), 
newRepairSchedule.getPauseTime()) ); RepairUnit repairUnit = getRepairUnit(newRepairSchedule.getRepairUnitId()).get(); @@ -676,7 +639,7 @@ public boolean updateRepairSchedule(RepairSchedule newRepairSchedule) { } @Override - public Optional deleteRepairSchedule(long id) { + public Optional deleteRepairSchedule(UUID id) { Optional repairSchedule = getRepairSchedule(id); if(repairSchedule.isPresent()){ RepairUnit repairUnit = getRepairUnit(repairSchedule.get().getRepairUnitId()).get(); @@ -698,58 +661,39 @@ public Collection getClusterRunStatuses(String clusterName, int for (RepairRun repairRun:repairRuns){ Collection segments = getRepairSegmentsForRun(repairRun.getId()); Optional repairUnit = getRepairUnit(repairRun.getRepairUnitId()); - + int segmentsRepaired = (int) segments.stream() .filter(seg -> seg.getState().equals(RepairSegment.State.DONE)) .count(); - + repairRunStatuses.add(new RepairRunStatus(repairRun, repairUnit.get(), segmentsRepaired)); } - + return repairRunStatuses; } @Override public Collection getClusterScheduleStatuses(String clusterName) { Collection repairSchedules = getRepairSchedulesForCluster(clusterName); - - Collection repairScheduleStatuses = repairSchedules.stream() - .map(sched -> new RepairScheduleStatus(sched, getRepairUnit(sched.getRepairUnitId()).get())) - .collect(Collectors.toList()); - + + Collection repairScheduleStatuses = repairSchedules + .stream() + .map(sched -> new RepairScheduleStatus(sched, getRepairUnit(sched.getRepairUnitId()).get())) + .collect(Collectors.toList()); + return repairScheduleStatuses; } - public long getNewRepairId(String idType){ - if (!repairIds.containsKey(idType)){ - repairIds.putIfAbsent(idType, 0L); - // Create id counter if it doesn't exist yet - session.execute(insertRepairId.bind(idType)); - } - long idValue = repairIds.get(idType); - int attempts = 0; - - // Increment and perform CAS, if it fails then fetch current value of the counter and repeat - while(true){ - idValue++; - ResultSet 
casResult = session.execute(updateRepairId.bind(idValue, idType, (idValue-1))); - if(casResult.wasApplied()){ - break; - }else{ - idValue = session.execute(selectRepairId.bind(idType)).one().getLong("id"); - Preconditions.checkState(idValue < Long.MAX_VALUE); - attempts++; - if(10 <= attempts && 0 == attempts % 10){ - LOG.warn("still cant find a new repairId after " + attempts + " attempts"); - } - } - } - repairIds.put(idType, Math.max(idValue, repairIds.get(idType))); - return idValue; - } - private RepairRun buildRepairRunFromRow(Row repairRunResult, long id){ - return new RepairRun.Builder(repairRunResult.getString("cluster_name"), repairRunResult.getLong("repair_unit_id"), new DateTime(repairRunResult.getTimestamp("creation_time")), repairRunResult.getDouble("intensity"), repairRunResult.getInt("segment_count"), RepairParallelism.fromName(repairRunResult.getString("repair_parallelism"))) + + private RepairRun buildRepairRunFromRow(Row repairRunResult, UUID id){ + return new RepairRun.Builder( + repairRunResult.getString("cluster_name"), + repairRunResult.getUUID("repair_unit_id"), + new DateTime(repairRunResult.getTimestamp("creation_time")), + repairRunResult.getDouble("intensity"), + repairRunResult.getInt("segment_count"), + RepairParallelism.fromName(repairRunResult.getString("repair_parallelism"))) .cause(repairRunResult.getString("cause")) .owner(repairRunResult.getString("owner")) .endTime(new DateTime(repairRunResult.getTimestamp("end_time"))) diff --git a/src/main/java/com/spotify/reaper/storage/IStorage.java b/src/main/java/com/spotify/reaper/storage/IStorage.java index 36f9689f9..f5c74fb18 100644 --- a/src/main/java/com/spotify/reaper/storage/IStorage.java +++ b/src/main/java/com/spotify/reaper/storage/IStorage.java @@ -27,6 +27,7 @@ import java.util.Collection; import java.util.Set; +import java.util.UUID; import javax.validation.constraints.NotNull; @@ -54,15 +55,15 @@ public interface IStorage { */ Optional deleteCluster(String clusterName); - 
RepairRun addRepairRun(RepairRun.Builder repairRun); + RepairRun addRepairRun(RepairRun.Builder repairRun, Collection newSegments); boolean updateRepairRun(RepairRun repairRun); - Optional getRepairRun(long id); + Optional getRepairRun(UUID id); Collection getRepairRunsForCluster(String clusterName); - Collection getRepairRunsForUnit(long repairUnitId); + Collection getRepairRunsForUnit(UUID repairUnitId); Collection getRepairRunsWithState(RepairRun.RunState runState); @@ -73,11 +74,11 @@ public interface IStorage { * @param id The id of the RepairRun instance to delete, and all segments for it. * @return The deleted RepairRun instance, if delete succeeds, with state set to DELETED. */ - Optional deleteRepairRun(long id); + Optional deleteRepairRun(UUID id); RepairUnit addRepairUnit(RepairUnit.Builder newRepairUnit); - Optional getRepairUnit(long id); + Optional getRepairUnit(UUID id); /** * Get a stored RepairUnit targeting the given tables in the given keyspace. @@ -90,15 +91,14 @@ public interface IStorage { Optional getRepairUnit(String cluster, String keyspace, Set columnFamilyNames); - void addRepairSegments(Collection newSegments, long runId); boolean updateRepairSegment(RepairSegment newRepairSegment); - Optional getRepairSegment(long id); + Optional getRepairSegment(UUID runId, UUID segmentId); - Collection getRepairSegmentsForRun(long runId); + Collection getRepairSegmentsForRun(UUID runId); - Optional getNextFreeSegment(long runId); + Optional getNextFreeSegment(UUID runId); /** * @param runId the run id that the segment belongs to. @@ -107,21 +107,21 @@ Optional getRepairUnit(String cluster, String keyspace, * that covers the whole ring. * @return a segment enclosed by the range with state NOT_STARTED, or nothing. 
*/ - Optional getNextFreeSegmentInRange(long runId, RingRange range); + Optional getNextFreeSegmentInRange(UUID runId, RingRange range); - Collection getSegmentsWithState(long runId, RepairSegment.State segmentState); + Collection getSegmentsWithState(UUID runId, RepairSegment.State segmentState); Collection getOngoingRepairsInCluster(String clusterName); - Collection getRepairRunIdsForCluster(String clusterName); + Collection getRepairRunIdsForCluster(String clusterName); - int getSegmentAmountForRepairRun(long runId); + int getSegmentAmountForRepairRun(UUID runId); - int getSegmentAmountForRepairRunWithState(long runId, RepairSegment.State state); + int getSegmentAmountForRepairRunWithState(UUID runId, RepairSegment.State state); RepairSchedule addRepairSchedule(RepairSchedule.Builder repairSchedule); - Optional getRepairSchedule(long repairScheduleId); + Optional getRepairSchedule(UUID repairScheduleId); Collection getRepairSchedulesForCluster(String clusterName); @@ -141,7 +141,7 @@ Collection getRepairSchedulesForClusterAndKeyspace(String cluste * @param id The id of the RepairSchedule instance to delete. * @return The deleted RepairSchedule instance, if delete succeeds, with state set to DELETED. 
*/ - Optional deleteRepairSchedule(long id); + Optional deleteRepairSchedule(UUID id); @NotNull Collection getClusterRunStatuses(String clusterName, int limit); diff --git a/src/main/java/com/spotify/reaper/storage/MemoryStorage.java b/src/main/java/com/spotify/reaper/storage/MemoryStorage.java index 6e2ea8712..e72718917 100644 --- a/src/main/java/com/spotify/reaper/storage/MemoryStorage.java +++ b/src/main/java/com/spotify/reaper/storage/MemoryStorage.java @@ -13,10 +13,8 @@ */ package com.spotify.reaper.storage; -import com.google.common.base.Function; +import com.datastax.driver.core.utils.UUIDs; import com.google.common.base.Optional; -import com.google.common.collect.Collections2; -import com.google.common.collect.FluentIterable; import com.google.common.collect.Iterables; import com.google.common.collect.Lists; import com.google.common.collect.Maps; @@ -39,27 +37,22 @@ import java.util.List; import java.util.Map; import java.util.Set; +import java.util.UUID; import java.util.concurrent.ConcurrentMap; -import java.util.concurrent.atomic.AtomicInteger; /** * Implements the StorageAPI using transient Java classes. 
*/ -public class MemoryStorage implements IStorage { - - private final AtomicInteger REPAIR_RUN_ID = new AtomicInteger(0); - private final AtomicInteger REPAIR_UNIT_ID = new AtomicInteger(0); - private final AtomicInteger SEGMENT_ID = new AtomicInteger(0); - private final AtomicInteger REPAIR_SCHEDULE_ID = new AtomicInteger(0); +public final class MemoryStorage implements IStorage { private final ConcurrentMap clusters = Maps.newConcurrentMap(); - private final ConcurrentMap repairRuns = Maps.newConcurrentMap(); - private final ConcurrentMap repairUnits = Maps.newConcurrentMap(); + private final ConcurrentMap repairRuns = Maps.newConcurrentMap(); + private final ConcurrentMap repairUnits = Maps.newConcurrentMap(); private final ConcurrentMap repairUnitsByKey = Maps.newConcurrentMap(); - private final ConcurrentMap repairSegments = Maps.newConcurrentMap(); - private final ConcurrentMap> repairSegmentsByRunId = + private final ConcurrentMap repairSegments = Maps.newConcurrentMap(); + private final ConcurrentMap> repairSegmentsByRunId = Maps.newConcurrentMap(); - private final ConcurrentMap repairSchedules = Maps.newConcurrentMap(); + private final ConcurrentMap repairSchedules = Maps.newConcurrentMap(); @Override public boolean isStorageConnected() { @@ -103,9 +96,10 @@ && getRepairRunsForCluster(clusterName).isEmpty()) { } @Override - public RepairRun addRepairRun(RepairRun.Builder repairRun) { - RepairRun newRepairRun = repairRun.build(REPAIR_RUN_ID.incrementAndGet()); + public RepairRun addRepairRun(RepairRun.Builder repairRun, Collection newSegments) { + RepairRun newRepairRun = repairRun.build(UUIDs.timeBased()); repairRuns.put(newRepairRun.getId(), newRepairRun); + addRepairSegments(newSegments, newRepairRun.getId()); return newRepairRun; } @@ -120,7 +114,7 @@ public boolean updateRepairRun(RepairRun repairRun) { } @Override - public Optional getRepairRun(long id) { + public Optional getRepairRun(UUID id) { return Optional.fromNullable(repairRuns.get(id)); } @@ 
-136,10 +130,10 @@ public List getRepairRunsForCluster(String clusterName) { } @Override - public Collection getRepairRunsForUnit(long repairUnitId) { + public Collection getRepairRunsForUnit(UUID repairUnitId) { List foundRepairRuns = new ArrayList<>(); for (RepairRun repairRun : repairRuns.values()) { - if (repairRun.getRepairUnitId() == repairUnitId) { + if (repairRun.getRepairUnitId().equals(repairUnitId)) { foundRepairRuns.add(repairRun); } } @@ -163,18 +157,18 @@ public Collection getRepairRunsWithState(RepairRun.RunState runState) * @param repairUnitId The RepairUnit instance id to delete. * @return The deleted RepairUnit instance, if delete succeeded. */ - private Optional deleteRepairUnit(long repairUnitId) { + private Optional deleteRepairUnit(UUID repairUnitId) { RepairUnit deletedUnit = null; boolean canDelete = true; for (RepairRun repairRun : repairRuns.values()) { - if (repairRun.getRepairUnitId() == repairUnitId) { + if (repairRun.getRepairUnitId().equals(repairUnitId)) { canDelete = false; break; } } if (canDelete) { for (RepairSchedule schedule : repairSchedules.values()) { - if (schedule.getRepairUnitId() == repairUnitId) { + if (schedule.getRepairUnitId().equals(repairUnitId)) { canDelete = false; break; } @@ -187,8 +181,8 @@ private Optional deleteRepairUnit(long repairUnitId) { return Optional.fromNullable(deletedUnit); } - private int deleteRepairSegmentsForRun(long runId) { - Map segmentsMap = repairSegmentsByRunId.remove(runId); + private int deleteRepairSegmentsForRun(UUID runId) { + Map segmentsMap = repairSegmentsByRunId.remove(runId); if (null != segmentsMap) { for (RepairSegment segment : segmentsMap.values()) { repairSegments.remove(segment.getId()); @@ -198,7 +192,7 @@ private int deleteRepairSegmentsForRun(long runId) { } @Override - public Optional deleteRepairRun(long id) { + public Optional deleteRepairRun(UUID id) { RepairRun deletedRun = repairRuns.remove(id); if (deletedRun != null) { if 
(getSegmentAmountForRepairRunWithState(id, RepairSegment.State.RUNNING) == 0) { @@ -214,10 +208,10 @@ public Optional deleteRepairRun(long id) { public RepairUnit addRepairUnit(RepairUnit.Builder repairUnit) { Optional existing = getRepairUnit(repairUnit.clusterName, repairUnit.keyspaceName, repairUnit.columnFamilies); - if (existing.isPresent() && repairUnit.incrementalRepair == existing.get().getIncrementalRepair().booleanValue()) { + if (existing.isPresent() && repairUnit.incrementalRepair == existing.get().getIncrementalRepair()) { return existing.get(); } else { - RepairUnit newRepairUnit = repairUnit.build(REPAIR_UNIT_ID.incrementAndGet()); + RepairUnit newRepairUnit = repairUnit.build(UUIDs.timeBased()); repairUnits.put(newRepairUnit.getId(), newRepairUnit); RepairUnitKey unitKey = new RepairUnitKey(newRepairUnit); repairUnitsByKey.put(unitKey, newRepairUnit); @@ -226,7 +220,7 @@ public RepairUnit addRepairUnit(RepairUnit.Builder repairUnit) { } @Override - public Optional getRepairUnit(long id) { + public Optional getRepairUnit(UUID id) { return Optional.fromNullable(repairUnits.get(id)); } @@ -236,11 +230,10 @@ public Optional getRepairUnit(String cluster, String keyspace, Set segments, long runId) { - LinkedHashMap newSegments = Maps.newLinkedHashMap(); + private void addRepairSegments(Collection segments, UUID runId) { + LinkedHashMap newSegments = Maps.newLinkedHashMap(); for (RepairSegment.Builder segment : segments) { - RepairSegment newRepairSegment = segment.build(SEGMENT_ID.incrementAndGet()); + RepairSegment newRepairSegment = segment.withRunId(runId).build(UUIDs.timeBased()); repairSegments.put(newRepairSegment.getId(), newRepairSegment); newSegments.put(newRepairSegment.getId(), newRepairSegment); } @@ -249,11 +242,11 @@ public void addRepairSegments(Collection segments, long r @Override public boolean updateRepairSegment(RepairSegment newRepairSegment) { - if (getRepairSegment(newRepairSegment.getId()) == null) { + if 
(getRepairSegment(newRepairSegment.getRunId(), newRepairSegment.getId()) == null) { return false; } else { repairSegments.put(newRepairSegment.getId(), newRepairSegment); - LinkedHashMap updatedSegment = + LinkedHashMap updatedSegment = repairSegmentsByRunId.get(newRepairSegment.getRunId()); updatedSegment.put(newRepairSegment.getId(), newRepairSegment); return true; @@ -261,17 +254,17 @@ public boolean updateRepairSegment(RepairSegment newRepairSegment) { } @Override - public Optional getRepairSegment(long id) { - return Optional.fromNullable(repairSegments.get(id)); + public Optional getRepairSegment(UUID runId, UUID segmentId) { + return Optional.fromNullable(repairSegments.get(segmentId)); } @Override - public Collection getRepairSegmentsForRun(long runId) { + public Collection getRepairSegmentsForRun(UUID runId) { return repairSegmentsByRunId.get(runId).values(); } @Override - public Optional getNextFreeSegment(long runId) { + public Optional getNextFreeSegment(UUID runId) { for (RepairSegment segment : repairSegmentsByRunId.get(runId).values()) { if (segment.getState() == RepairSegment.State.NOT_STARTED) { return Optional.of(segment); @@ -281,7 +274,7 @@ public Optional getNextFreeSegment(long runId) { } @Override - public Optional getNextFreeSegmentInRange(long runId, RingRange range) { + public Optional getNextFreeSegmentInRange(UUID runId, RingRange range) { for (RepairSegment segment : repairSegmentsByRunId.get(runId).values()) { if (segment.getState() == RepairSegment.State.NOT_STARTED && range.encloses(segment.getTokenRange())) { @@ -292,8 +285,7 @@ public Optional getNextFreeSegmentInRange(long runId, RingRange r } @Override - public Collection getSegmentsWithState(long runId, - RepairSegment.State segmentState) { + public Collection getSegmentsWithState(UUID runId, RepairSegment.State segmentState) { List segments = Lists.newArrayList(); for (RepairSegment segment : repairSegmentsByRunId.get(runId).values()) { if (segment.getState() == segmentState) { 
@@ -320,8 +312,8 @@ public Collection getOngoingRepairsInCluster(String clusterNam } @Override - public Collection getRepairRunIdsForCluster(String clusterName) { - Collection repairRunIds = new HashSet<>(); + public Collection getRepairRunIdsForCluster(String clusterName) { + Collection repairRunIds = new HashSet<>(); for (RepairRun repairRun : repairRuns.values()) { if (repairRun.getClusterName().equalsIgnoreCase(clusterName)) { repairRunIds.add(repairRun.getId()); @@ -331,14 +323,14 @@ public Collection getRepairRunIdsForCluster(String clusterName) { } @Override - public int getSegmentAmountForRepairRun(long runId) { - Map segmentsMap = repairSegmentsByRunId.get(runId); + public int getSegmentAmountForRepairRun(UUID runId) { + Map segmentsMap = repairSegmentsByRunId.get(runId); return segmentsMap == null ? 0 : segmentsMap.size(); } @Override - public int getSegmentAmountForRepairRunWithState(long runId, RepairSegment.State state) { - Map segmentsMap = repairSegmentsByRunId.get(runId); + public int getSegmentAmountForRepairRunWithState(UUID runId, RepairSegment.State state) { + Map segmentsMap = repairSegmentsByRunId.get(runId); int amount = 0; if (null != segmentsMap) { for (RepairSegment segment : segmentsMap.values()) { @@ -353,13 +345,13 @@ public int getSegmentAmountForRepairRunWithState(long runId, RepairSegment.State @Override public RepairSchedule addRepairSchedule(RepairSchedule.Builder repairSchedule) { - RepairSchedule newRepairSchedule = repairSchedule.build(REPAIR_SCHEDULE_ID.incrementAndGet()); + RepairSchedule newRepairSchedule = repairSchedule.build(UUIDs.timeBased()); repairSchedules.put(newRepairSchedule.getId(), newRepairSchedule); return newRepairSchedule; } @Override - public Optional getRepairSchedule(long id) { + public Optional getRepairSchedule(UUID id) { return Optional.fromNullable(repairSchedules.get(id)); } @@ -417,7 +409,7 @@ public boolean updateRepairSchedule(RepairSchedule newRepairSchedule) { } @Override - public Optional 
deleteRepairSchedule(long id) { + public Optional deleteRepairSchedule(UUID id) { RepairSchedule deletedSchedule = repairSchedules.remove(id); if (deletedSchedule != null) { deletedSchedule = deletedSchedule.with().state(RepairSchedule.State.DELETED).build(id); diff --git a/src/main/java/com/spotify/reaper/storage/PostgresStorage.java b/src/main/java/com/spotify/reaper/storage/PostgresStorage.java index be59eeede..c3187c50e 100644 --- a/src/main/java/com/spotify/reaper/storage/PostgresStorage.java +++ b/src/main/java/com/spotify/reaper/storage/PostgresStorage.java @@ -13,6 +13,7 @@ */ package com.spotify.reaper.storage; +import com.datastax.driver.core.utils.UUIDs; import com.google.common.base.Optional; import com.google.common.collect.Lists; @@ -33,6 +34,7 @@ import com.spotify.reaper.storage.postgresql.RunStateArgumentFactory; import com.spotify.reaper.storage.postgresql.ScheduleStateArgumentFactory; import com.spotify.reaper.storage.postgresql.StateArgumentFactory; +import com.spotify.reaper.storage.postgresql.UuidArgumentFactory; import org.skife.jdbi.v2.DBI; import org.skife.jdbi.v2.Handle; @@ -44,12 +46,13 @@ import java.util.Collection; import java.util.List; import java.util.Set; +import java.util.UUID; /** * Implements the StorageAPI using PostgreSQL database. 
*/ -public class PostgresStorage implements IStorage { +public final class PostgresStorage implements IStorage { private static final Logger LOG = LoggerFactory.getLogger(PostgresStorage.class); @@ -67,6 +70,7 @@ private static IStoragePostgreSQL getPostgresStorage(Handle h) { h.registerArgumentFactory(new StateArgumentFactory()); h.registerArgumentFactory(new BigIntegerArgumentFactory()); h.registerArgumentFactory(new ScheduleStateArgumentFactory()); + h.registerArgumentFactory(new UuidArgumentFactory()); return h.attach(IStoragePostgreSQL.class); } @@ -144,10 +148,10 @@ public boolean updateCluster(Cluster cluster) { } @Override - public Optional getRepairRun(long id) { + public Optional getRepairRun(UUID id) { RepairRun result; try (Handle h = jdbi.open()) { - result = getPostgresStorage(h).getRepairRun(id); + result = getPostgresStorage(h).getRepairRun(toSequenceId(id)); } return Optional.fromNullable(result); } @@ -162,10 +166,10 @@ public Collection getRepairRunsForCluster(String clusterName) { } @Override - public Collection getRepairRunsForUnit(long repairUnitId) { + public Collection getRepairRunsForUnit(UUID repairUnitId) { Collection result; try (Handle h = jdbi.open()) { - result = getPostgresStorage(h).getRepairRunsForUnit(repairUnitId); + result = getPostgresStorage(h).getRepairRunsForUnit(toSequenceId(repairUnitId)); } return result == null ? 
Lists.newArrayList() : result; } @@ -180,20 +184,20 @@ public Collection getRepairRunsWithState(RepairRun.RunState runState) } @Override - public Optional deleteRepairRun(long id) { + public Optional deleteRepairRun(UUID id) { RepairRun result = null; Handle h = null; try { h = jdbi.open(); h.begin(); IStoragePostgreSQL pg = getPostgresStorage(h); - RepairRun runToDelete = pg.getRepairRun(id); + RepairRun runToDelete = pg.getRepairRun(toSequenceId(id)); if (runToDelete != null) { - int segmentsRunning = pg.getSegmentAmountForRepairRunWithState(id, + int segmentsRunning = pg.getSegmentAmountForRepairRunWithState(toSequenceId(id), RepairSegment.State.RUNNING); if (segmentsRunning == 0) { - pg.deleteRepairSegmentsForRun(runToDelete.getId()); - pg.deleteRepairRun(id); + pg.deleteRepairSegmentsForRun(toSequenceId(runToDelete.getId())); + pg.deleteRepairRun(toSequenceId(id)); result = runToDelete.with().runState(RepairRun.RunState.DELETED).build(id); } else { LOG.warn("not deleting RepairRun \"{}\" as it has segments running: {}", @@ -218,22 +222,23 @@ public Optional deleteRepairRun(long id) { return Optional.fromNullable(result); } - private void tryDeletingRepairUnit(long id) { + private void tryDeletingRepairUnit(UUID id) { try (Handle h = jdbi.open()) { IStoragePostgreSQL pg = getPostgresStorage(h); - pg.deleteRepairUnit(id); + pg.deleteRepairUnit(toSequenceId(id)); } catch (DBIException ex) { LOG.info("cannot delete RepairUnit with id " + id); } } @Override - public RepairRun addRepairRun(RepairRun.Builder newRepairRun) { + public RepairRun addRepairRun(RepairRun.Builder newRepairRun, Collection newSegments) { RepairRun result; try (Handle h = jdbi.open()) { - long insertedId = getPostgresStorage(h).insertRepairRun(newRepairRun.build(-1)); - result = newRepairRun.build(insertedId); + long insertedId = getPostgresStorage(h).insertRepairRun(newRepairRun.build(null)); + result = newRepairRun.build(fromSequenceId(insertedId)); } + addRepairSegments(newSegments, 
result.getId()); return result; } @@ -255,16 +260,16 @@ public boolean updateRepairRun(RepairRun repairRun) { public RepairUnit addRepairUnit(RepairUnit.Builder newRepairUnit) { long insertedId; try (Handle h = jdbi.open()) { - insertedId = getPostgresStorage(h).insertRepairUnit(newRepairUnit.build(-1)); + insertedId = getPostgresStorage(h).insertRepairUnit(newRepairUnit.build(null)); } - return newRepairUnit.build(insertedId); + return newRepairUnit.build(fromSequenceId(insertedId)); } @Override - public Optional getRepairUnit(long id) { + public Optional getRepairUnit(UUID id) { RepairUnit result; try (Handle h = jdbi.open()) { - result = getPostgresStorage(h).getRepairUnit(id); + result = getPostgresStorage(h).getRepairUnit(toSequenceId(id)); } return Optional.fromNullable(result); } @@ -280,11 +285,10 @@ public Optional getRepairUnit(String clusterName, String keyspaceNam return Optional.fromNullable(result); } - @Override - public void addRepairSegments(Collection newSegments, long runId) { + private void addRepairSegments(Collection newSegments, UUID runId) { List insertableSegments = new ArrayList<>(); for (RepairSegment.Builder segment : newSegments) { - insertableSegments.add(segment.build(-1)); + insertableSegments.add(segment.withRunId(runId).build(null)); } try (Handle h = jdbi.open()) { getPostgresStorage(h).insertRepairSegments(insertableSegments.iterator()); @@ -306,40 +310,40 @@ public boolean updateRepairSegment(RepairSegment repairSegment) { } @Override - public Optional getRepairSegment(long id) { + public Optional getRepairSegment(UUID runId, UUID segmentId) { RepairSegment result; try (Handle h = jdbi.open()) { - result = getPostgresStorage(h).getRepairSegment(id); + result = getPostgresStorage(h).getRepairSegment(toSequenceId(segmentId)); } return Optional.fromNullable(result); } @Override - public Collection getRepairSegmentsForRun(long runId) { + public Collection getRepairSegmentsForRun(UUID runId) { try (Handle h = jdbi.open()) { - return 
getPostgresStorage(h).getRepairSegmentsForRun(runId); + return getPostgresStorage(h).getRepairSegmentsForRun(toSequenceId(runId)); } } @Override - public Optional getNextFreeSegment(long runId) { + public Optional getNextFreeSegment(UUID runId) { RepairSegment result; try (Handle h = jdbi.open()) { - result = getPostgresStorage(h).getNextFreeRepairSegment(runId); + result = getPostgresStorage(h).getNextFreeRepairSegment(toSequenceId(runId)); } return Optional.fromNullable(result); } @Override - public Optional getNextFreeSegmentInRange(long runId, RingRange range) { + public Optional getNextFreeSegmentInRange(UUID runId, RingRange range) { RepairSegment result; try (Handle h = jdbi.open()) { IStoragePostgreSQL storage = getPostgresStorage(h); if (!range.isWrapping()) { - result = storage.getNextFreeRepairSegmentInNonWrappingRange(runId, range.getStart(), + result = storage.getNextFreeRepairSegmentInNonWrappingRange(toSequenceId(runId), range.getStart(), range.getEnd()); } else { - result = storage.getNextFreeRepairSegmentInWrappingRange(runId, range.getStart(), + result = storage.getNextFreeRepairSegmentInWrappingRange(toSequenceId(runId), range.getStart(), range.getEnd()); } } @@ -347,11 +351,10 @@ public Optional getNextFreeSegmentInRange(long runId, RingRange r } @Override - public Collection getSegmentsWithState(long runId, - RepairSegment.State segmentState) { + public Collection getSegmentsWithState(UUID runId, RepairSegment.State segmentState) { Collection result; try (Handle h = jdbi.open()) { - result = getPostgresStorage(h).getRepairSegmentsForRunWithState(runId, segmentState); + result = getPostgresStorage(h).getRepairSegmentsForRunWithState(toSequenceId(runId), segmentState); } return result; } @@ -364,26 +367,28 @@ public Collection getOngoingRepairsInCluster(String clusterNam } @Override - public Collection getRepairRunIdsForCluster(String clusterName) { - Collection result; + public Collection getRepairRunIdsForCluster(String clusterName) { + 
Collection result = Lists.newArrayList(); try (Handle h = jdbi.open()) { - result = getPostgresStorage(h).getRepairRunIdsForCluster(clusterName); + for(Long l: getPostgresStorage(h).getRepairRunIdsForCluster(clusterName)){ + result.add(fromSequenceId(l)); + } } return result; } @Override - public int getSegmentAmountForRepairRun(long runId) { + public int getSegmentAmountForRepairRun(UUID runId) { try (Handle h = jdbi.open()) { - return getPostgresStorage(h).getSegmentAmountForRepairRun(runId); + return getPostgresStorage(h).getSegmentAmountForRepairRun(toSequenceId(runId)); } } @Override - public int getSegmentAmountForRepairRunWithState(long runId, RepairSegment.State state) { + public int getSegmentAmountForRepairRunWithState(UUID runId, RepairSegment.State state) { int result; try (Handle h = jdbi.open()) { - result = getPostgresStorage(h).getSegmentAmountForRepairRunWithState(runId, state); + result = getPostgresStorage(h).getSegmentAmountForRepairRunWithState(toSequenceId(runId), state); } return result; } @@ -392,16 +397,16 @@ public int getSegmentAmountForRepairRunWithState(long runId, RepairSegment.State public RepairSchedule addRepairSchedule(RepairSchedule.Builder repairSchedule) { long insertedId; try (Handle h = jdbi.open()) { - insertedId = getPostgresStorage(h).insertRepairSchedule(repairSchedule.build(-1)); + insertedId = getPostgresStorage(h).insertRepairSchedule(repairSchedule.build(null)); } - return repairSchedule.build(insertedId); + return repairSchedule.build(fromSequenceId(insertedId)); } @Override - public Optional getRepairSchedule(long repairScheduleId) { + public Optional getRepairSchedule(UUID repairScheduleId) { RepairSchedule result; try (Handle h = jdbi.open()) { - result = getPostgresStorage(h).getRepairSchedule(repairScheduleId); + result = getPostgresStorage(h).getRepairSchedule(toSequenceId(repairScheduleId)); } return Optional.fromNullable(result); } @@ -459,13 +464,13 @@ public boolean updateRepairSchedule(RepairSchedule 
newRepairSchedule) { } @Override - public Optional deleteRepairSchedule(long id) { + public Optional deleteRepairSchedule(UUID id) { RepairSchedule result = null; try (Handle h = jdbi.open()) { IStoragePostgreSQL pg = getPostgresStorage(h); - RepairSchedule scheduleToDel = pg.getRepairSchedule(id); + RepairSchedule scheduleToDel = pg.getRepairSchedule(toSequenceId(id)); if (scheduleToDel != null) { - int rowsDeleted = pg.deleteRepairSchedule(scheduleToDel.getId()); + int rowsDeleted = pg.deleteRepairSchedule(toSequenceId(scheduleToDel.getId())); if (rowsDeleted > 0) { result = scheduleToDel.with().state(RepairSchedule.State.DELETED).build(id); } @@ -477,15 +482,25 @@ public Optional deleteRepairSchedule(long id) { return Optional.fromNullable(result); } + @Override public Collection getClusterRunStatuses(String clusterName, int limit) { try (Handle h = jdbi.open()) { return getPostgresStorage(h).getClusterRunOverview(clusterName, limit); } } + @Override public Collection getClusterScheduleStatuses(String clusterName) { try (Handle h = jdbi.open()) { return getPostgresStorage(h).getClusterScheduleOverview(clusterName); } } + + private static UUID fromSequenceId(long insertedId) { + return new UUID(insertedId, UUIDs.timeBased().getLeastSignificantBits()); + } + + private static long toSequenceId(UUID id) { + return id.getMostSignificantBits(); + } } diff --git a/src/main/java/com/spotify/reaper/storage/cassandra/Migration003.java b/src/main/java/com/spotify/reaper/storage/cassandra/Migration003.java new file mode 100644 index 000000000..44972313f --- /dev/null +++ b/src/main/java/com/spotify/reaper/storage/cassandra/Migration003.java @@ -0,0 +1,91 @@ + +package com.spotify.reaper.storage.cassandra; + +import com.datastax.driver.core.KeyspaceMetadata; +import com.datastax.driver.core.PreparedStatement; +import com.datastax.driver.core.Row; +import com.datastax.driver.core.Session; +import com.datastax.driver.core.querybuilder.QueryBuilder; +import 
com.datastax.driver.core.utils.UUIDs; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; +import java.util.UUID; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public final class Migration003 { + + private static final Logger LOG = LoggerFactory.getLogger(Migration003.class); + + /** migrate over the repair_schedule table **/ + public static void migrate(Session session) { + KeyspaceMetadata metadata = session.getCluster().getMetadata().getKeyspace(session.getLoggedKeyspace()); + if(null != metadata.getTable("repair_unit")) { + + LOG.warn("Migrating repair_unit and repair_schedule tables. This may take some minutes…"); + + PreparedStatement insertRprUnit = session.prepare( + "INSERT INTO repair_unit_v1 (id, cluster_name, keyspace_name, column_families, incremental_repair) VALUES(?, ?, ?, ?, ?)"); + + PreparedStatement insertRprSched = session.prepare( + "INSERT INTO repair_schedule_v1 (id, repair_unit_id, state, days_between, next_activation, run_history, segment_count, repair_parallelism, intensity, creation_time, owner, pause_time) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"); + + PreparedStatement insertRprSchedIdx = session.prepare( + "INSERT INTO repair_schedule_by_cluster_and_keyspace(cluster_name, keyspace_name, repair_schedule_id) VALUES(?, ?, ?)"); + + Map repairUnitIds = new HashMap<>(); + Map repairUnitClusters = new HashMap<>(); + Map repairUnitKeyspaces = new HashMap<>(); + + for(Row row : session.execute(QueryBuilder.select().from("repair_unit"))) { + UUID uuid = UUIDs.timeBased(); + repairUnitIds.put(row.getLong("id"), uuid); + repairUnitClusters.put(row.getLong("id"), row.getString("cluster_name")); + repairUnitKeyspaces.put(row.getLong("id"), row.getString("keyspace_name")); + + session.execute( + insertRprUnit.bind( + uuid, + row.getString("cluster_name"), + row.getString("keyspace_name"), + row.getSet("column_families", String.class), + row.getBool("incremental_repair"))); + } + 
session.executeAsync("DROP TABLE repair_unit"); + + for(Row row : session.execute(QueryBuilder.select().from("repair_schedule"))) { + UUID uuid = UUIDs.timeBased(); + long repairUnitId = row.getLong("repair_unit_id"); + + session.execute( + insertRprSched.bind( + uuid, + repairUnitIds.get(repairUnitId), + row.getString("state"), + row.getInt("days_between"), + row.getTimestamp("next_activation"), + Collections.emptySet(), + row.getInt("segment_count"), + row.getString("repair_parallelism"), + row.getDouble("intensity"), + row.getTimestamp("creation_time"), + row.getString("owner"), + row.getTimestamp("pause_time"))); + + + session.executeAsync(insertRprSchedIdx + .bind(repairUnitClusters.get(repairUnitId), repairUnitKeyspaces.get(repairUnitId), uuid)); + + session.executeAsync(insertRprSchedIdx.bind(repairUnitClusters.get(repairUnitId), " ", uuid)); + session.executeAsync(insertRprSchedIdx.bind(" ", repairUnitKeyspaces.get(repairUnitId), uuid)); + } + + session.executeAsync("DROP TABLE repair_schedule"); + + LOG.warn("Migration of repair_unit and repair_schedule tables completed."); + } + } + + private Migration003(){} +} diff --git a/src/main/java/com/spotify/reaper/storage/postgresql/LongCollectionSQLType.java b/src/main/java/com/spotify/reaper/storage/postgresql/LongCollectionSQLType.java index 769b84f3f..3a931ac06 100644 --- a/src/main/java/com/spotify/reaper/storage/postgresql/LongCollectionSQLType.java +++ b/src/main/java/com/spotify/reaper/storage/postgresql/LongCollectionSQLType.java @@ -3,19 +3,20 @@ import com.google.common.collect.Lists; import java.util.Collection; +import java.util.UUID; /** * This is required to be able to map in generic manner into Postgres array types through JDBI. 
*/ public class LongCollectionSQLType { - private Collection collection; + private Collection collection; - public LongCollectionSQLType(Collection collection) { + public LongCollectionSQLType(Collection collection) { this.collection = collection; } - public Collection getValue() { + public Collection getValue() { if (this.collection == null) { return Lists.newArrayList(); } else { diff --git a/src/main/java/com/spotify/reaper/storage/postgresql/RepairRunMapper.java b/src/main/java/com/spotify/reaper/storage/postgresql/RepairRunMapper.java index 7667e6378..fbfbe1a37 100644 --- a/src/main/java/com/spotify/reaper/storage/postgresql/RepairRunMapper.java +++ b/src/main/java/com/spotify/reaper/storage/postgresql/RepairRunMapper.java @@ -13,6 +13,7 @@ */ package com.spotify.reaper.storage.postgresql; +import com.datastax.driver.core.utils.UUIDs; import com.spotify.reaper.core.RepairRun; import org.apache.cassandra.repair.RepairParallelism; @@ -23,6 +24,7 @@ import java.sql.ResultSet; import java.sql.SQLException; import java.sql.Timestamp; +import java.util.UUID; public class RepairRunMapper implements ResultSetMapper { @@ -41,7 +43,7 @@ public RepairRun map(int index, ResultSet r, StatementContext ctx) throws SQLExc RepairParallelism.fromName(r.getString("repair_parallelism")); RepairRun.Builder repairRunBuilder = new RepairRun.Builder(r.getString("cluster_name"), - r.getLong("repair_unit_id"), + fromSequenceId(r.getLong("repair_unit_id")), getDateTimeOrNull(r, "creation_time"), r.getFloat("intensity"), r.getInt("segment_count"), @@ -54,7 +56,10 @@ public RepairRun map(int index, ResultSet r, StatementContext ctx) throws SQLExc .endTime(getDateTimeOrNull(r, "end_time")) .pauseTime(getDateTimeOrNull(r, "pause_time")) .lastEvent(r.getString("last_event")) - .build(r.getLong("id")); + .build(fromSequenceId(r.getLong("id"))); } + private static UUID fromSequenceId(long insertedId) { + return new UUID(insertedId, UUIDs.timeBased().getLeastSignificantBits()); + } } diff --git 
a/src/main/java/com/spotify/reaper/storage/postgresql/RepairRunStatusMapper.java b/src/main/java/com/spotify/reaper/storage/postgresql/RepairRunStatusMapper.java index 3c3476f73..1a460b57b 100644 --- a/src/main/java/com/spotify/reaper/storage/postgresql/RepairRunStatusMapper.java +++ b/src/main/java/com/spotify/reaper/storage/postgresql/RepairRunStatusMapper.java @@ -1,5 +1,6 @@ package com.spotify.reaper.storage.postgresql; +import com.datastax.driver.core.utils.UUIDs; import com.google.common.collect.ImmutableSet; import com.spotify.reaper.core.RepairRun; @@ -13,6 +14,7 @@ import java.sql.ResultSet; import java.sql.SQLException; import java.util.Collection; +import java.util.UUID; public class RepairRunStatusMapper implements ResultSetMapper { @@ -42,8 +44,12 @@ public RepairRunStatus map(int index, ResultSet r, StatementContext ctx) throws } RepairParallelism repairParallelism = RepairParallelism.fromName(repairParallelismStr); - return new RepairRunStatus(runId, clusterName, keyspaceName, columnFamilies, segmentsRepaired, + return new RepairRunStatus(fromSequenceId(runId), clusterName, keyspaceName, columnFamilies, segmentsRepaired, totalSegments, state, startTime, endTime, cause, owner, lastEvent, creationTime, pauseTime, intensity, incrementalRepair, repairParallelism); } + + private static UUID fromSequenceId(long insertedId) { + return new UUID(insertedId, UUIDs.timeBased().getLeastSignificantBits()); + } } diff --git a/src/main/java/com/spotify/reaper/storage/postgresql/RepairScheduleMapper.java b/src/main/java/com/spotify/reaper/storage/postgresql/RepairScheduleMapper.java index c92f38111..6d9ca4b41 100644 --- a/src/main/java/com/spotify/reaper/storage/postgresql/RepairScheduleMapper.java +++ b/src/main/java/com/spotify/reaper/storage/postgresql/RepairScheduleMapper.java @@ -13,6 +13,7 @@ */ package com.spotify.reaper.storage.postgresql; +import com.datastax.driver.core.utils.UUIDs; import com.google.common.collect.ImmutableList; import 
com.spotify.reaper.core.RepairSchedule; @@ -24,13 +25,14 @@ import java.sql.ResultSet; import java.sql.SQLException; import java.util.Arrays; +import java.util.UUID; public class RepairScheduleMapper implements ResultSetMapper { @Override public RepairSchedule map(int index, ResultSet r, StatementContext ctx) throws SQLException { - Long[] runHistoryLong = new Long[0]; + UUID[] runHistoryUUIDs = new UUID[0]; Integer[] runHistory = null; Array av = r.getArray("run_history"); @@ -44,9 +46,9 @@ public RepairSchedule map(int index, ResultSet r, StatementContext ctx) throws S } if (null != runHistory && runHistory.length > 0) { - runHistoryLong = new Long[runHistory.length]; + runHistoryUUIDs = new UUID[runHistory.length]; for (int i = 0; i < runHistory.length; i++) { - runHistoryLong[i] = runHistory[i].longValue(); + runHistoryUUIDs[i] = fromSequenceId(runHistory[i]); } } } @@ -59,18 +61,21 @@ public RepairSchedule map(int index, ResultSet r, StatementContext ctx) throws S RepairSchedule.State scheduleState = RepairSchedule.State.valueOf(stateStr); return new RepairSchedule.Builder( - r.getLong("repair_unit_id"), + fromSequenceId(r.getLong("repair_unit_id")), scheduleState, r.getInt("days_between"), RepairRunMapper.getDateTimeOrNull(r, "next_activation"), - ImmutableList.copyOf(runHistoryLong), + ImmutableList.copyOf(runHistoryUUIDs), r.getInt("segment_count"), RepairParallelism.fromName(r.getString("repair_parallelism")), r.getDouble("intensity"), RepairRunMapper.getDateTimeOrNull(r, "creation_time")) .owner(r.getString("owner")) .pauseTime(RepairRunMapper.getDateTimeOrNull(r, "pause_time")) - .build(r.getLong("id")); + .build(fromSequenceId(r.getLong("id"))); } + private static UUID fromSequenceId(long insertedId) { + return new UUID(insertedId, UUIDs.timeBased().getLeastSignificantBits()); + } } diff --git a/src/main/java/com/spotify/reaper/storage/postgresql/RepairScheduleStatusMapper.java 
b/src/main/java/com/spotify/reaper/storage/postgresql/RepairScheduleStatusMapper.java index 5b5f5ac3f..9bf28712a 100644 --- a/src/main/java/com/spotify/reaper/storage/postgresql/RepairScheduleStatusMapper.java +++ b/src/main/java/com/spotify/reaper/storage/postgresql/RepairScheduleStatusMapper.java @@ -13,6 +13,7 @@ */ package com.spotify.reaper.storage.postgresql; +import com.datastax.driver.core.utils.UUIDs; import com.google.common.collect.ImmutableSet; import com.spotify.reaper.core.RepairSchedule; @@ -25,6 +26,7 @@ import java.sql.ResultSet; import java.sql.SQLException; +import java.util.UUID; public class RepairScheduleStatusMapper implements ResultSetMapper { @@ -40,7 +42,7 @@ public RepairScheduleStatus map(int index, ResultSet r, StatementContext ctx) RepairParallelism repairParallelism = RepairParallelism.fromName(repairParallelismStr); return new RepairScheduleStatus( - r.getLong("id"), + fromSequenceId(r.getLong("id")), r.getString("owner"), r.getString("cluster_name"), r.getString("keyspace_name"), @@ -56,4 +58,8 @@ public RepairScheduleStatus map(int index, ResultSet r, StatementContext ctx) r.getInt("days_between") ); } + + private static UUID fromSequenceId(long insertedId) { + return new UUID(insertedId, UUIDs.timeBased().getLeastSignificantBits()); + } } diff --git a/src/main/java/com/spotify/reaper/storage/postgresql/RepairSegmentMapper.java b/src/main/java/com/spotify/reaper/storage/postgresql/RepairSegmentMapper.java index 46739e932..a1a9eb165 100644 --- a/src/main/java/com/spotify/reaper/storage/postgresql/RepairSegmentMapper.java +++ b/src/main/java/com/spotify/reaper/storage/postgresql/RepairSegmentMapper.java @@ -13,6 +13,7 @@ */ package com.spotify.reaper.storage.postgresql; +import com.datastax.driver.core.utils.UUIDs; import com.spotify.reaper.core.RepairSegment; import com.spotify.reaper.service.RingRange; @@ -21,20 +22,25 @@ import java.sql.ResultSet; import java.sql.SQLException; +import java.util.UUID; public class 
RepairSegmentMapper implements ResultSetMapper { public RepairSegment map(int index, ResultSet r, StatementContext ctx) throws SQLException { RingRange range = new RingRange(r.getBigDecimal("start_token").toBigInteger(), r.getBigDecimal("end_token").toBigInteger()); - RepairSegment.Builder repairSegmentBuilder = - new RepairSegment.Builder(r.getLong("run_id"), range, r.getLong("repair_unit_id")); - return repairSegmentBuilder - .state(RepairSegment.State.values()[r.getInt("state")]) - .coordinatorHost(r.getString("coordinator_host")) - .startTime(RepairRunMapper.getDateTimeOrNull(r, "start_time")) - .endTime(RepairRunMapper.getDateTimeOrNull(r, "end_time")) - .failCount(r.getInt("fail_count")) - .build(r.getLong("id")); + return + new RepairSegment.Builder(range, fromSequenceId(r.getLong("repair_unit_id"))) + .withRunId(fromSequenceId(r.getLong("run_id"))) + .state(RepairSegment.State.values()[r.getInt("state")]) + .coordinatorHost(r.getString("coordinator_host")) + .startTime(RepairRunMapper.getDateTimeOrNull(r, "start_time")) + .endTime(RepairRunMapper.getDateTimeOrNull(r, "end_time")) + .failCount(r.getInt("fail_count")) + .build(fromSequenceId(r.getLong("id"))); + } + + private static UUID fromSequenceId(long insertedId) { + return new UUID(insertedId, UUIDs.timeBased().getLeastSignificantBits()); } } diff --git a/src/main/java/com/spotify/reaper/storage/postgresql/RepairUnitMapper.java b/src/main/java/com/spotify/reaper/storage/postgresql/RepairUnitMapper.java index de0bee3b1..57446017f 100644 --- a/src/main/java/com/spotify/reaper/storage/postgresql/RepairUnitMapper.java +++ b/src/main/java/com/spotify/reaper/storage/postgresql/RepairUnitMapper.java @@ -13,6 +13,7 @@ */ package com.spotify.reaper.storage.postgresql; +import com.datastax.driver.core.utils.UUIDs; import com.google.common.collect.Sets; import com.spotify.reaper.core.RepairUnit; @@ -22,6 +23,7 @@ import java.sql.ResultSet; import java.sql.SQLException; import java.util.Arrays; +import 
java.util.UUID; public class RepairUnitMapper implements ResultSetMapper { @@ -39,7 +41,11 @@ public RepairUnit map(int index, ResultSet r, StatementContext ctx) throws SQLEx r.getString("keyspace_name"), Sets.newHashSet(columnFamilies), r.getBoolean("incremental_repair")); - return builder.build(r.getLong("id")); + return builder.build(fromSequenceId(r.getLong("id"))); } + + private static UUID fromSequenceId(long insertedId) { + return new UUID(insertedId, UUIDs.timeBased().getLeastSignificantBits()); + } } diff --git a/src/main/java/com/spotify/reaper/storage/postgresql/UuidArgumentFactory.java b/src/main/java/com/spotify/reaper/storage/postgresql/UuidArgumentFactory.java new file mode 100644 index 000000000..80bb5616f --- /dev/null +++ b/src/main/java/com/spotify/reaper/storage/postgresql/UuidArgumentFactory.java @@ -0,0 +1,27 @@ +package com.spotify.reaper.storage.postgresql; + +import org.skife.jdbi.v2.StatementContext; +import org.skife.jdbi.v2.tweak.Argument; +import org.skife.jdbi.v2.tweak.ArgumentFactory; + +import java.util.UUID; + +/** + * Provides JDBI a method to map UUID value to a long (most sig bits) value in database. 
+ */ +public final class UuidArgumentFactory implements ArgumentFactory { + + @Override + public boolean accepts(Class expectedType, Object value, StatementContext ctx) { + return value instanceof UUID; + } + + @Override + public Argument build(Class expectedType, final UUID value, StatementContext ctx) { + return (pos, stmt, sc) -> stmt.setLong(pos, toSequenceId(value)); + } + + private static long toSequenceId(UUID id) { + return id.getMostSignificantBits(); + } +} diff --git a/src/main/resources/db/cassandra/003_switch_to_uuids.cql b/src/main/resources/db/cassandra/003_switch_to_uuids.cql new file mode 100644 index 000000000..104b0af9c --- /dev/null +++ b/src/main/resources/db/cassandra/003_switch_to_uuids.cql @@ -0,0 +1,112 @@ +-- +-- Cassandra schema for cassandra-reaper database + +-- CREATE KEYSPACE IF NOT EXISTS reaper_db WITH REPLICATION={'class':'SimpleStrategy', 'replication_factor':3}; + +-- use reaper_db; + +ALTER TABLE cluster + WITH compaction = {'class': 'LeveledCompactionStrategy'} + AND caching = {'rows_per_partition': 'ALL'}; + +-- Repair unit is basically a keyspace with a set of column families. +-- Cassandra supports repairing multiple column families in one go. 
+ +CREATE TABLE IF NOT EXISTS repair_unit_v1 ( + id timeuuid PRIMARY KEY, + cluster_name text, + keyspace_name text, + column_families set, + incremental_repair boolean +) + WITH compaction = {'class': 'LeveledCompactionStrategy'} + AND caching = {'rows_per_partition': 1}; + +DROP TABLE IF EXISTS repair_run; + +CREATE TABLE IF NOT EXISTS repair_run ( + id timeuuid, + cluster_name text STATIC, + repair_unit_id timeuuid STATIC, + cause text STATIC, + owner text STATIC, + state text STATIC, + creation_time timestamp STATIC, + start_time timestamp STATIC, + end_time timestamp STATIC, + pause_time timestamp STATIC, + intensity double STATIC, + last_event text STATIC, + segment_count int STATIC, + repair_parallelism text STATIC, + segment_id timeuuid, + start_token varint, + end_token varint, + segment_state int, + coordinator_host text, + segment_start_time timestamp, + segment_end_time timestamp, + fail_count int, + PRIMARY KEY (id, segment_id) +) + WITH compaction = {'class': 'LeveledCompactionStrategy'} + AND caching = {'rows_per_partition': 5000}; + + +DROP TABLE IF EXISTS repair_run_by_cluster; + +CREATE TABLE IF NOT EXISTS repair_run_by_cluster ( + cluster_name text, + id timeuuid, + PRIMARY KEY(cluster_name, id) +) + WITH compaction = {'class': 'LeveledCompactionStrategy'} + AND caching = {'rows_per_partition': 'ALL'}; + + +DROP TABLE IF EXISTS repair_run_by_unit; + +CREATE TABLE IF NOT EXISTS repair_run_by_unit ( + repair_unit_id timeuuid, + id timeuuid, + PRIMARY KEY(repair_unit_id, id) +) + WITH compaction = {'class': 'LeveledCompactionStrategy'} + AND caching = {'rows_per_partition': 'ALL'}; + + + + +CREATE TABLE IF NOT EXISTS repair_schedule_v1 ( + id timeuuid PRIMARY KEY, + repair_unit_id timeuuid, + state text , + days_between int , + next_activation timestamp, + run_history set, + segment_count int , + repair_parallelism text , + intensity double , + creation_time timestamp, + owner text , + pause_time timestamp +) + WITH compaction = {'class': 
'LeveledCompactionStrategy'} + AND caching = {'rows_per_partition': 1}; + + +DROP TABLE IF EXISTS repair_schedule_by_cluster_and_keyspace; + +CREATE TABLE IF NOT EXISTS repair_schedule_by_cluster_and_keyspace ( + cluster_name text, + keyspace_name text, + repair_schedule_id timeuuid, + PRIMARY KEY((cluster_name, keyspace_name), repair_schedule_id) +) + WITH compaction = {'class': 'LeveledCompactionStrategy'} + AND caching = {'rows_per_partition': 10}; + + +DROP TABLE IF EXISTS repair_segment; +DROP TABLE IF EXISTS repair_segment_by_run_id; +DROP TABLE IF EXISTS repair_id; \ No newline at end of file diff --git a/src/test/java/com/spotify/reaper/AssertionTest.java b/src/test/java/com/spotify/reaper/AssertionTest.java new file mode 100644 index 000000000..9dd5a36d8 --- /dev/null +++ b/src/test/java/com/spotify/reaper/AssertionTest.java @@ -0,0 +1,22 @@ + +package com.spotify.reaper; + +import org.junit.Test; + + +public final class AssertionTest{ + + @Test + public void test_assertions_enabled(){ + boolean asserted = false; + try{ + assert false; + }catch (AssertionError error){ + asserted = true; + } + if (!asserted){ + throw new AssertionError("assertions are not enabled"); + } + } + +} \ No newline at end of file diff --git a/src/test/java/com/spotify/reaper/acceptance/ReaperCassandraIT.java b/src/test/java/com/spotify/reaper/acceptance/ReaperCassandraIT.java index cc087d973..8013c06ca 100644 --- a/src/test/java/com/spotify/reaper/acceptance/ReaperCassandraIT.java +++ b/src/test/java/com/spotify/reaper/acceptance/ReaperCassandraIT.java @@ -57,7 +57,14 @@ public static void setUp() throws Exception { public static void initSchema() throws IOException{ Cluster cluster = Cluster.builder().addContactPoint("127.0.0.1").build(); Session tmpSession = cluster.connect(); - tmpSession.execute("DROP KEYSPACE IF EXISTS reaper_db"); + while(true){ + try{ + tmpSession.execute("DROP KEYSPACE IF EXISTS reaper_db"); + break; + }catch(Exception ex){ + LOG.warn("error dropping 
keyspace", ex); + } + } tmpSession.execute("CREATE KEYSPACE IF NOT EXISTS reaper_db WITH replication = {'class':'SimpleStrategy', 'replication_factor':1}"); tmpSession.close(); } diff --git a/src/test/java/com/spotify/reaper/acceptance/TestContext.java b/src/test/java/com/spotify/reaper/acceptance/TestContext.java index bccf18520..11f9943e9 100644 --- a/src/test/java/com/spotify/reaper/acceptance/TestContext.java +++ b/src/test/java/com/spotify/reaper/acceptance/TestContext.java @@ -3,6 +3,7 @@ import java.util.HashMap; import java.util.Map; import java.util.Set; +import java.util.UUID; /** * Helper class for holding acceptance test scenario state. @@ -15,7 +16,7 @@ public class TestContext { public static String TEST_CLUSTER; /* Used for targeting an object accessed in last test step. */ - public static Long LAST_MODIFIED_ID; + public static UUID LAST_MODIFIED_ID; /* Testing cluster seed host mapped to cluster name. */ public static Map TEST_CLUSTER_SEED_HOSTS = new HashMap<>(); diff --git a/src/test/java/com/spotify/reaper/resources/view/RepairScheduleStatusTest.java b/src/test/java/com/spotify/reaper/resources/view/RepairScheduleStatusTest.java index 66e7a1b53..b4dfb80bf 100644 --- a/src/test/java/com/spotify/reaper/resources/view/RepairScheduleStatusTest.java +++ b/src/test/java/com/spotify/reaper/resources/view/RepairScheduleStatusTest.java @@ -1,5 +1,6 @@ package com.spotify.reaper.resources.view; +import com.datastax.driver.core.utils.UUIDs; import com.google.common.collect.Lists; import com.fasterxml.jackson.core.type.TypeReference; @@ -26,7 +27,7 @@ public void testJacksonJSONParsing() throws Exception { data.setColumnFamilies(Lists.newArrayList()); data.setCreationTime(DateTime.now().withMillis(0)); data.setDaysBetween(2); - data.setId(1); + data.setId(UUIDs.timeBased()); data.setIntensity(0.75); data.setIncrementalRepair(false); data.setKeyspaceName("testKeyspace"); diff --git a/src/test/java/com/spotify/reaper/unit/resources/RepairRunResourceTest.java 
b/src/test/java/com/spotify/reaper/unit/resources/RepairRunResourceTest.java index aa02fcf75..bcc141992 100644 --- a/src/test/java/com/spotify/reaper/unit/resources/RepairRunResourceTest.java +++ b/src/test/java/com/spotify/reaper/unit/resources/RepairRunResourceTest.java @@ -1,5 +1,6 @@ package com.spotify.reaper.unit.resources; +import com.datastax.driver.core.utils.UUIDs; import com.google.common.base.Optional; import com.google.common.collect.Lists; import com.google.common.collect.Sets; @@ -30,6 +31,7 @@ import java.util.Collections; import java.util.List; import java.util.Set; +import java.util.UUID; import java.util.concurrent.TimeUnit; import javax.ws.rs.core.Response; @@ -149,7 +151,7 @@ public void testAddRepairRun() throws Exception { assertEquals(1, context.storage.getClusters().size()); assertEquals(1, context.storage.getRepairRunsForCluster(CLUSTER_NAME).size()); assertEquals(1, context.storage.getRepairRunIdsForCluster(CLUSTER_NAME).size()); - Long runId = context.storage.getRepairRunIdsForCluster(CLUSTER_NAME).iterator().next(); + UUID runId = context.storage.getRepairRunIdsForCluster(CLUSTER_NAME).iterator().next(); RepairRun run = context.storage.getRepairRun(runId).get(); assertEquals(RepairRun.RunState.NOT_STARTED, run.getRunState()); assertEquals(TIME_CREATE, run.getCreationTime().getMillis()); @@ -175,7 +177,7 @@ public void testAddRepairRun() throws Exception { public void testTriggerNotExistingRun() throws ReaperException { RepairRunResource resource = new RepairRunResource(context); Optional newState = Optional.of(RepairRun.RunState.RUNNING.toString()); - Response response = resource.modifyRunState(uriInfo, 42l, newState); + Response response = resource.modifyRunState(uriInfo, UUIDs.timeBased(), newState); assertEquals(Response.Status.NOT_FOUND.getStatusCode(), response.getStatus()); assertTrue(response.getEntity() instanceof String); assertTrue(response.getEntity().toString().contains("not found")); @@ -189,7 +191,7 @@ public void 
testTriggerAlreadyRunningRun() throws InterruptedException, ReaperEx RepairRunResource resource = new RepairRunResource(context); Response response = addDefaultRepairRun(resource); RepairRunStatus repairRunStatus = (RepairRunStatus) response.getEntity(); - long runId = repairRunStatus.getId(); + UUID runId = repairRunStatus.getId(); DateTimeUtils.setCurrentMillisFixed(TIME_START); Optional newState = Optional.of(RepairRun.RunState.RUNNING.toString()); @@ -207,7 +209,7 @@ public void testTriggerNewRunAlreadyRunningRun() throws InterruptedException, Re RepairRunResource resource = new RepairRunResource(context); Response response = addDefaultRepairRun(resource); RepairRunStatus repairRunStatus = (RepairRunStatus) response.getEntity(); - long runId = repairRunStatus.getId(); + UUID runId = repairRunStatus.getId(); DateTimeUtils.setCurrentMillisFixed(TIME_START); Optional newState = Optional.of(RepairRun.RunState.RUNNING.toString()); @@ -220,7 +222,7 @@ public void testTriggerNewRunAlreadyRunningRun() throws InterruptedException, Re RepairRunResource newResource = new RepairRunResource(context); Response newResponse = addDefaultRepairRun(newResource); RepairRunStatus newRepairRunStatus = (RepairRunStatus) newResponse.getEntity(); - long newRunId = newRepairRunStatus.getId(); + UUID newRunId = newRepairRunStatus.getId(); DateTimeUtils.setCurrentMillisFixed(TIME_START); Optional newRunState = Optional.of(RepairRun.RunState.RUNNING.toString()); @@ -268,7 +270,7 @@ public void testPauseNotRunningRun() throws InterruptedException, ReaperExceptio RepairRunResource resource = new RepairRunResource(context); Response response = addDefaultRepairRun(resource); RepairRunStatus repairRunStatus = (RepairRunStatus) response.getEntity(); - long runId = repairRunStatus.getId(); + UUID runId = repairRunStatus.getId(); response = resource.modifyRunState(uriInfo, runId, Optional.of(RepairRun.RunState.PAUSED.toString())); @@ -287,7 +289,7 @@ public void testPauseNotRunningRun() throws 
InterruptedException, ReaperExceptio @Test public void testPauseNotExistingRun() throws InterruptedException, ReaperException { RepairRunResource resource = new RepairRunResource(context); - Response response = resource.modifyRunState(uriInfo, 42l, + Response response = resource.modifyRunState(uriInfo, UUIDs.timeBased(), Optional.of(RepairRun.RunState.PAUSED.toString())); assertEquals(Response.Status.NOT_FOUND.getStatusCode(), response.getStatus()); assertEquals(0, context.storage.getRepairRunsWithState(RepairRun.RunState.RUNNING).size()); diff --git a/src/test/java/com/spotify/reaper/unit/service/RepairRunnerTest.java b/src/test/java/com/spotify/reaper/unit/service/RepairRunnerTest.java index 2202e431d..3205a4af0 100644 --- a/src/test/java/com/spotify/reaper/unit/service/RepairRunnerTest.java +++ b/src/test/java/com/spotify/reaper/unit/service/RepairRunnerTest.java @@ -50,6 +50,7 @@ import java.util.List; import java.util.Map; import java.util.Set; +import java.util.UUID; import java.util.concurrent.Semaphore; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; @@ -87,15 +88,12 @@ public void testHangingRepair() throws InterruptedException, ReaperException { storage.addRepairUnit(new RepairUnit.Builder(CLUSTER_NAME, KS_NAME, CF_NAMES, INCREMENTAL_REPAIR)); DateTimeUtils.setCurrentMillisFixed(TIME_RUN); RepairRun run = storage.addRepairRun( - new RepairRun.Builder(CLUSTER_NAME, cf.getId(), DateTime.now(), INTENSITY, 1, - RepairParallelism.PARALLEL)); - storage.addRepairSegments(Collections.singleton( - new RepairSegment.Builder(run.getId(), new RingRange(BigInteger.ZERO, BigInteger.ONE), - cf.getId())), run.getId()); - final long RUN_ID = run.getId(); - final long SEGMENT_ID = storage.getNextFreeSegment(run.getId()).get().getId(); - - assertEquals(storage.getRepairSegment(SEGMENT_ID).get().getState(), + new RepairRun.Builder(CLUSTER_NAME, cf.getId(), DateTime.now(), INTENSITY, 1, RepairParallelism.PARALLEL), + 
Collections.singleton(new RepairSegment.Builder(new RingRange(BigInteger.ZERO, BigInteger.ONE), cf.getId()))); + final UUID RUN_ID = run.getId(); + final UUID SEGMENT_ID = storage.getNextFreeSegment(run.getId()).get().getId(); + + assertEquals(storage.getRepairSegment(RUN_ID, SEGMENT_ID).get().getState(), RepairSegment.State.NOT_STARTED); AppContext context = new AppContext(); context.storage = storage; @@ -123,7 +121,7 @@ public JmxProxy connect(final Optional handler, String host @Override public Integer answer(InvocationOnMock invocation) throws Throwable { assertEquals(RepairSegment.State.NOT_STARTED, - storage.getRepairSegment(SEGMENT_ID).get().getState()); + storage.getRepairSegment(RUN_ID, SEGMENT_ID).get().getState()); final int repairNumber = repairAttempts.getAndIncrement(); switch (repairNumber) { @@ -134,7 +132,7 @@ public void run() { handler.get() .handle(repairNumber, Optional.of(ActiveRepairService.Status.STARTED), Optional.absent(), null); assertEquals(RepairSegment.State.RUNNING, - storage.getRepairSegment(SEGMENT_ID).get().getState()); + storage.getRepairSegment(RUN_ID, SEGMENT_ID).get().getState()); } }.start(); break; @@ -145,11 +143,11 @@ public void run() { handler.get() .handle(repairNumber, Optional.of(ActiveRepairService.Status.STARTED), Optional.absent(), null); assertEquals(RepairSegment.State.RUNNING, - storage.getRepairSegment(SEGMENT_ID).get().getState()); + storage.getRepairSegment(RUN_ID, SEGMENT_ID).get().getState()); handler.get() .handle(repairNumber, Optional.of(ActiveRepairService.Status.SESSION_SUCCESS), Optional.absent(), null); assertEquals(RepairSegment.State.DONE, - storage.getRepairSegment(SEGMENT_ID).get().getState()); + storage.getRepairSegment(RUN_ID, SEGMENT_ID).get().getState()); handler.get() .handle(repairNumber, Optional.of(ActiveRepairService.Status.FINISHED), Optional.absent(), null); mutex.release(); @@ -193,15 +191,12 @@ public void testHangingRepairNewAPI() throws InterruptedException, ReaperExcepti 
storage.addRepairUnit(new RepairUnit.Builder(CLUSTER_NAME, KS_NAME, CF_NAMES, INCREMENTAL_REPAIR)); DateTimeUtils.setCurrentMillisFixed(TIME_RUN); RepairRun run = storage.addRepairRun( - new RepairRun.Builder(CLUSTER_NAME, cf.getId(), DateTime.now(), INTENSITY, 1, - RepairParallelism.PARALLEL)); - storage.addRepairSegments(Collections.singleton( - new RepairSegment.Builder(run.getId(), new RingRange(BigInteger.ZERO, BigInteger.ONE), - cf.getId())), run.getId()); - final long RUN_ID = run.getId(); - final long SEGMENT_ID = storage.getNextFreeSegment(run.getId()).get().getId(); - - assertEquals(storage.getRepairSegment(SEGMENT_ID).get().getState(), + new RepairRun.Builder(CLUSTER_NAME, cf.getId(), DateTime.now(), INTENSITY, 1, RepairParallelism.PARALLEL), + Collections.singleton(new RepairSegment.Builder(new RingRange(BigInteger.ZERO, BigInteger.ONE), cf.getId()))); + final UUID RUN_ID = run.getId(); + final UUID SEGMENT_ID = storage.getNextFreeSegment(run.getId()).get().getId(); + + assertEquals(storage.getRepairSegment(RUN_ID, SEGMENT_ID).get().getState(), RepairSegment.State.NOT_STARTED); AppContext context = new AppContext(); context.storage = storage; @@ -230,7 +225,7 @@ public JmxProxy connect(final Optional handler, String host @Override public Integer answer(InvocationOnMock invocation) throws Throwable { assertEquals(RepairSegment.State.NOT_STARTED, - storage.getRepairSegment(SEGMENT_ID).get().getState()); + storage.getRepairSegment(RUN_ID, SEGMENT_ID).get().getState()); final int repairNumber = repairAttempts.getAndIncrement(); switch (repairNumber) { @@ -241,7 +236,7 @@ public void run() { handler.get() .handle(repairNumber, Optional.absent(), Optional.of(ProgressEventType.START), null); assertEquals(RepairSegment.State.RUNNING, - storage.getRepairSegment(SEGMENT_ID).get().getState()); + storage.getRepairSegment(RUN_ID, SEGMENT_ID).get().getState()); } }.start(); break; @@ -252,11 +247,11 @@ public void run() { handler.get() .handle(repairNumber, 
Optional.absent(), Optional.of(ProgressEventType.START), null); assertEquals(RepairSegment.State.RUNNING, - storage.getRepairSegment(SEGMENT_ID).get().getState()); + storage.getRepairSegment(RUN_ID, SEGMENT_ID).get().getState()); handler.get() .handle(repairNumber, Optional.absent(), Optional.of(ProgressEventType.SUCCESS), null); assertEquals(RepairSegment.State.DONE, - storage.getRepairSegment(SEGMENT_ID).get().getState()); + storage.getRepairSegment(RUN_ID, SEGMENT_ID).get().getState()); handler.get() .handle(repairNumber, Optional.absent(), Optional.of(ProgressEventType.COMPLETE), null); mutex.release(); @@ -282,7 +277,7 @@ public void run() { Thread.sleep(100); assertEquals(RepairRun.RunState.DONE, storage.getRepairRun(RUN_ID).get().getRunState()); } - + @Test public void testResumeRepair() throws InterruptedException, ReaperException { final String CLUSTER_NAME = "reaper"; @@ -298,24 +293,22 @@ public void testResumeRepair() throws InterruptedException, ReaperException { context.repairManager = new RepairManager(); storage.addCluster(new Cluster(CLUSTER_NAME, null, Collections.singleton(null))); - long cf = storage.addRepairUnit( + UUID cf = storage.addRepairUnit( new RepairUnit.Builder(CLUSTER_NAME, KS_NAME, CF_NAMES, INCREMENTAL_REPAIR)).getId(); DateTimeUtils.setCurrentMillisFixed(TIME_RUN); RepairRun run = storage.addRepairRun( - new RepairRun.Builder(CLUSTER_NAME, cf, DateTime.now(), INTENSITY, 1, - RepairParallelism.PARALLEL)); - storage.addRepairSegments(Lists.newArrayList( - new RepairSegment.Builder(run.getId(), new RingRange(BigInteger.ZERO, BigInteger.ONE), cf) - .state(RepairSegment.State.RUNNING).startTime(DateTime.now()).coordinatorHost("reaper") - .repairCommandId(1337), - new RepairSegment.Builder(run.getId(), new RingRange(BigInteger.ONE, BigInteger.ZERO), cf) - ), run.getId()); - final long RUN_ID = run.getId(); - final long SEGMENT_ID = storage.getNextFreeSegment(run.getId()).get().getId(); + new RepairRun.Builder(CLUSTER_NAME, cf, 
DateTime.now(), INTENSITY, 1, RepairParallelism.PARALLEL), + Lists.newArrayList( + new RepairSegment.Builder(new RingRange(BigInteger.ZERO, BigInteger.ONE), cf) + .state(RepairSegment.State.RUNNING).startTime(DateTime.now()).coordinatorHost("reaper") + .repairCommandId(1337), + new RepairSegment.Builder(new RingRange(BigInteger.ONE, BigInteger.ZERO), cf))); + final UUID RUN_ID = run.getId(); + final UUID SEGMENT_ID = storage.getNextFreeSegment(run.getId()).get().getId(); context.repairManager.initializeThreadPool(1, 500, TimeUnit.MILLISECONDS, 1, TimeUnit.MILLISECONDS); - assertEquals(storage.getRepairSegment(SEGMENT_ID).get().getState(), + assertEquals(storage.getRepairSegment(RUN_ID, SEGMENT_ID).get().getState(), RepairSegment.State.NOT_STARTED); context.jmxConnectionFactory = new JmxConnectionFactory() { @Override @@ -334,7 +327,7 @@ public JmxProxy connect(final Optional handler, String host @Override public Integer answer(InvocationOnMock invocation) throws Throwable { assertEquals(RepairSegment.State.NOT_STARTED, - storage.getRepairSegment(SEGMENT_ID).get().getState()); + storage.getRepairSegment(RUN_ID, SEGMENT_ID).get().getState()); new Thread() { @Override public void run() { @@ -362,10 +355,12 @@ public void run() { @Test public void getPossibleParallelRepairsTest() throws Exception { Map, List> map = RepairRunnerTest.threeNodeCluster(); - assertEquals(1, RepairRunner.getPossibleParallelRepairsCount(map)); + Map endpointsThreeNodes = RepairRunnerTest.threeNodeClusterEndpoint(); + assertEquals(1, RepairRunner.getPossibleParallelRepairsCount(map, endpointsThreeNodes)); map = RepairRunnerTest.sixNodeCluster(); - assertEquals(2, RepairRunner.getPossibleParallelRepairsCount(map)); + Map endpointsSixNodes = RepairRunnerTest.sixNodeClusterEndpoint(); + assertEquals(2, RepairRunner.getPossibleParallelRepairsCount(map, endpointsSixNodes)); } @Test @@ -384,8 +379,37 @@ public BigInteger apply(String s) { List segments = generator.generateSegments(32, tokens, 
Boolean.FALSE); Map, List> map = RepairRunnerTest.sixNodeCluster(); + Map endpointsSixNodes = RepairRunnerTest.sixNodeClusterEndpoint(); + List ranges = RepairRunner.getParallelRanges( + RepairRunner.getPossibleParallelRepairsCount(map, endpointsSixNodes), + segments + ); + assertEquals(2, ranges.size()); + assertEquals( "0", ranges.get(0).getStart().toString()); + assertEquals("150", ranges.get(0).getEnd().toString()); + assertEquals("150", ranges.get(1).getStart().toString()); + assertEquals( "0", ranges.get(1).getEnd().toString()); + } + + @Test + public void getParallelSegmentsTest2() throws ReaperException { + List tokens = Lists.transform( + Lists.newArrayList("0", "25", "50", "75", "100", "125", "150", "175", "200", "225", "250"), + new Function() { + @Nullable + @Override + public BigInteger apply(String s) { + return new BigInteger(s); + } + } + ); + SegmentGenerator generator = new SegmentGenerator(new BigInteger("0"), new BigInteger("299")); + List segments = generator.generateSegments(32, tokens, Boolean.FALSE); + + Map, List> map = RepairRunnerTest.sixNodeCluster(); + Map endpointsSixNodes = RepairRunnerTest.sixNodeClusterEndpoint(); List ranges = RepairRunner.getParallelRanges( - RepairRunner.getPossibleParallelRepairsCount(map), + RepairRunner.getPossibleParallelRepairsCount(map, endpointsSixNodes), segments ); assertEquals(2, ranges.size()); @@ -414,6 +438,26 @@ public static Map, List> sixNodeCluster() { return map; } + public static Map threeNodeClusterEndpoint() { + Map map = Maps.newHashMap(); + map.put("host1", "hostId1"); + map.put("host2", "hostId2"); + map.put("host3", "hostId3"); + return map; + } + + public static Map sixNodeClusterEndpoint() { + Map map = Maps.newHashMap(); + map.put("host1", "hostId1"); + map.put("host2", "hostId2"); + map.put("host3", "hostId3"); + map.put("host4", "hostId4"); + map.put("host5", "hostId5"); + map.put("host6", "hostId6"); + return map; + } + + private static Map, List> addRangeToMap(Map, List> map, 
String rStart, String rEnd, String... hosts) { List range = Lists.newArrayList(rStart, rEnd); diff --git a/src/test/java/com/spotify/reaper/unit/service/SegmentRunnerTest.java b/src/test/java/com/spotify/reaper/unit/service/SegmentRunnerTest.java index e8826d64d..e5bf8d10c 100644 --- a/src/test/java/com/spotify/reaper/unit/service/SegmentRunnerTest.java +++ b/src/test/java/com/spotify/reaper/unit/service/SegmentRunnerTest.java @@ -43,6 +43,7 @@ import java.math.BigInteger; import java.util.Collections; +import java.util.UUID; import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; @@ -70,12 +71,11 @@ public void timeoutTest() throws InterruptedException, ReaperException, Executio RepairUnit cf = context.storage.addRepairUnit( new RepairUnit.Builder("reaper", "reaper", Sets.newHashSet("reaper"), false)); RepairRun run = context.storage.addRepairRun( - new RepairRun.Builder("reaper", cf.getId(), DateTime.now(), 0.5, 1, - RepairParallelism.PARALLEL)); - context.storage.addRepairSegments(Collections.singleton( - new RepairSegment.Builder(run.getId(), new RingRange(BigInteger.ONE, BigInteger.ZERO), - cf.getId())), run.getId()); - final long segmentId = context.storage.getNextFreeSegment(run.getId()).get().getId(); + new RepairRun.Builder("reaper", cf.getId(), DateTime.now(), 0.5, 1, RepairParallelism.PARALLEL), + Collections.singleton(new RepairSegment.Builder(new RingRange(BigInteger.ONE, BigInteger.ZERO), cf.getId()))); + + final UUID runId = run.getId(); + final UUID segmentId = context.storage.getNextFreeSegment(run.getId()).get().getId(); final ExecutorService executor = Executors.newSingleThreadExecutor(); final MutableObject> future = new MutableObject<>(); @@ -94,14 +94,14 @@ public JmxProxy connect(final Optional handler, String host @Override public Integer answer(InvocationOnMock invocation) { assertEquals(RepairSegment.State.NOT_STARTED, - 
context.storage.getRepairSegment(segmentId).get().getState()); + context.storage.getRepairSegment(runId, segmentId).get().getState()); future.setValue(executor.submit(new Thread() { @Override public void run() { handler.get().handle(1, Optional.of(ActiveRepairService.Status.STARTED), Optional.absent(), "Repair command 1 has started"); assertEquals(RepairSegment.State.RUNNING, - context.storage.getRepairSegment(segmentId).get().getState()); + context.storage.getRepairSegment(runId, segmentId).get().getState()); } })); return 1; @@ -121,8 +121,8 @@ public void run() { executor.shutdown(); assertEquals(RepairSegment.State.NOT_STARTED, - context.storage.getRepairSegment(segmentId).get().getState()); - assertEquals(1, context.storage.getRepairSegment(segmentId).get().getFailCount()); + context.storage.getRepairSegment(runId, segmentId).get().getState()); + assertEquals(1, context.storage.getRepairSegment(runId, segmentId).get().getFailCount()); } @Test @@ -131,12 +131,10 @@ public void successTest() throws InterruptedException, ReaperException, Executio RepairUnit cf = storage.addRepairUnit( new RepairUnit.Builder("reaper", "reaper", Sets.newHashSet("reaper"), false)); RepairRun run = storage.addRepairRun( - new RepairRun.Builder("reaper", cf.getId(), DateTime.now(), 0.5, 1, - RepairParallelism.PARALLEL)); - storage.addRepairSegments(Collections.singleton( - new RepairSegment.Builder(run.getId(), new RingRange(BigInteger.ONE, BigInteger.ZERO), - cf.getId())), run.getId()); - final long segmentId = storage.getNextFreeSegment(run.getId()).get().getId(); + new RepairRun.Builder("reaper", cf.getId(), DateTime.now(), 0.5, 1, RepairParallelism.PARALLEL), + Collections.singleton(new RepairSegment.Builder(new RingRange(BigInteger.ONE, BigInteger.ZERO), cf.getId()))); + final UUID runId = run.getId(); + final UUID segmentId = storage.getNextFreeSegment(run.getId()).get().getId(); final ExecutorService executor = Executors.newSingleThreadExecutor(); final MutableObject> future = 
new MutableObject<>(); @@ -157,25 +155,25 @@ public JmxProxy connect(final Optional handler, String host @Override public Integer answer(InvocationOnMock invocation) { assertEquals(RepairSegment.State.NOT_STARTED, - storage.getRepairSegment(segmentId).get().getState()); + storage.getRepairSegment(runId, segmentId).get().getState()); future.setValue(executor.submit(new Runnable() { @Override public void run() { handler.get().handle(1, Optional.of(ActiveRepairService.Status.STARTED), Optional.absent(), "Repair command 1 has started"); assertEquals(RepairSegment.State.RUNNING, - storage.getRepairSegment(segmentId).get().getState()); + storage.getRepairSegment(runId, segmentId).get().getState()); // report about an unrelated repair. Shouldn't affect anything. handler.get().handle(2, Optional.of(ActiveRepairService.Status.SESSION_FAILED), Optional.absent(), "Repair command 2 has failed"); handler.get().handle(1, Optional.of(ActiveRepairService.Status.SESSION_SUCCESS), Optional.absent(), "Repair session succeeded in command 1"); assertEquals(RepairSegment.State.DONE, - storage.getRepairSegment(segmentId).get().getState()); + storage.getRepairSegment(runId, segmentId).get().getState()); handler.get().handle(1, Optional.of(ActiveRepairService.Status.FINISHED), Optional.absent(), "Repair command 1 has finished"); assertEquals(RepairSegment.State.DONE, - storage.getRepairSegment(segmentId).get().getState()); + storage.getRepairSegment(runId, segmentId).get().getState()); } })); return 1; @@ -194,8 +192,8 @@ public void run() { future.getValue().get(); executor.shutdown(); - assertEquals(RepairSegment.State.DONE, storage.getRepairSegment(segmentId).get().getState()); - assertEquals(0, storage.getRepairSegment(segmentId).get().getFailCount()); + assertEquals(RepairSegment.State.DONE, storage.getRepairSegment(runId, segmentId).get().getState()); + assertEquals(0, storage.getRepairSegment(runId, segmentId).get().getFailCount()); } @Test @@ -205,12 +203,10 @@ public void 
failureTest() throws InterruptedException, ReaperException, Executio storage.addRepairUnit( new RepairUnit.Builder("reaper", "reaper", Sets.newHashSet("reaper"), false)); RepairRun run = storage.addRepairRun( - new RepairRun.Builder("reaper", cf.getId(), DateTime.now(), 0.5, 1, - RepairParallelism.PARALLEL)); - storage.addRepairSegments(Collections.singleton( - new RepairSegment.Builder(run.getId(), new RingRange(BigInteger.ONE, BigInteger.ZERO), - cf.getId())), run.getId()); - final long segmentId = storage.getNextFreeSegment(run.getId()).get().getId(); + new RepairRun.Builder("reaper", cf.getId(), DateTime.now(), 0.5, 1, RepairParallelism.PARALLEL), + Collections.singleton(new RepairSegment.Builder(new RingRange(BigInteger.ONE, BigInteger.ZERO), cf.getId()))); + final UUID runId = run.getId(); + final UUID segmentId = storage.getNextFreeSegment(run.getId()).get().getId(); final ExecutorService executor = Executors.newSingleThreadExecutor(); final MutableObject> future = new MutableObject<>(); @@ -231,22 +227,22 @@ public JmxProxy connect(final Optional handler, String host @Override public Integer answer(InvocationOnMock invocation) { assertEquals(RepairSegment.State.NOT_STARTED, - storage.getRepairSegment(segmentId).get().getState()); + storage.getRepairSegment(runId, segmentId).get().getState()); future.setValue(executor.submit(new Runnable() { @Override public void run() { handler.get().handle(1, Optional.of(ActiveRepairService.Status.STARTED), Optional.absent(), "Repair command 1 has started"); assertEquals(RepairSegment.State.RUNNING, - storage.getRepairSegment(segmentId).get().getState()); + storage.getRepairSegment(runId, segmentId).get().getState()); handler.get().handle(1, Optional.of(ActiveRepairService.Status.SESSION_FAILED), Optional.absent(), "Repair command 1 has failed"); assertEquals(RepairSegment.State.NOT_STARTED, - storage.getRepairSegment(segmentId).get().getState()); + storage.getRepairSegment(runId, segmentId).get().getState()); 
handler.get().handle(1, Optional.of(ActiveRepairService.Status.FINISHED), Optional.absent(), "Repair command 1 has finished"); assertEquals(RepairSegment.State.NOT_STARTED, - storage.getRepairSegment(segmentId).get().getState()); + storage.getRepairSegment(runId, segmentId).get().getState()); } })); @@ -267,8 +263,8 @@ public void run() { executor.shutdown(); assertEquals(RepairSegment.State.NOT_STARTED, - storage.getRepairSegment(segmentId).get().getState()); - assertEquals(1, storage.getRepairSegment(segmentId).get().getFailCount()); + storage.getRepairSegment(runId, segmentId).get().getState()); + assertEquals(1, storage.getRepairSegment(runId, segmentId).get().getFailCount()); } @Test diff --git a/src/test/resources/cassandra-reaper-cassandra-at.yaml b/src/test/resources/cassandra-reaper-cassandra-at.yaml index 4b9510cb1..bb04f9c8c 100644 --- a/src/test/resources/cassandra-reaper-cassandra-at.yaml +++ b/src/test/resources/cassandra-reaper-cassandra-at.yaml @@ -14,6 +14,7 @@ logging: loggers: io.dropwizard: INFO org.eclipse.jetty: INFO + com.datastax.driver.core.QueryLogger.NORMAL: INFO appenders: - type: console