Skip to content

Commit 68b612f

Browse files
committed
More cleanup
1 parent 1b75090 commit 68b612f

File tree

7 files changed

+223
-197
lines changed

7 files changed

+223
-197
lines changed

server/src/main/java/org/apache/druid/metadata/SqlSegmentsMetadataQuery.java

+2-2
Original file line numberDiff line numberDiff line change
@@ -524,7 +524,7 @@ private CloseableIterator<DataSegmentPlus> retrieveSegmentBatchById(
524524
} else {
525525
final Query<Map<String, Object>> query = handle.createQuery(
526526
StringUtils.format(
527-
"SELECT payload, used, upgraded_from_segment_id, used_status_last_updated"
527+
"SELECT payload, used, upgraded_from_segment_id, used_status_last_updated, created_date"
528528
+ " FROM %s WHERE dataSource = :dataSource %s",
529529
dbTables.getSegmentsTable(), getParameterizedInConditionForColumn("id", segmentIds)
530530
)
@@ -538,7 +538,7 @@ private CloseableIterator<DataSegmentPlus> retrieveSegmentBatchById(
538538
.map(
539539
(index, r, ctx) -> new DataSegmentPlus(
540540
JacksonUtils.readValue(jsonMapper, r.getBytes(1), DataSegment.class),
541-
null,
541+
DateTimes.of(r.getString(5)),
542542
DateTimes.of(r.getString(4)),
543543
r.getBoolean(2),
544544
null,

server/src/main/java/org/apache/druid/metadata/segment/cache/HeapMemoryDatasourceSegmentCache.java

+121-102
Original file line numberDiff line numberDiff line change
@@ -96,24 +96,9 @@ boolean isEmpty()
9696
*/
9797
boolean shouldRefreshUsedSegment(SegmentId segmentId, @Nullable DateTime persistedUpdateTime)
9898
{
99-
return withReadLock(() -> {
100-
final DataSegmentPlus cachedState = readSegmentsFor(segmentId.getInterval())
101-
.idToUsedSegment.get(segmentId);
102-
return cachedState == null
103-
|| shouldUpdateCache(cachedState.getUsedStatusLastUpdatedDate(), persistedUpdateTime);
104-
});
105-
}
106-
107-
/**
108-
* Checks if a pending segment needs to be refreshed in the cache.
109-
*/
110-
boolean shouldRefreshPendingSegment(PendingSegmentRecord record)
111-
{
112-
final SegmentIdWithShardSpec segmentId = record.getId();
11399
return withReadLock(
114-
() -> !readSegmentsFor(segmentId.getInterval())
115-
.idToPendingSegment
116-
.containsKey(segmentId.toString())
100+
() -> readSegmentsFor(segmentId.getInterval())
101+
.shouldRefreshUsedSegment(segmentId, persistedUpdateTime)
117102
);
118103
}
119104

@@ -139,60 +124,6 @@ private static boolean shouldUpdateCache(
139124
}
140125
}
141126

142-
/**
143-
* Adds or updates the given segment in the cache.
144-
*
145-
* @return true if the segment was updated in the cache, false if the segment
146-
* was left unchanged in the cache.
147-
*/
148-
boolean addSegment(DataSegmentPlus segmentPlus)
149-
{
150-
if (Boolean.TRUE.equals(segmentPlus.getUsed())) {
151-
return addUsedSegment(segmentPlus);
152-
} else {
153-
return addUnusedSegmentId(
154-
segmentPlus.getDataSegment().getId(),
155-
segmentPlus.getUsedStatusLastUpdatedDate()
156-
);
157-
}
158-
}
159-
160-
/**
161-
* Adds or updates a used segment in the cache.
162-
*/
163-
private boolean addUsedSegment(DataSegmentPlus segmentPlus)
164-
{
165-
final DataSegment segment = segmentPlus.getDataSegment();
166-
final SegmentId segmentId = segment.getId();
167-
168-
return withWriteLock(() -> {
169-
if (!shouldRefreshUsedSegment(segmentId, segmentPlus.getUsedStatusLastUpdatedDate())) {
170-
return false;
171-
}
172-
173-
final SegmentsInInterval segments = writeSegmentsFor(segmentId.getInterval());
174-
segments.idToUsedSegment.put(segmentId, segmentPlus);
175-
segments.unusedSegmentIdToUpdatedTime.remove(segment.getId());
176-
return true;
177-
});
178-
}
179-
180-
/**
181-
* Adds or updates an unused segment in the cache.
182-
*
183-
* @param updatedTime Last updated time of this segment as persisted in the
184-
* metadata store. This value can be null for segments
185-
* persisted to the metadata store before the column
186-
* used_status_last_updated was added to the segments table.
187-
*/
188-
boolean addUnusedSegmentId(SegmentId segmentId, @Nullable DateTime updatedTime)
189-
{
190-
return withWriteLock(
191-
() -> writeSegmentsFor(segmentId.getInterval())
192-
.addUnusedSegmentId(segmentId, updatedTime)
193-
);
194-
}
195-
196127
/**
197128
* Atomically updates segment IDs in the cache based on the segments
198129
* currently present in the metadata store.
@@ -219,9 +150,7 @@ SegmentSyncResult syncSegmentIds(List<SegmentRecord> persistedSegments, DateTime
219150

220151
if (record.isUsed()) {
221152
// Refresh this used segment if it has been updated in the metadata store
222-
final DataSegmentPlus cachedState = intervalSegments.idToUsedSegment.get(segmentId);
223-
if (cachedState == null
224-
|| shouldUpdateCache(cachedState.getUsedStatusLastUpdatedDate(), record.getLastUpdatedTime())) {
153+
if (intervalSegments.shouldRefreshUsedSegment(segmentId, record.getLastUpdatedTime())) {
225154
usedSegmentIdsToRefresh.add(segmentId.toString());
226155
}
227156
} else {
@@ -241,14 +170,23 @@ SegmentSyncResult syncSegmentIds(List<SegmentRecord> persistedSegments, DateTime
241170
});
242171
}
243172

244-
SegmentSyncResult syncPendingSegments(List<PendingSegmentRecord> persistedPendingSegments, DateTime syncStartTime)
173+
/**
174+
* Atomically updates pending segments in the cache based on the segments
175+
* currently present in the metadata store.
176+
*
177+
* @param persistedPendingSegments All pending segments present in the metadata store.
178+
* @param syncStartTime Start time of the current sync
179+
* @return Summary of updates made to the cache.
180+
*/
181+
SegmentSyncResult syncPendingSegments(
182+
List<PendingSegmentRecord> persistedPendingSegments,
183+
DateTime syncStartTime
184+
)
245185
{
246186
return withWriteLock(() -> {
247187
int numSegmentsUpdated = 0;
248188
for (PendingSegmentRecord record : persistedPendingSegments) {
249-
final boolean updated = shouldRefreshPendingSegment(record)
250-
&& insertPendingSegment(record, false);
251-
if (updated) {
189+
if (insertPendingSegment(record, false)) {
252190
++numSegmentsUpdated;
253191
}
254192
}
@@ -264,7 +202,7 @@ SegmentSyncResult syncPendingSegments(List<PendingSegmentRecord> persistedPendin
264202
* Removes all pending segments which are present in the cache but not present
265203
* in the metadata store.
266204
*/
267-
int removeUnpersistedPendingSegments(Set<String> persistedPendingSegmentIds, DateTime pollStartTime)
205+
private int removeUnpersistedPendingSegments(Set<String> persistedPendingSegmentIds, DateTime pollStartTime)
268206
{
269207
return withWriteLock(() -> {
270208
final Set<String> unpersistedSegmentIds =
@@ -284,7 +222,7 @@ && shouldUpdateCache(record.getCreatedDate(), pollStartTime)
284222
* @param syncStartTime Start time of the current sync
285223
* @return Number of unpersisted segments removed from cache.
286224
*/
287-
int removeUnpersistedSegments(Set<SegmentId> persistedSegmentIds, DateTime syncStartTime)
225+
private int removeUnpersistedSegments(Set<SegmentId> persistedSegmentIds, DateTime syncStartTime)
288226
{
289227
return withWriteLock(() -> {
290228
final Set<SegmentId> unpersistedSegmentIds = new HashSet<>();
@@ -350,11 +288,17 @@ void markCacheSynced()
350288
});
351289
}
352290

291+
/**
292+
* Must be accessed within a {@link #withReadLock} method.
293+
*/
353294
private SegmentsInInterval readSegmentsFor(Interval interval)
354295
{
355296
return intervalToSegments.getOrDefault(interval, SegmentsInInterval.EMPTY);
356297
}
357298

299+
/**
300+
* Must be accessed within a {@link #withWriteLock} method.
301+
*/
358302
private SegmentsInInterval writeSegmentsFor(Interval interval)
359303
{
360304
return intervalToSegments.computeIfAbsent(interval, i -> new SegmentsInInterval());
@@ -535,7 +479,8 @@ public int insertSegments(Set<DataSegmentPlus> segments)
535479
return withWriteLock(() -> {
536480
int numInsertedSegments = 0;
537481
for (DataSegmentPlus segmentPlus : segments) {
538-
if (addSegment(segmentPlus)) {
482+
final Interval interval = segmentPlus.getDataSegment().getInterval();
483+
if (writeSegmentsFor(interval).addSegment(segmentPlus)) {
539484
++numInsertedSegments;
540485
}
541486
}
@@ -553,19 +498,22 @@ public int insertSegmentsWithMetadata(Set<DataSegmentPlus> segments)
553498
@Override
554499
public boolean markSegmentAsUnused(SegmentId segmentId, DateTime updateTime)
555500
{
556-
return addUnusedSegmentId(segmentId, updateTime);
501+
return writeSegmentsFor(segmentId.getInterval()).addUnusedSegmentId(segmentId, updateTime);
557502
}
558503

559504
@Override
560505
public int markSegmentsAsUnused(Set<SegmentId> segmentIds, DateTime updateTime)
561506
{
562-
int updatedCount = 0;
563-
for (SegmentId segmentId : segmentIds) {
564-
if (addUnusedSegmentId(segmentId, updateTime)) {
565-
++updatedCount;
507+
return withWriteLock(() -> {
508+
int updatedCount = 0;
509+
for (SegmentId segmentId : segmentIds) {
510+
final Interval interval = segmentId.getInterval();
511+
if (writeSegmentsFor(interval).addUnusedSegmentId(segmentId, updateTime)) {
512+
++updatedCount;
513+
}
566514
}
567-
}
568-
return updatedCount;
515+
return updatedCount;
516+
});
569517
}
570518

571519
@Override
@@ -577,29 +525,38 @@ public int markSegmentsWithinIntervalAsUnused(
577525
{
578526
final Set<String> eligibleVersions = versions == null ? null : Set.copyOf(versions);
579527

580-
int updatedCount = 0;
581-
for (DataSegmentPlus segment : findUsedSegmentsPlusOverlappingAnyOf(List.of(interval))) {
582-
// Update segments with eligible versions or all versions (if eligibleVersions is null)
583-
if ((eligibleVersions == null || eligibleVersions.contains(segment.getDataSegment().getVersion()))
584-
&& addUnusedSegmentId(segment.getDataSegment().getId(), updateTime)) {
585-
++updatedCount;
528+
return withWriteLock(() -> {
529+
int updatedCount = 0;
530+
for (DataSegmentPlus segmentPlus : findUsedSegmentsPlusOverlappingAnyOf(List.of(interval))) {
531+
// Update segments with eligible versions or all versions (if eligibleVersions is null)
532+
final DataSegment segment = segmentPlus.getDataSegment();
533+
final boolean isEligibleVersion = eligibleVersions == null
534+
|| eligibleVersions.contains(segment.getVersion());
535+
if (isEligibleVersion
536+
&& writeSegmentsFor(segment.getInterval()).addUnusedSegmentId(segment.getId(), updateTime)) {
537+
++updatedCount;
538+
}
586539
}
587-
}
588540

589-
return updatedCount;
541+
return updatedCount;
542+
});
590543
}
591544

592545
@Override
593546
public int markAllSegmentsAsUnused(DateTime updateTime)
594547
{
595-
int updatedCount = 0;
596-
for (DataSegmentPlus segment : findUsedSegmentsPlusOverlappingAnyOf(List.of())) {
597-
if (addUnusedSegmentId(segment.getDataSegment().getId(), updateTime)) {
598-
++updatedCount;
548+
return withWriteLock(() -> {
549+
int updatedCount = 0;
550+
for (DataSegmentPlus segmentPlus : findUsedSegmentsPlusOverlappingAnyOf(List.of())) {
551+
final DataSegment segment = segmentPlus.getDataSegment();
552+
if (writeSegmentsFor(segment.getInterval())
553+
.addUnusedSegmentId(segment.getId(), updateTime)) {
554+
++updatedCount;
555+
}
599556
}
600-
}
601557

602-
return updatedCount;
558+
return updatedCount;
559+
});
603560
}
604561

605562
@Override
@@ -786,6 +743,49 @@ private void updateMaxUnusedId(SegmentId segmentId)
786743
.merge(segmentId.getVersion(), segmentId.getPartitionNum(), Math::max);
787744
}
788745

746+
/**
747+
* Adds or updates the given segment in the cache.
748+
*
749+
* @return true if the segment was updated in the cache, false if the segment
750+
* was left unchanged in the cache.
751+
*/
752+
boolean addSegment(DataSegmentPlus segmentPlus)
753+
{
754+
if (Boolean.TRUE.equals(segmentPlus.getUsed())) {
755+
return addUsedSegment(segmentPlus);
756+
} else {
757+
return addUnusedSegmentId(
758+
segmentPlus.getDataSegment().getId(),
759+
segmentPlus.getUsedStatusLastUpdatedDate()
760+
);
761+
}
762+
}
763+
764+
/**
765+
* Adds or updates a used segment in the cache.
766+
*/
767+
private boolean addUsedSegment(DataSegmentPlus segmentPlus)
768+
{
769+
final DataSegment segment = segmentPlus.getDataSegment();
770+
final SegmentId segmentId = segment.getId();
771+
772+
if (!shouldRefreshUsedSegment(segmentId, segmentPlus.getUsedStatusLastUpdatedDate())) {
773+
return false;
774+
}
775+
776+
idToUsedSegment.put(segmentId, segmentPlus);
777+
unusedSegmentIdToUpdatedTime.remove(segment.getId());
778+
return true;
779+
}
780+
781+
/**
782+
* Adds or updates an unused segment in the cache.
783+
*
784+
* @param updatedTime Last updated time of this segment as persisted in the
785+
* metadata store. This value can be null for segments
786+
* persisted to the metadata store before the column
787+
* used_status_last_updated was added to the segments table.
788+
*/
789789
private boolean addUnusedSegmentId(SegmentId segmentId, @Nullable DateTime updatedTime)
790790
{
791791
idToUsedSegment.remove(segmentId);
@@ -799,5 +799,24 @@ private boolean addUnusedSegmentId(SegmentId segmentId, @Nullable DateTime updat
799799
return false;
800800
}
801801
}
802+
803+
private boolean shouldRefreshUnusedSegment(SegmentId segmentId, DateTime newUpdateTime)
804+
{
805+
return !unusedSegmentIdToUpdatedTime.containsKey(segmentId)
806+
|| shouldUpdateCache(unusedSegmentIdToUpdatedTime.get(segmentId), newUpdateTime);
807+
}
808+
809+
private boolean shouldRefreshUsedSegment(SegmentId segmentId, DateTime newUpdateTime)
810+
{
811+
final DataSegmentPlus usedSegment = idToUsedSegment.get(segmentId);
812+
813+
if (usedSegment == null) {
814+
// Do not refresh the segment if it has recently been marked as unused in the cache
815+
return shouldRefreshUnusedSegment(segmentId, newUpdateTime);
816+
} else {
817+
// Refresh the used segment if the entry in the cache is stale
818+
return shouldUpdateCache(usedSegment.getUsedStatusLastUpdatedDate(), newUpdateTime);
819+
}
820+
}
802821
}
803822
}

server/src/main/java/org/apache/druid/metadata/segment/cache/HeapMemorySegmentMetadataCache.java

+5-3
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,9 @@
6767
/**
6868
* In-memory implementation of {@link SegmentMetadataCache}.
6969
* <p>
70+
* Non-leader Overlords also keep polling the metadata store to keep the cache
71+
* up-to-date in case leadership changes.
72+
* <p>
7073
* The map {@link #datasourceToSegmentCache} contains the cache for each datasource.
7174
* Items are only added to this map and never removed. This is to avoid handling
7275
* race conditions where a thread has invoked {@link #getDatasource} but hasn't
@@ -606,9 +609,8 @@ private void updatePendingSegmentsInCache(
606609
}
607610

608611
/**
609-
* Retrieves all pending segments from metadata store and updates the cache if
610-
* {@link HeapMemoryDatasourceSegmentCache#shouldRefreshPendingSegment} is
611-
* true for it.
612+
* Retrieves all pending segments from metadata store and populates them in
613+
* the respective {@link DatasourceSegmentSummary}.
612614
*/
613615
private void retrieveAllPendingSegments(
614616
Map<String, DatasourceSegmentSummary> datasourceToSummary

server/src/main/java/org/apache/druid/metadata/segment/cache/Metric.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ private Metric()
5050
/**
5151
* Number of transactions performed on the cache for a datasource.
5252
*/
53-
public static final String TRANSACTION_COUNT = "transactions";
53+
public static final String TRANSACTION_COUNT = METRIC_NAME_PREFIX + "transactions";
5454

5555

5656
// CACHE SYNC TIME METRICS

server/src/main/java/org/apache/druid/metadata/segment/cache/SegmentMetadataCache.java

+6-1
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,12 @@
2020
package org.apache.druid.metadata.segment.cache;
2121

2222
/**
23-
* Cache for metadata of pending segments and committed segments.
23+
* Cache for metadata of pending segments and committed segments maintained by
24+
* the Overlord to improve performance of segment allocation and other task actions.
25+
* <p>
26+
* Not to be confused with {@link org.apache.druid.segment.metadata.AbstractSegmentMetadataCache}
27+
* which is used by Brokers to cache row signature, number of rows, etc. to aid
28+
* with Druid query performance.
2429
*/
2530
public interface SegmentMetadataCache
2631
{

0 commit comments

Comments
 (0)