Skip to content

Commit efd6347

Browse files
authored
DEVPROD-8937 Set a max number of intent hosts (#8820)
1 parent dca02b1 commit efd6347

File tree

3 files changed

+71
-0
lines changed

3 files changed

+71
-0
lines changed

model/host/host.go

+11
Original file line numberDiff line numberDiff line change
@@ -3136,6 +3136,17 @@ func CountSpawnhostsWithNoExpirationByUser(ctx context.Context, user string) (in
31363136
return Count(ctx, query)
31373137
}
31383138

3139+
// CountIntentHosts counts the number of intent hosts Evergreen will soon
3140+
// attempt to create.
3141+
func CountIntentHosts(ctx context.Context) (int, error) {
3142+
query := bson.M{
3143+
IdKey: bson.M{"$regex": "^evg.*"},
3144+
StartedByKey: evergreen.User,
3145+
StatusKey: bson.M{"$in": evergreen.UpHostStatus},
3146+
}
3147+
return Count(ctx, query)
3148+
}
3149+
31393150
// FindSpawnhostsWithNoExpirationToExtend returns all hosts that are set to never
31403151
// expire but have their expiration time within the next day and are still up.
31413152
func FindSpawnhostsWithNoExpirationToExtend(ctx context.Context) ([]Host, error) {

model/host/host_test.go

+38
Original file line numberDiff line numberDiff line change
@@ -4622,6 +4622,44 @@ func TestCountSpawnhostsWithNoExpirationByUser(t *testing.T) {
46224622
assert.Equal(t, 0, count)
46234623
}
46244624

4625+
func TestCountIntentHosts(t *testing.T) {
4626+
4627+
require.NoError(t, db.ClearCollections(Collection))
4628+
hosts := []Host{
4629+
{
4630+
Id: "evg-host-1",
4631+
Status: evergreen.HostRunning,
4632+
StartedBy: evergreen.User,
4633+
},
4634+
{
4635+
Id: "evg-host-2",
4636+
Status: evergreen.HostRunning,
4637+
StartedBy: evergreen.User,
4638+
},
4639+
{
4640+
Id: "evg-host-3",
4641+
Status: evergreen.HostTerminated,
4642+
StartedBy: evergreen.User,
4643+
},
4644+
{
4645+
Id: "host-4",
4646+
Status: evergreen.HostRunning,
4647+
StartedBy: evergreen.User,
4648+
},
4649+
{
4650+
Id: "host-5",
4651+
Status: evergreen.HostStarting,
4652+
StartedBy: evergreen.User,
4653+
},
4654+
}
4655+
for _, h := range hosts {
4656+
assert.NoError(t, h.Insert(t.Context()))
4657+
}
4658+
count, err := CountIntentHosts(t.Context())
4659+
assert.NoError(t, err)
4660+
assert.Equal(t, 2, count)
4661+
}
4662+
46254663
func TestFindSpawnhostsWithNoExpirationToExtend(t *testing.T) {
46264664
ctx, cancel := context.WithCancel(context.Background())
46274665
defer cancel()

units/host_allocator.go

+22
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,9 @@ const (
2727
hostAllocatorJobName = "host-allocator"
2828
hostAllocatorAttributePrefix = "evergreen.host_allocator"
2929
maxHostAllocatorJobTime = 10 * time.Minute
30+
// maxIntentHosts represents the maximum number of intent hosts we can
31+
// be processing at once, in order to prevent over-logging.
32+
maxIntentHosts = 5000
3033
)
3134

3235
func init() {
@@ -198,6 +201,25 @@ func (j *hostAllocatorJob) Run(ctx context.Context) {
198201
// host-spawning phase
199202
//////////////////////
200203

204+
numIntentHosts, err := host.CountIntentHosts(ctx)
205+
grip.Error(message.WrapError(err, message.Fields{
206+
"runner": hostAllocatorJobName,
207+
"instance": j.ID(),
208+
"distro": j.DistroID,
209+
"message": "failed to count intent hosts",
210+
}))
211+
212+
if numIntentHosts > maxIntentHosts {
213+
grip.Info(message.Fields{
214+
"runner": hostAllocatorJobName,
215+
"instance": j.ID(),
216+
"distro": j.DistroID,
217+
"message": "too many intent hosts, skipping host allocation",
218+
"max_hosts": maxIntentHosts,
219+
})
220+
return
221+
}
222+
201223
hostSpawningBegins := time.Now()
202224
// Number of new hosts to be allocated
203225
hostsSpawned, err := scheduler.SpawnHosts(ctx, *distro, nHosts, containerPool)

0 commit comments

Comments
 (0)