
Commit fad439f

Fix/too many open files in download (#276)
* Limit the number of open files when downloading, to fit within the OS's configured limit. Not needed when uploading, because there the number of open files is (more or less) limited by the number of goroutines used to initiate transfers. ("More or less" because when we do retries for 503s we will tend to open additional file handles to re-read the retried data.)
* Reduce MaxIdleConnsPerHost, to accommodate Linux systems with a low open-files-per-user limit (since each open connection counts as a file).
* Log concurrency parameters at the start of the job. Useful for debugging things like the max-concurrent-files issue and its limits.
* Take network connections into account when setting the max number of concurrently open files.
* Use one HTTP client for the whole job, instead of one for each job part. Previously, with one client per job part, we had an issue when the files in the job parts were very small and there were a lot of files: we got through each job part so quickly that its lingering pool of open connections became a problem, especially on resource-constrained systems. While the connections are eventually cleaned up, they were not cleaned up fast enough to compensate for our fast progression through the job parts. Now we use just one HTTP client, so there is just one connection pool for the whole job.
* Add a comment to explain max idle conns per host.
* Fix the test suite to match the change in connection handling.
1 parent 4c3f86c commit fad439f
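
The last two bullets above are easiest to see in miniature. Below is a hedged sketch, not azcopy's actual NewAzcopyHTTPClient (whose exact settings live in the ste package): build one http.Client per job, and cap its per-host idle-connection pool so lingering keep-alive sockets do not consume the process's file-handle budget. The constructor name and numeric values are illustrative assumptions.

```go
package main

import (
	"net"
	"net/http"
	"time"
)

// newSharedHTTPClient is an illustrative stand-in for ste.NewAzcopyHTTPClient:
// one client for the whole job, whose transport keeps a bounded per-host pool
// of idle keep-alive connections, so idle sockets don't eat into the OS's
// open-file limit. The timeout and pool values here are assumptions, not
// azcopy's real settings.
func newSharedHTTPClient(maxIdleConnsPerHost int) *http.Client {
	return &http.Client{
		Transport: &http.Transport{
			DialContext: (&net.Dialer{
				Timeout:   30 * time.Second,
				KeepAlive: 30 * time.Second,
			}).DialContext,
			MaxIdleConnsPerHost: maxIdleConnsPerHost, // each idle connection holds an open socket (a "file" on Linux)
			IdleConnTimeout:     90 * time.Second,    // reclaim idle sockets reasonably promptly
		},
	}
}

func main() {
	client := newSharedHTTPClient(500) // 500 is an arbitrary example cap
	_ = client                         // in azcopy, every job part shares one such client
}
```

Because Go's http.Transport pools idle connections per host, sharing one client across all job parts lets small, quickly-completed parts reuse the same sockets instead of each leaving behind its own lingering idle pool.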

15 files changed: +528 -404 lines

.gitignore

+338 -337
Large diffs are not rendered by default.

cmd/credentialUtil.go

+2 -1

@@ -277,7 +277,8 @@ func createBlobPipeline(ctx context.Context, credInfo common.CredentialInfo) (pi
         RetryDelay:    ste.UploadRetryDelay,
         MaxRetryDelay: ste.UploadMaxRetryDelay,
     },
-    nil), nil
+    nil,
+    ste.NewAzcopyHTTPClient()), nil
 }

 func createBlobFSPipeline(ctx context.Context, credInfo common.CredentialInfo) (pipeline.Pipeline, error) {

common/cacheLimiter.go

+18 -10

@@ -29,14 +29,16 @@ import (

 type Predicate func() bool

-// Used to limit the amount of in-flight data in RAM, to keep it an an acceptable level.
-// For downloads, network is producer and disk is consumer, while for uploads the roles are reversed.
+// Used to limit the amounts of things. E.g. amount of in-flight data in RAM, to keep it an an acceptable level.
+// Also used for number of open files (since that's limited on Linux).
+// In the case of RAM usage, for downloads, network is producer and disk is consumer, while for uploads the roles are reversed.
 // In either case, if the producer is faster than the consumer, this CacheLimiter is necessary
-// prevent unbounded RAM usage
+// prevent unbounded RAM usage.
 type CacheLimiter interface {
-    TryAddBytes(count int64, useRelaxedLimit bool) (added bool)
-    WaitUntilAddBytes(ctx context.Context, count int64, useRelaxedLimit Predicate) error
-    RemoveBytes(count int64)
+    TryAdd(count int64, useRelaxedLimit bool) (added bool)
+    WaitUntilAdd(ctx context.Context, count int64, useRelaxedLimit Predicate) error
+    Remove(count int64)
+    Limit() int64
 }

 type cacheLimiter struct {

@@ -49,7 +51,7 @@ func NewCacheLimiter(limit int64) CacheLimiter {
 }

 // TryAddBytes tries to add a memory allocation within the limit. Returns true if it could be (and was) added
-func (c *cacheLimiter) TryAddBytes(count int64, useRelaxedLimit bool) (added bool) {
+func (c *cacheLimiter) TryAdd(count int64, useRelaxedLimit bool) (added bool) {
     lim := c.limit

     // Above the "strict" limit, there's a bit of extra room, which we use

@@ -63,6 +65,8 @@ func (c *cacheLimiter) TryAddBytes(count int64, useRelaxedLimit bool) (added boo
         // no backlogging of new chunks behind slow ones (i.e. these "good" cases are allowed to proceed without
         // interruption) and for uploads its used for re-doing the prefetches when we do retries (i.e. so these are
         // not blocked by other chunks using up RAM).
+        // TODO: now that cacheLimiter is used for multiple purposes, the hard-coding of the distinction between
+        // relaxed and strict limits is less appropriate. Refactor to make it a configuration param of the instance?
     }

     if atomic.AddInt64(&c.value, count) <= lim {

@@ -74,10 +78,10 @@ func (c *cacheLimiter) TryAddBytes(count int64, useRelaxedLimit bool) (added boo
 }

 /// WaitUntilAddBytes blocks until it completes a successful call to TryAddBytes
-func (c *cacheLimiter) WaitUntilAddBytes(ctx context.Context, count int64, useRelaxedLimit Predicate) error {
+func (c *cacheLimiter) WaitUntilAdd(ctx context.Context, count int64, useRelaxedLimit Predicate) error {
     for {
         // Proceed if there's room in the cache
-        if c.TryAddBytes(count, useRelaxedLimit()) {
+        if c.TryAdd(count, useRelaxedLimit()) {
             return nil
         }


@@ -97,7 +101,11 @@ func (c *cacheLimiter) WaitUntilAddBytes(ctx context.Context, count int64, useRe
     }
 }

-func (c *cacheLimiter) RemoveBytes(count int64) {
+func (c *cacheLimiter) Remove(count int64) {
     negativeDelta := -count
     atomic.AddInt64(&c.value, negativeDelta)
 }
+
+func (c *cacheLimiter) Limit() int64 {
+    return c.limit
+}
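
Because the limiter now counts generic units rather than bytes, the same type can meter open file handles: each open file costs one unit against a limit equal to the computed concurrent-files cap. The helper below is only a sketch of that acquire/release discipline; the function name, the import path, and the choice to never use the relaxed allowance are assumptions for illustration, not azcopy's actual download code.

```go
package example

import (
	"context"
	"os"

	"github.com/Azure/azure-storage-azcopy/common" // repo's common package (path assumed)
)

// openWithinLimit is a hypothetical caller of the renamed interface, using it
// as a file-count limiter instead of a RAM limiter. Here "1" means one file
// handle, not one byte.
func openWithinLimit(ctx context.Context, limiter common.CacheLimiter, path string) (*os.File, error) {
	// Block until we are allowed one more open file.
	if err := limiter.WaitUntilAdd(ctx, 1, func() bool { return false }); err != nil {
		return nil, err
	}
	f, err := os.Open(path)
	if err != nil {
		limiter.Remove(1) // give the slot back if the open failed
		return nil, err
	}
	// The caller must call limiter.Remove(1) once it has closed f.
	return f, nil
}
```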

common/chunkedFileWriter.go

+4 -4

@@ -131,7 +131,7 @@ const maxDesirableActiveChunks = 20 // TODO: can we find a sensible way to remov
 // from the cache limiter, which is also in this struct.
 func (w *chunkedFileWriter) WaitToScheduleChunk(ctx context.Context, id ChunkID, chunkSize int64) error {
     w.chunkLogger.LogChunkStatus(id, EWaitReason.RAMToSchedule())
-    err := w.cacheLimiter.WaitUntilAddBytes(ctx, chunkSize, w.shouldUseRelaxedRamThreshold)
+    err := w.cacheLimiter.WaitUntilAdd(ctx, chunkSize, w.shouldUseRelaxedRamThreshold)
     if err == nil {
         atomic.AddInt32(&w.activeChunkCount, 1)
     }

@@ -286,7 +286,7 @@ func (w *chunkedFileWriter) setStatusForContiguousAvailableChunks(unsavedChunksB
 // Saves one chunk to its destination
 func (w *chunkedFileWriter) saveOneChunk(chunk fileChunk) error {
     defer func() {
-        w.cacheLimiter.RemoveBytes(int64(len(chunk.data))) // remove this from the tally of scheduled-but-unsaved bytes
+        w.cacheLimiter.Remove(int64(len(chunk.data))) // remove this from the tally of scheduled-but-unsaved bytes
         atomic.AddInt32(&w.activeChunkCount, -1)
         w.slicePool.ReturnSlice(chunk.data)
         w.chunkLogger.LogChunkStatus(chunk.id, EWaitReason.ChunkDone()) // this chunk is all finished

@@ -311,9 +311,9 @@ func (w *chunkedFileWriter) shouldUseRelaxedRamThreshold() bool {

 // Are we currently in a memory-constrained situation?
 func (w *chunkedFileWriter) haveMemoryPressure(chunkSize int64) bool {
-    didAdd := w.cacheLimiter.TryAddBytes(chunkSize, w.shouldUseRelaxedRamThreshold())
+    didAdd := w.cacheLimiter.TryAdd(chunkSize, w.shouldUseRelaxedRamThreshold())
     if didAdd {
-        w.cacheLimiter.RemoveBytes(chunkSize) // remove immediately, since this was only a test
+        w.cacheLimiter.Remove(chunkSize) // remove immediately, since this was only a test
     }
     return !didAdd
 }

common/singleChunkReader.go

+2 -2

@@ -194,7 +194,7 @@ func (cr *singleChunkReader) blockingPrefetch(fileReader io.ReaderAt, isRetry bo
     // here doing retries, but no RAM _will_ become available because its
     // all used by queued chunkfuncs (that can't be processed because all goroutines are active).
     cr.chunkLogger.LogChunkStatus(cr.chunkId, EWaitReason.RAMToSchedule())
-    err := cr.cacheLimiter.WaitUntilAddBytes(cr.ctx, cr.length, func() bool { return isRetry })
+    err := cr.cacheLimiter.WaitUntilAdd(cr.ctx, cr.length, func() bool { return isRetry })
     if err != nil {
         return err
     }

@@ -318,7 +318,7 @@ func (cr *singleChunkReader) returnBuffer() {
         return
     }
     cr.slicePool.ReturnSlice(cr.buffer)
-    cr.cacheLimiter.RemoveBytes(int64(len(cr.buffer)))
+    cr.cacheLimiter.Remove(int64(len(cr.buffer)))
     cr.buffer = nil
 }


main.go

+29 -3

@@ -47,15 +47,18 @@ func main() {
         return
     }

+    configureGC()
+
     // Perform os specific initialization
-    _, err := ProcessOSSpecificInitialization()
+    maxFileAndSocketHandles, err := ProcessOSSpecificInitialization()
     if err != nil {
         log.Fatalf("initialization failed: %v", err)
     }

-    configureGC()
+    concurrentConnections := common.ComputeConcurrencyValue(runtime.NumCPU())
+    concurrentFilesLimit := computeConcurrentFilesLimit(maxFileAndSocketHandles, concurrentConnections)

-    err = ste.MainSTE(common.ComputeConcurrencyValue(runtime.NumCPU()), 2400, azcopyAppPathFolder, azcopyLogPathFolder)
+    err = ste.MainSTE(concurrentConnections, concurrentFilesLimit, 2400, azcopyAppPathFolder, azcopyLogPathFolder)
     common.PanicIfErr(err)

     cmd.Execute(azcopyAppPathFolder, azcopyLogPathFolder)

@@ -71,3 +74,26 @@ func configureGC() {
         debug.SetGCPercent(20) // activate more aggressive/frequent GC than the default
     }()
 }
+
+// ComputeConcurrentFilesLimit finds a number of concurrently-openable files
+// such that we'll have enough handles left, after using some as network handles
+// TODO: add environment var to optionally allow bringing concurrentFiles down lower
+// (and, when we do, actually USE it for uploads, since currently we're only using it on downloads)
+// (update logging
+func computeConcurrentFilesLimit(maxFileAndSocketHandles int, concurrentConnections int) int {
+
+    allowanceForOnGoingEnumeration := 1 // might still be scanning while we are transferring. Make this bigger if we ever do parallel scanning
+
+    // Compute a very conservative estimate for total number of connections that we may have
+    // To get a conservative estimate we pessimistically assume that the pool of idle conns is full,
+    // but all the ones we are actually using are (by some fluke of timing) not in the pool.
+    // TODO: consider actually SETTING AzCopyMaxIdleConnsPerHost to say, max(0.3 * FileAndSocketHandles, 1000), instead of using the hard-coded value we currently have
+    possibleMaxTotalConcurrentHttpConnections := concurrentConnections + ste.AzCopyMaxIdleConnsPerHost + allowanceForOnGoingEnumeration
+
+    concurrentFilesLimit := maxFileAndSocketHandles - possibleMaxTotalConcurrentHttpConnections
+
+    if concurrentFilesLimit < ste.NumTransferInitiationRoutines {
+        concurrentFilesLimit = ste.NumTransferInitiationRoutines // Set sensible floor, so we don't get negative or zero values if maxFileAndSocketHandles is low
+    }
+    return concurrentFilesLimit
+}
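
To make the handle budget concrete, here is a worked example of the arithmetic in computeConcurrentFilesLimit, using illustrative numbers; the real AzCopyMaxIdleConnsPerHost and NumTransferInitiationRoutines constants live in the ste package and may differ from the stand-ins below.

```go
package main

import "fmt"

func main() {
	// All values below are assumptions chosen for the example.
	maxFileAndSocketHandles := 4096  // e.g. the soft RLIMIT_NOFILE reported by ProcessOSSpecificInitialization
	concurrentConnections := 32      // e.g. common.ComputeConcurrencyValue on a small machine
	maxIdleConnsPerHost := 500       // stand-in for ste.AzCopyMaxIdleConnsPerHost
	transferInitiationRoutines := 64 // matches the NumTransferInitiationRoutines constant added in ste/JobsAdmin.go
	allowanceForOnGoingEnumeration := 1

	// Pessimistic connection estimate: active conns + a full idle pool + the enumeration allowance.
	possibleConns := concurrentConnections + maxIdleConnsPerHost + allowanceForOnGoingEnumeration // 533

	limit := maxFileAndSocketHandles - possibleConns // 4096 - 533 = 3563 files may be open concurrently
	if limit < transferInitiationRoutines {
		limit = transferInitiationRoutines // floor, in case the OS limit is very low
	}
	fmt.Println("concurrent files limit:", limit)
}
```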

main_unix.go

+11 -6

@@ -23,17 +23,19 @@
 package main

 import (
+    "math"
     "os"
     "path"
     "syscall"
 )

 // ProcessOSSpecificInitialization changes the soft limit for file descriptor for process
-// and returns the file descriptor limit for process. If the function fails with some error
-// it returns the error.
+// and returns the new file descriptor limit for process.
+// We need to do this because the default limits are low on Linux, and we concurrently open lots of files
+// and sockets (both of which count towards this limit).
 // Api gets the hard limit for process file descriptor
-// and sets the soft limit for process file descriptor to above hard limit
-func ProcessOSSpecificInitialization() (uint64, error) {
+// and sets the soft limit for process file descriptor to (hard limit - 1)
+func ProcessOSSpecificInitialization() (int, error) {
     var rlimit, zero syscall.Rlimit
     // get the hard limit
     err := syscall.Getrlimit(syscall.RLIMIT_NOFILE, &rlimit)

@@ -47,12 +49,15 @@ func ProcessOSSpecificInitialization() (uint64, error) {
     set := rlimit
     // set the current limit to one less than max of the rlimit
     set.Cur = set.Max - 1
-    // set the soft limit to above rlimit
     err = syscall.Setrlimit(syscall.RLIMIT_NOFILE, &set)
     if err != nil {
         return 0, err
     }
-    return set.Max, nil
+    if set.Cur > math.MaxInt32 {
+        return math.MaxInt32, nil
+    } else {
+        return int(set.Cur), nil
+    }
 }

 // GetAzCopyAppPath returns the path of Azcopy folder in local appdata.

main_windows.go

+8 -2

@@ -21,6 +21,7 @@
 package main

 import (
+    "math"
     "os"
     "os/exec"
     "path"

@@ -39,9 +40,14 @@ func osModifyProcessCommand(cmd *exec.Cmd) *exec.Cmd {
 // ProcessOSSpecificInitialization chnages the soft limit for filedescriptor for process
 // return the filedescriptor limit for process. If the function fails with some, it returns
 // the error
-// TODO: this api is implemented for windows as well but not required.
+// TODO: this api is implemented for windows as well but not required because Windows
+// does not default to a precise low limit like Linux does
 func ProcessOSSpecificInitialization() (int, error) {
-    return 0, nil
+
+    // this exaggerates what's possible, but is accurate enough for our purposes, in which our goal is simply to apply no specific limit on Windows
+    const effectivelyUnlimited = math.MaxInt32
+
+    return effectivelyUnlimited, nil
 }

 // GetAzCopyAppPath returns the path of Azcopy in local appdata.

ste/JobsAdmin.go

+14 -10

@@ -96,7 +96,7 @@ var JobsAdmin interface {
     common.ILoggerCloser
 }

-func initJobsAdmin(appCtx context.Context, concurrentConnections int, targetRateInMBps int64, azcopyAppPathFolder string, azcopyLogPathFolder string) {
+func initJobsAdmin(appCtx context.Context, concurrentConnections int, concurrentFilesLimit int, targetRateInMBps int64, azcopyAppPathFolder string, azcopyLogPathFolder string) {
     if JobsAdmin != nil {
         panic("initJobsAdmin was already called once")
     }

@@ -140,14 +140,15 @@ func initJobsAdmin(appCtx context.Context, concurrentConnections int, targetRate
     maxRamBytesToUse := int64(gbToUse * 1024 * 1024 * 1024)

     ja := &jobsAdmin{
-        logger:        common.NewAppLogger(pipeline.LogInfo, azcopyLogPathFolder),
-        jobIDToJobMgr: newJobIDToJobMgr(),
-        logDir:        azcopyLogPathFolder,
-        planDir:       planDir,
-        pacer:         newPacer(targetRateInMBps * 1024 * 1024),
-        slicePool:     common.NewMultiSizeSlicePool(common.MaxBlockBlobBlockSize),
-        cacheLimiter:  common.NewCacheLimiter(maxRamBytesToUse),
-        appCtx:        appCtx,
+        logger:           common.NewAppLogger(pipeline.LogInfo, azcopyLogPathFolder),
+        jobIDToJobMgr:    newJobIDToJobMgr(),
+        logDir:           azcopyLogPathFolder,
+        planDir:          planDir,
+        pacer:            newPacer(targetRateInMBps * 1024 * 1024),
+        slicePool:        common.NewMultiSizeSlicePool(common.MaxBlockBlobBlockSize),
+        cacheLimiter:     common.NewCacheLimiter(maxRamBytesToUse),
+        fileCountLimiter: common.NewCacheLimiter(int64(concurrentFilesLimit)),
+        appCtx:           appCtx,
         coordinatorChannels: CoordinatorChannels{
             partsChannel:     partsCh,
             normalTransferCh: normalTransferCh,

@@ -181,11 +182,13 @@ func initJobsAdmin(appCtx context.Context, concurrentConnections int, targetRate
     // out progress on already-scheduled chunks. (Not sure whether that can really happen, but this protects against it
     // anyway.)
     // Perhaps MORE importantly, doing this separately gives us more CONTROL over how we interact with the file system.
-    for cc := 0; cc < 64; cc++ {
+    for cc := 0; cc < NumTransferInitiationRoutines; cc++ {
         go ja.transferProcessor(cc)
     }
 }

+const NumTransferInitiationRoutines = 64 // TODO make this configurable
+
 // QueueJobParts puts the given JobPartManager into the partChannel
 // from where this JobPartMgr will be picked by a routine and
 // its transfers will be scheduled

@@ -289,6 +292,7 @@ type jobsAdmin struct {
     pacer            *pacer
     slicePool        common.ByteSlicePooler
     cacheLimiter     common.CacheLimiter
+    fileCountLimiter common.CacheLimiter
 }

 type CoordinatorChannels struct {

ste/init.go

+2 -2

@@ -49,9 +49,9 @@ func ToFixed(num float64, precision int) float64 {
 }

 // MainSTE initializes the Storage Transfer Engine
-func MainSTE(concurrentConnections int, targetRateInMBps int64, azcopyAppPathFolder, azcopyLogPathFolder string) error {
+func MainSTE(concurrentConnections int, concurrentFilesLimit int, targetRateInMBps int64, azcopyAppPathFolder, azcopyLogPathFolder string) error {
     // Initialize the JobsAdmin, resurrect Job plan files
-    initJobsAdmin(steCtx, concurrentConnections, targetRateInMBps, azcopyAppPathFolder, azcopyLogPathFolder)
+    initJobsAdmin(steCtx, concurrentConnections, concurrentFilesLimit, targetRateInMBps, azcopyAppPathFolder, azcopyLogPathFolder)
     // No need to read the existing JobPartPlan files since Azcopy is running in process
     //JobsAdmin.ResurrectJobParts()
     // TODO: We may want to list listen first and terminate if there is already an instance listening

ste/mgr-JobMgr.go

+24 -2

@@ -23,6 +23,8 @@ package ste
 import (
     "context"
     "fmt"
+    "net/http"
+    "runtime"
     "strings"
     "sync"
     "sync/atomic"

@@ -71,6 +73,7 @@ type IJobMgr interface {
     getInMemoryTransitJobState() InMemoryTransitJobState // get in memory transit job state saved in this job.
     setInMemoryTransitJobState(state InMemoryTransitJobState) // set in memory transit job state saved in this job.
     LogChunkStatus(id common.ChunkID, reason common.WaitReason)
+    HttpClient() *http.Client

     common.ILoggerCloser
 }

@@ -81,6 +84,7 @@ func newJobMgr(appLogger common.ILogger, jobID common.JobID, appCtx context.Cont
     // atomicAllTransfersScheduled is set to 1 since this api is also called when new job part is ordered.
     enableChunkLogOutput := level.ToPipelineLogLevel() == pipeline.LogDebug
     jm := jobMgr{jobID: jobID, jobPartMgrs: newJobPartToJobPartMgr(), include: map[string]int{}, exclude: map[string]int{},
+        httpClient:        NewAzcopyHTTPClient(),
         logger:            common.NewJobLogger(jobID, level, appLogger, logFileFolder),
         chunkStatusLogger: common.NewChunkStatusLogger(jobID, logFileFolder, enableChunkLogOutput),
         /*Other fields remain zero-value until this job is scheduled */}

@@ -96,13 +100,22 @@ func (jm *jobMgr) reset(appCtx context.Context, commandString string) IJobMgr {
     if len(commandString) > 0 {
         jm.logger.Log(pipeline.LogInfo, fmt.Sprintf("Job-Command %s", commandString))
     }
+    jm.logConcurrencyParameters()
     jm.ctx, jm.cancel = context.WithCancel(appCtx)
     atomic.StoreUint64(&jm.atomicNumberOfBytesCovered, 0)
     atomic.StoreUint64(&jm.atomicTotalBytesToXfer, 0)
     jm.partsDone = 0
     return jm
 }

+func (jm *jobMgr) logConcurrencyParameters() {
+    jm.logger.Log(pipeline.LogInfo, fmt.Sprintf("Number of CPUs: %d", runtime.NumCPU()))
+    jm.logger.Log(pipeline.LogInfo, fmt.Sprintf("Max file buffer RAM %.3f GB", float32(JobsAdmin.(*jobsAdmin).cacheLimiter.Limit())/(1024*1024*1024)))
+    jm.logger.Log(pipeline.LogInfo, fmt.Sprintf("Max open files when downloading: %d", JobsAdmin.(*jobsAdmin).fileCountLimiter.Limit()))
+    jm.logger.Log(pipeline.LogInfo, fmt.Sprintf("Max concurrent transfer initiation routines: %d", NumTransferInitiationRoutines))
+    // TODO: find a way to add concurrency value here (i.e. number of chunk func worker go routines)
+}
+
 // jobMgr represents the runtime information for a Job
 type jobMgr struct {
     logger common.ILoggerResetable

@@ -111,6 +124,10 @@ type jobMgr struct {
     ctx    context.Context
     cancel context.CancelFunc

+    // Share the same HTTP Client across all job parts, so that the we maximize re-use of
+    // its internal connection pool
+    httpClient *http.Client
+
     jobPartMgrs jobPartToJobPartMgr // The map of part #s to JobPartMgrs
     // partsDone keep the count of completed part of the Job.
     partsDone uint32

@@ -212,8 +229,9 @@ func (jm *jobMgr) AddJobPart(partNum PartNumber, planFile JobPartPlanFileName, s
     destinationSAS string, scheduleTransfers bool) IJobPartMgr {
     jpm := &jobPartMgr{jobMgr: jm, filename: planFile, sourceSAS: sourceSAS,
         destinationSAS: destinationSAS, pacer: JobsAdmin.(*jobsAdmin).pacer,
-        slicePool:    JobsAdmin.(*jobsAdmin).slicePool,
-        cacheLimiter: JobsAdmin.(*jobsAdmin).cacheLimiter}
+        slicePool:        JobsAdmin.(*jobsAdmin).slicePool,
+        cacheLimiter:     JobsAdmin.(*jobsAdmin).cacheLimiter,
+        fileCountLimiter: JobsAdmin.(*jobsAdmin).fileCountLimiter}
     jpm.planMMF = jpm.filename.Map()
     jm.jobPartMgrs.Set(partNum, jpm)
     jm.finalPartOrdered = jpm.planMMF.Plan().IsFinalPart

@@ -253,6 +271,10 @@ func (jm *jobMgr) setDirection(fromTo common.FromTo) {
     }
 }

+func (jm *jobMgr) HttpClient() *http.Client {
+    return jm.httpClient
+}
+
 // SetIncludeExclude sets the include / exclude list of transfers
 // supplied with resume command to include or exclude mentioned transfers
 func (jm *jobMgr) SetIncludeExclude(include, exclude map[string]int) {
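
For reference, the new logConcurrencyParameters calls would produce job-log lines shaped like the following; the formats come from the Sprintf calls above, while the numbers are purely illustrative:

```
Number of CPUs: 8
Max file buffer RAM 0.500 GB
Max open files when downloading: 3563
Max concurrent transfer initiation routines: 64
```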
