diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 88ab8907..f47647bd 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -52,7 +52,7 @@ jobs: rustc: 1.56.1 # Oldest supported version, keep in sync with README.md - os: ubuntu-20.04 rustc: 1.56.1 - extra_desc: dist-server + extra_desc: dist-worker extra_args: --no-default-features --features=dist-tests test_dist_ -- --test-threads 1 - os: ubuntu-20.04 rustc: stable @@ -153,9 +153,9 @@ jobs: path: target_repo - name: Double-check that cachepot is not running - run: cachepot --stop-server || true + run: cachepot --stop-coordinator || true - name: Start the local cachepot server daemon - run: cachepot --start-server + run: cachepot --start-coordinator # This is the main server that's going to send the compilation requests # so logging is more important for this than for subsequent (client) # cachepot invocations @@ -192,10 +192,10 @@ jobs: include: - os: ubuntu-20.04 target: x86_64-unknown-linux-musl - extra_args: --features="dist-server" + extra_args: --features="dist-worker" - os: ubuntu-20.04 binary: cachepot-dist - extra_args: --features="dist-server" + extra_args: --features="dist-worker" target: x86_64-unknown-linux-musl - os: ubuntu-20.04 target: aarch64-unknown-linux-musl diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 3458e6a7..9c7baaef 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -112,7 +112,7 @@ build: <<: *build-refs stage: build variables: - FEATURES: "openssl/vendored,dist-server" + FEATURES: "openssl/vendored,dist-worker" script: - cargo +stable build --locked --release --verbose --bin cachepot-dist --target x86_64-unknown-linux-musl --features=${FEATURES} # collect artifacts diff --git a/Cargo.toml b/Cargo.toml index 7005eb7e..125d298e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -22,7 +22,7 @@ path = "src/main.rs" [[bin]] name = "cachepot-dist" -required-features = ["dist-server"] +required-features = ["dist-worker"] path = "src/bin/cachepot-dist/main.rs" [dependencies] @@ -97,7 +97,7 @@ structopt = "0.3.25" strum = { version = "0.23.0", features = ["derive"] } native-tls = "0.2.8" -# dist-server only +# dist-worker only crossbeam-utils = { version = "0.8", optional = true } libmount = { version = "0.1.10", optional = true } nix = { version = "0.19", optional = true } @@ -147,11 +147,11 @@ unstable = [] # Enables distributed support in the cachepot client dist-client = ["ar", "flate2", "hyper", "hyperx", "reqwest/stream", "url", "sha2", "tokio/fs"] # Enables the cachepot-dist binary -dist-server = ["chrono", "crossbeam-utils", "jsonwebtoken", "flate2", "hyperx", "libmount", "nix", "reqwest", "sha2", "syslog", "void", "version-compare", "warp", "thiserror"] +dist-worker = ["chrono", "crossbeam-utils", "jsonwebtoken", "flate2", "hyperx", "libmount", "nix", "reqwest", "sha2", "syslog", "void", "version-compare", "warp", "thiserror"] # Enables dist tests with external requirements -dist-tests = ["dist-client", "dist-server"] +dist-tests = ["dist-client", "dist-worker"] # Run JWK token crypto against openssl ref impl -vs_openssl = ["openssl", "dist-server"] +vs_openssl = ["openssl", "dist-worker"] # Make sure to always optimize big integer calculations as this cuts down # certificate generation time by two orders of magnitude (down to ~0.1s) diff --git a/README.md b/README.md index 967226ad..ba2b2b2f 100644 --- a/README.md +++ b/README.md @@ -81,9 +81,9 @@ cachepot supports gcc, clang, MSVC, rustc, NVCC, and [Wind River's diab compiler If you don't [specify 
otherwise](#storage-options), cachepot will use a local disk cache. -cachepot works using a client-server model, where the server runs locally on the same machine as the client. The client-server model allows the server to be more efficient by keeping some state in memory. The cachepot command will spawn a server process if one is not already running, or you can run `cachepot --start-server` to start the background server process without performing any compilation. +cachepot works using a client-server model, where the server (which we refer to as "coordinator") runs locally on the same machine as the client. The client-server model allows the server/coordinator to be more efficient by keeping some state in memory. The cachepot command will spawn a coordinator process if one is not already running, or you can run `cachepot --start-coordinator` to start the background coordinator process without performing any compilation. -You can run `cachepot --stop-server` to terminate the server. It will also terminate after (by default) 10 minutes of inactivity. +You can run `cachepot --stop-coordinator` to terminate the coordinator. It will also terminate after (by default) 10 minutes of inactivity. Running `cachepot --show-stats` will print a summary of cache statistics. @@ -168,6 +168,7 @@ cachepot defaults to using local disk storage. You can set the `CACHEPOT_DIR` en The default cache size is 10 gigabytes. To change this, set `CACHEPOT_CACHE_SIZE`, for example `CACHEPOT_CACHE_SIZE="1G"`. ### S3 + If you want to use S3 storage for the cachepot cache, you need to set the `CACHEPOT_BUCKET` environment variable to the name of the S3 bucket to use. You can use `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY` to set the S3 credentials. Alternately, you can set `AWS_IAM_CREDENTIALS_URL` to a URL that returns credentials in the format supported by the [EC2 metadata service](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/iam-roles-for-amazon-ec2.html#instance-metadata-security-credentials), and credentials will be fetched from that location as needed. In the absence of either of these options, credentials for the instance's IAM role will be fetched from the EC2 metadata service directly. @@ -178,20 +179,24 @@ You can also define a prefix that will be prepended to the keys of all cache obj ### Redis + Set `CACHEPOT_REDIS` to a [Redis](https://redis.io/) url in format `redis://[:<passwd>@]<hostname>[:port][/<db>]` to store the cache in a Redis instance. Redis can be configured as a LRU (least recently used) cache with a fixed maximum cache size. Set `maxmemory` and `maxmemory-policy` according to the [Redis documentation](https://redis.io/topics/lru-cache). The `allkeys-lru` policy which discards the *least recently accessed or modified* key fits well for the cachepot use case. Redis over TLS is supported. Use the [`rediss://`](https://www.iana.org/assignments/uri-schemes/prov/rediss) url scheme (note `rediss` vs `redis`). Append `#insecure` to the url to disable hostname verification and accept self-signed certificates (dangerous!). Note that this also disables [SNI](https://en.wikipedia.org/wiki/Server_Name_Indication). ### Memcached + Set `CACHEPOT_MEMCACHED` to a [Memcached](https://memcached.org/) url in format `tcp://<hostname>:<port> ...` to store the cache in a Memcached instance. ### Google Cloud Storage + To use [Google Cloud Storage](https://cloud.google.com/storage/), you need to set the `CACHEPOT_GCS_BUCKET` environment variable to the name of the GCS bucket.
If you're using authentication, either set `CACHEPOT_GCS_KEY_PATH` to the location of your JSON service account credentials or `CACHEPOT_GCS_CREDENTIALS_URL` with a URL that returns the oauth token. By default, CACHEPOT on GCS will be read-only. To change this, set `CACHEPOT_GCS_RW_MODE` to either `READ_ONLY` or `READ_WRITE`. ### Azure + To use Azure Blob Storage, you'll need your Azure connection string and an _existing_ Blob Storage container name. Set the `CACHEPOT_AZURE_CONNECTION_STRING` environment variable to your connection string, and `CACHEPOT_AZURE_BLOB_CONTAINER` to the name of the container to use. Note that cachepot will not create the container for you - you'll need to do that yourself. diff --git a/docs/book/src/ci/jenkins.md b/docs/book/src/ci/jenkins.md index 7629b888..434af17d 100644 --- a/docs/book/src/ci/jenkins.md +++ b/docs/book/src/ci/jenkins.md @@ -14,7 +14,7 @@ configuration and share the statistics. If a per-jobs cachepot configuration is needed or preferred (e.g place a local disc cache in `$WORKSPACE`) the Port allocator plugin does a good job. It assigns a free and -unique port number to a job by exporting a variable. Naming this variable `CACHEPOT_SERVER_PORT` is enough to make the +unique port number to a job by exporting a variable. Naming this variable `CACHEPOT_COORDINATOR_PORT` is enough to make the job spawn it's own cachepot server that is save to terminate upon job termination. This approach has the advantage that each job (with a dedicated server instance) maintains it's own statistics that might be interesting upon job finalization. diff --git a/docs/book/src/dist/dist.md b/docs/book/src/dist/dist.md index 9e3d9c35..146bafe8 100644 --- a/docs/book/src/dist/dist.md +++ b/docs/book/src/dist/dist.md @@ -23,10 +23,10 @@ Distributed cachepot consists of three parts: remote machines - the scheduler (`cachepot-dist` binary), responsible for deciding where a compilation job should run -- the server (`cachepot-dist` binary), responsible for actually executing +- the worker (`cachepot-dist` binary), responsible for actually executing a build -All servers are required to be a 64-bit Linux install. Clients may request +All workers are required to be a 64-bit Linux install. Clients may request compilation from Linux, Windows or macOS. Linux compilations will attempt to automatically package the compiler in use, while Windows and macOS users will need to specify a toolchain for cross-compilation ahead of time. @@ -35,49 +35,49 @@ need to specify a toolchain for cross-compilation ahead of time. The HTTP implementation of cachepot has the following API, where all HTTP body content is encoded using [`bincode`](http://docs.rs/bincode): -- scheduler +- `scheduler` - `POST /api/v1/scheduler/alloc_job` - - Called by a client to submit a compilation request. + - Called by the coordinator to submit a compilation request. - Returns information on where the job is allocated it should run. - - `GET /api/v1/scheduler/server_certificate` - - Called by a client to retrieve the (dynamically created) HTTPS - certificate for a server, for use in communication with that server. - - Returns a digest and PEM for the temporary server HTTPS certificate. - - `POST /api/v1/scheduler/heartbeat_server` - - Called (repeatedly) by servers to register as available for jobs. + - `GET /api/v1/scheduler/worker_certificate` + - Called by the coordinator to retrieve the (dynamically created) HTTPS + certificate for a worker, for use in communication with that worker. 
+ - Returns a digest and PEM for the temporary worker HTTPS certificate. + - `POST /api/v1/scheduler/heartbeat_worker` + - Called (repeatedly) by workers to register as available for jobs. - `POST /api/v1/scheduler/job_state` - - Called by servers to inform the scheduler of the state of the job. + - Called by workers to inform the scheduler of the state of the job. - `GET /api/v1/scheduler/status` - Returns information about the scheduler. -- `server` - - `POST /api/v1/distserver/assign_job` - - Called by the scheduler to inform of a new job being assigned to this server. - - Returns whether the toolchain is already on the server or needs submitting. - - `POST /api/v1/distserver/submit_toolchain` - - Called by the client to submit a toolchain. - - `POST /api/v1/distserver/run_job` - - Called by the client to run a job. +- `worker` + - `POST /api/v1/distworker/assign_job` + - Called by the scheduler to inform of a new job being assigned to this worker. + - Returns whether the toolchain is already on the worker or needs submitting. + - `POST /api/v1/distworker/submit_toolchain` + - Called by the coordinator to submit a toolchain. + - `POST /api/v1/distworker/run_job` + - Called by the coordinator to run a job. - Returns the compilation stdout along with files created. There are three axes of security in this setup: -1. Can the scheduler trust the servers? -2. Is the client permitted to submit and run jobs? +1. Can the scheduler trust the workers? +2. Is the coordinator permitted to submit and run jobs? 3. Can third parties see and/or modify traffic? -### Server Trust +### Worker Trust -If a server is malicious, they can return malicious compilation output to a user. -To protect against this, servers must be authenticated to the scheduler. You have three -means for doing this, and the scheduler and all servers must use the same mechanism. +If a worker is malicious, they can return malicious compilation output to a user. +To protect against this, workers must be authenticated to the scheduler. You have three +means for doing this, and the scheduler and all workers must use the same mechanism. -Once a server has registered itself using the selected authentication, the scheduler -will trust the registered server address and use it for builds. +Once a worker has registered itself using the selected authentication, the scheduler +will trust the registered worker address and use it for builds. #### JWT HS256 (preferred) -This method uses secret key to create a per-IP-and-port token for each server. -Acquiring a token will only allow participation as a server if the attacker can +This method uses secret key to create a per-IP-and-port token for each worker. +Acquiring a token will only allow participation as a worker if the attacker can additionally impersonate the IP and port the token was generated for. You *must* keep the secret key safe. 
@@ -89,28 +89,28 @@ use your OS random number generator) and put it in your scheduler config file as follows: ```toml -server_auth = { type = "jwt_hs256", secret_key = "YOUR_KEY_HERE" } +worker_auth = { type = "jwt_hs256", secret_key = "YOUR_KEY_HERE" } ``` -Now generate a token for the server, giving the IP and port the scheduler and clients can -connect to the server on (address `192.168.1.10:10501` here): +Now generate a token for the worker, giving the IP and port the scheduler and coordinator can +connect to the worker on (address `192.168.1.10:10501` here): ```sh -cachepot-dist auth generate-jwt-hs256-server-token \ +cachepot-dist auth generate-jwt-hs256-worker-token \ --secret-key YOUR_KEY_HERE \ - --server 192.168.1.10:10501 + --worker 192.168.1.10:10501 ``` *or:* ```sh -cachepot-dist auth generate-jwt-hs256-server-token \ +cachepot-dist auth generate-jwt-hs256-worker-token \ --config /path/to/scheduler-config.toml \ - --server 192.168.1.10:10501 + --worker 192.168.1.10:10501 ``` This will output a token (you can examine it with https://jwt.io if you're -curious) that you should add to your server config file as follows: +curious) that you should add to your worker config file as follows: ```toml scheduler_auth = { type = "jwt_token", token = "YOUR_TOKEN_HERE" } @@ -120,20 +120,20 @@ Done! #### Token -This method simply shares a token between the scheduler and all servers. A token -leak from anywhere allows any attacker to participate as a server. +This method simply shares a token between the scheduler and all workers. A token +leak from anywhere allows any attacker to participate as a worker. *To use it*: -Choose a 'secure token' you can share between your scheduler and all servers. +Choose a 'secure token' you can share between your scheduler and all workers. Put the following in your scheduler config file: ```toml -server_auth = { type = "token", token = "YOUR_TOKEN_HERE" } +worker_auth = { type = "token", token = "YOUR_TOKEN_HERE" } ``` -Put the following in your server config file: +Put the following in your worker config file: ```toml scheduler_auth = { type = "token", token = "YOUR_TOKEN_HERE" } @@ -153,10 +153,10 @@ provides no security at all. Put the following in your scheduler config file: ```toml -server_auth = { type = "DANGEROUSLY_INSECURE" } +worker_auth = { type = "DANGEROUSLY_INSECURE" } ``` -Put the following in your server config file: +Put the following in your worker config file: ```toml scheduler_auth = { type = "DANGEROUSLY_INSECURE" } @@ -164,25 +164,25 @@ scheduler_auth = { type = "DANGEROUSLY_INSECURE" } Done! -### Client Trust +### Coordinator Trust -If a client is malicious, they can cause a DoS of distributed cachepot servers or +If a client is malicious, they can cause a DoS of distributed cachepot workers or explore ways to escape the build sandbox. To protect against this, clients must be authenticated. Each client will use an authentication token for the initial job allocation request to the scheduler. A successful allocation will return a job token that is used -to authorise requests to the appropriate server for that specific job. +to authorise requests to the appropriate worker for that specific job. -This job token is a JWT HS256 token of the job id, signed with a server key. -The key for each server is randomly generated on server startup and given to -the scheduler during registration. 
This means that the server can verify users -without either a) adding client authentication to every server or b) needing -secret transfer between scheduler and server on every job allocation. +This job token is a JWT HS256 token of the job id, signed with a worker key. +The key for each worker is randomly generated on worker startup and given to +the scheduler during registration. This means that the worker can verify users +without either a) adding coordinator authentication to every worker or b) needing +secret transfer between scheduler and worker on every job allocation. #### OAuth2 -This is a group of similar methods for achieving the same thing - the client +This is a group of similar methods for achieving the same thing - the coordinator retrieves a token from an OAuth2 service, and then submits it to the scheduler which has a few different options for performing validation on that token. @@ -246,7 +246,7 @@ auth = { type = "token", token = "YOUR_TOKEN_HERE" } Done! -#### Insecure (bad idea) +#### Insecure (bad idea, again) *This route is not recommended* @@ -268,27 +268,27 @@ Done! ### Eavesdropping and Tampering Protection -If third parties can see traffic to the servers, source code can be leaked. If third -parties can modify traffic to and from the servers or the scheduler, they can cause +If third parties can see traffic to the workers, source code can be leaked. If third +parties can modify traffic to and from the workers or the scheduler, they can cause the client to receive malicious compiled objects. Securing communication with the scheduler is the responsibility of the cachepot cluster -administrator - it is recommended to put a webserver with a HTTPS certificate in front +administrator - it is recommended to put a web server with an HTTPS certificate in front of the scheduler and instruct clients to configure their `scheduler_url` with the -appropriate `https://` address. The scheduler will verify the server's IP in this -configuration by inspecting the `X-Real-IP` header's value, if present. The webserver +appropriate `https://` address. The scheduler will verify the worker's IP in this +configuration by inspecting the `X-Real-IP` header's value, if present. The web server used in this case should be configured to set this header to the appropriate value. -Securing communication with the server is performed automatically - HTTPS certificates -are generated dynamically on server startup and communicated to the scheduler during +Securing communication with the worker is performed automatically - HTTPS certificates +are generated dynamically on worker startup and communicated to the scheduler during the heartbeat. If a client does not have the appropriate certificate for communicating -securely with a server (after receiving a job allocation from the scheduler), the +securely with a worker (after receiving a job allocation from the scheduler), the certificate will be requested from the scheduler.
-# Building the Distributed Server Binaries +# Building the Distributed Worker Binaries Until these binaries [are included in releases](https://github.com/paritytech/cachepot/issues/393) I've put together a Docker container that can be used to easily build a release binary: ```toml -docker run -ti --rm -v $PWD:/cachepot luser/cachepot-musl-build:0.1 /bin/bash -c "cd /cachepot; cargo build --release --target x86_64-unknown-linux-musl --features=dist-server && strip target/x86_64-unknown-linux-musl/release/cachepot-dist && cd target/x86_64-unknown-linux-musl/release/ && tar czf cachepot-dist.tar.gz cachepot-dist" +docker run -ti --rm -v $PWD:/cachepot luser/cachepot-musl-build:0.1 /bin/bash -c "cd /cachepot; cargo build --release --target x86_64-unknown-linux-musl --features=dist-worker && strip target/x86_64-unknown-linux-musl/release/cachepot-dist && cd target/x86_64-unknown-linux-musl/release/ && tar czf cachepot-dist.tar.gz cachepot-dist" ``` diff --git a/docs/book/src/dist/quickstart.md b/docs/book/src/dist/quickstart.md index cbb5cf6e..e8987efe 100644 --- a/docs/book/src/dist/quickstart.md +++ b/docs/book/src/dist/quickstart.md @@ -5,39 +5,39 @@ macOS and Windows clients are supported but have seen significantly less testing ## Get cachepot binaries -Either download pre-built cachepot binaries (not currently available), or build cachepot locally with the `dist-client` and `dist-server` features enabled: +Either download pre-built cachepot binaries (not currently available), or build cachepot locally with the `dist-client` and `dist-worker` features enabled: ```sh -cargo build --release --features="dist-client dist-server" +cargo build --release --features="dist-client dist-worker" ``` -The `target/release/cachepot` binary will be used on the client, and the `target/release/cachepot-dist` binary will be used on the scheduler and build server. +The `target/release/cachepot` binary will be used on the client, and the `target/release/cachepot-dist` binary will be used on the scheduler and build worker. If you're only planning to use the client, it is enabled by default, so just `cargo install cachepot` should do the trick. ## Configure a scheduler -If you're adding a server to a cluster that has already be set up, skip ahead to [configuring a build server](#configure-a-build-server). +If you're adding a worker to a cluster that has already been set up, skip ahead to [configuring a build worker](#configure-a-build-worker). -The scheduler is a daemon that manages compile request from clients and parcels them out to build servers. You only need one of these per cachepot setup. Currently only Linux is supported for running the scheduler. +The scheduler is a daemon that manages compile requests from clients and parcels them out to build workers. You only need one of these per cachepot setup. Currently only Linux is supported for running the scheduler. -Create a scheduler.conf file to configure client/server authentication. A minimal example looks like: +Create a scheduler.conf file to configure client/worker authentication. A minimal example looks like: ```toml # The socket address the scheduler will listen on. It's strongly recommended -# to listen on localhost and put a HTTPS server in front of it. +# to listen on localhost and put an HTTPS server in front of it.
public_addr = "127.0.0.1:10600" [client_auth] type = "token" token = "my client token" -[server_auth] +[worker_auth] type = "jwt_hs256" secret_key = "my secret key" ``` -Mozilla build servers will typically require clients to be authenticated with the +Mozilla build workers will typically require clients to be authenticated with the [Mozilla identity system](https://github.com/mozilla-iam/mozilla-iam). To configure the scheduler for this, the `client_auth` section should be as follows @@ -57,15 +57,15 @@ Start the scheduler by running: cachepot-dist scheduler --config scheduler.conf ``` -Like the local server, the scheduler process will daemonize itself unless `CACHEPOT_NO_DAEMON=1` is set. If the scheduler fails to start you may need to set `RUST_LOG=trace` when starting it to get useful diagnostics (or to get less noisy logs: `RUST_LOG=cachepot=trace,cachepot-dist=trace` ). +Like the local coordinator, the scheduler process will daemonize itself unless `CACHEPOT_NO_DAEMON=1` is set. If the scheduler fails to start you may need to set `RUST_LOG=trace` when starting it to get useful diagnostics (or to get less noisy logs: `RUST_LOG=cachepot=trace,cachepot-dist=trace` ). -### Configure a build server +### Configure a build worker -A build server communicates with the scheduler and executes compiles requested by clients. Only Linux is supported for running a build server, but executing cross-compile requests from macOS/Windows clients is supported. +A build worker communicates with the scheduler and executes compiles requested by clients. Only Linux is supported for running a build worker, but executing cross-compile requests from macOS/Windows clients is supported. -The build server requires [bubblewrap](https://github.com/projectatomic/bubblewrap) to sandbox execution, at least version 0.3.0. Verify your version of bubblewrap *before* attempting to run the server. On Ubuntu 18.10+ you can `apt install bubblewrap` to install it. If you build from source you will need to first install your distro's equivalent of the `libcap-dev` package. +The build worker requires [bubblewrap](https://github.com/projectatomic/bubblewrap) to sandbox execution, at least version 0.3.0. Verify your version of bubblewrap *before* attempting to run the worker. On Ubuntu 18.10+ you can `apt install bubblewrap` to install it. If you build from source you will need to first install your distro's equivalent of the `libcap-dev` package. -Create a server.conf file to configure authentication, storage locations, network addresses and the path to bubblewrap. A minimal example looks like: +Create a worker.conf file to configure authentication, storage locations, network addresses and the path to bubblewrap. A minimal example looks like: ```toml # This is where client toolchains will be stored. @@ -76,7 +76,7 @@ cache_dir = "/tmp/toolchains" # A public IP address and port that clients will use to connect to this builder. public_addr = "192.168.1.1:10501" # The URL used to connect to the scheduler (should use https, given an ideal -# setup of a HTTPS server in front of the scheduler) +# setup of an HTTPS server in front of the scheduler) scheduler_url = "https://192.168.1.1" [builder] @@ -88,22 +88,22 @@ bwrap_path = "/usr/bin/bwrap" [scheduler_auth] type = "jwt_token" -# This will be generated by the `generate-jwt-hs256-server-token` command or +# This will be generated by the `generate-jwt-hs256-worker-token` command or # provided by an administrator of the cachepot cluster.
-token = "my server's token" +token = "my worker's token" ``` -Due to `bubblewrap` requirements currently the build server *must* be run as root. Start the build server by running: +Due to `bubblewrap` requirements, the build worker currently *must* be run as root. Start the build worker by running: ```toml -sudo cachepot-dist server --config server.conf +sudo cachepot-dist worker --config worker.conf ``` -As with the scheduler, if the build server fails to start you may need to set `RUST_LOG=trace` to get useful diagnostics. (or to get less noisy logs: `RUST_LOG=cachepot=trace,cachepot-dist=trace` ). +As with the scheduler, if the build worker fails to start you may need to set `RUST_LOG=trace` to get useful diagnostics (or to get less noisy logs: `RUST_LOG=cachepot=trace,cachepot-dist=trace` ). ## Configure a client -A client uses `cachepot` to wrap compile commands, communicates with the scheduler to find available build servers, and communicates with build servers to execute the compiles and receive the results. +A client uses `cachepot` to wrap compile commands, communicates with the scheduler to find available build workers, and communicates with build workers to execute the compiles and receive the results. Clients that are not targeting linux64 require the `icecc-create-env` script or should be provided with an archive. `icecc-create-env` is part of `icecream` for packaging toolchains. You can install icecream to get this script (`apt install icecc` on Ubuntu), or download it from the git repository and place it in your `PATH`: `curl https://raw.githubusercontent.com/icecc/icecream/master/client/icecc-create-env.in > icecc-create-env && chmod +x icecc-create-env`. See [using custom toolchains](#using-custom-toolchains). @@ -112,10 +112,10 @@ Create a client config file in `~/.config/cachepot/config` (on Linux), `~/Librar ```toml [dist] # The URL used to connect to the scheduler (should use https, given an ideal -# setup of a HTTPS server in front of the scheduler) +# setup of an HTTPS server in front of the scheduler) scheduler_url = "https://192.168.1.1" # Used for mapping local toolchains to remote cross-compile toolchains. Empty in -# this example where the client and build server are both Linux. +# this example where the client and build worker are both Linux. toolchains = [] # Size of the local toolchain cache, in bytes (5GB here, 10GB if unspecified). toolchain_cache_size = 5368709120 @@ -126,7 +126,7 @@ type = "token" token = "my client token" -Clients using Mozilla build servers should configure their `dist.auth` section as follows: +Clients using Mozilla build workers should configure their `dist.auth` section as follows: ```toml [dist.auth] @@ -137,17 +137,17 @@ And retrieve a token from the Mozilla identity service by running `cachepot --di and following the instructions. Completing this process will retrieve and cache a token valid for 7 days. -Make sure to run `cachepot --stop-server` and `cachepot --start-server` if cachepot was +Make sure to run `cachepot --stop-coordinator` and `cachepot --start-coordinator` if cachepot was running before changing the configuration.
You can check the status with `cachepot --dist-status`, it should say something like: -```toml +```sh $ cachepot --dist-status -{"SchedulerStatus":["https://cachepot1.corpdmz.ber3.mozilla.com/",{"num_servers":3,"num_cpus":56,"in_progress":24}]} +{"SchedulerStatus":["https://cachepot1.corpdmz.ber3.mozilla.com/",{"num_workers":3,"num_cpus":56,"in_progress":24}]} ``` -For diagnostics, advice for scheduler/server does not work with `RUSTC_WRAPPER`. Therefore following approach is advised: `CACHEPOT_LOG=trace RUSTC_WRAPPER=... cargo build`. +For diagnostics, advice for scheduler/worker does not work with `RUSTC_WRAPPER`. Therefore following approach is advised: `CACHEPOT_LOG=trace RUSTC_WRAPPER=... cargo build`. ### Using custom toolchains @@ -213,21 +213,21 @@ may be required: - Some cross compilers may not understand some intrinsics used in more recent macOS SDKs. The 10.11 SDK is known to work. -## Making a build server start at boot time +## Making a build worker start at boot time -It is very easy with a systemd service to spawn the server on boot. +It is very easy with a systemd service to spawn the worker on boot. -You can create a service file like `/etc/systemd/system/cachepot-server.service` +You can create a service file like `/etc/systemd/system/cachepot-worker.service` with the following contents: ```toml [Unit] -Description=cachepot-dist server +Description=cachepot-dist worker Wants=network-online.target After=network-online.target [Service] -ExecStart=/path/to/cachepot-dist server --config /path/to/server.conf +ExecStart=/path/to/cachepot-dist worker --config /path/to/worker.conf [Install] WantedBy=multi-user.target @@ -237,8 +237,8 @@ WantedBy=multi-user.target you're in a distro with SELinux enabled (like Fedora), you may need to use an `ExecStart` line like: -```toml -ExecStart=/bin/bash -c "/home//path/to/cachepot-dist server --config /home//path/to/server.conf" +```sh +ExecStart=/bin/bash -c "/home//path/to/cachepot-dist worker --config /home//path/to/worker.conf" ``` This is because SELinux by default prevents services from running binaries in @@ -251,7 +251,7 @@ like: ```sh systemctl daemon-reload -systemctl start cachepot-server +systemctl start cachepot-worker systemctl status # And check it's fine. 
-systemctl enable cachepot-server # This enables the service on boot +systemctl enable cachepot-worker # This enables the service on boot ``` diff --git a/scripts/extratest.sh b/scripts/extratest.sh index 7add858e..eb5ca967 100755 --- a/scripts/extratest.sh +++ b/scripts/extratest.sh @@ -24,36 +24,36 @@ wintarget() { # all-windows doesn't work as redis-rs build.rs has issues (checks for cfg!(unix)) if [ "$1" = checkall ]; then - $CARGO check --target $target --all-targets --features 'all dist-client dist-server dist-tests' - $CARGO check --target $target --all-targets --features 'all dist-client dist-server' + $CARGO check --target $target --all-targets --features 'all dist-client dist-worker dist-tests' + $CARGO check --target $target --all-targets --features 'all dist-client dist-worker' $CARGO check --target $target --all-targets --features 'all dist-client dist-tests' - $CARGO check --target $target --all-targets --features 'all dist-server dist-tests' + $CARGO check --target $target --all-targets --features 'all dist-worker dist-tests' $CARGO check --target $target --all-targets --features 'all dist-client' - $CARGO check --target $target --all-targets --features 'all dist-server' + $CARGO check --target $target --all-targets --features 'all dist-worker' $CARGO check --target $target --all-targets --features 'all dist-tests' $CARGO check --target $target --all-targets --features 'all' - $CARGO check --target $target --all-targets --features 'dist-client dist-server dist-tests' - $CARGO check --target $target --all-targets --features 'dist-client dist-server' + $CARGO check --target $target --all-targets --features 'dist-client dist-worker dist-tests' + $CARGO check --target $target --all-targets --features 'dist-client dist-worker' $CARGO check --target $target --all-targets --features 'dist-client dist-tests' - $CARGO check --target $target --all-targets --features 'dist-server dist-tests' + $CARGO check --target $target --all-targets --features 'dist-worker dist-tests' $CARGO check --target $target --all-targets --features 'dist-client' - $CARGO check --target $target --all-targets --features 'dist-server' + $CARGO check --target $target --all-targets --features 'dist-worker' $CARGO check --target $target --all-targets --features 'dist-tests' $CARGO check --target $target --all-targets --features '' - $CARGO check --target $target --all-targets --no-default-features --features 'all dist-client dist-server dist-tests' - $CARGO check --target $target --all-targets --no-default-features --features 'all dist-client dist-server' + $CARGO check --target $target --all-targets --no-default-features --features 'all dist-client dist-worker dist-tests' + $CARGO check --target $target --all-targets --no-default-features --features 'all dist-client dist-worker' $CARGO check --target $target --all-targets --no-default-features --features 'all dist-client dist-tests' - $CARGO check --target $target --all-targets --no-default-features --features 'all dist-server dist-tests' + $CARGO check --target $target --all-targets --no-default-features --features 'all dist-worker dist-tests' $CARGO check --target $target --all-targets --no-default-features --features 'all dist-client' - $CARGO check --target $target --all-targets --no-default-features --features 'all dist-server' + $CARGO check --target $target --all-targets --no-default-features --features 'all dist-worker' $CARGO check --target $target --all-targets --no-default-features --features 'all dist-tests' $CARGO check --target $target --all-targets 
--no-default-features --features 'all' - $CARGO check --target $target --all-targets --no-default-features --features 'dist-client dist-server dist-tests' - $CARGO check --target $target --all-targets --no-default-features --features 'dist-client dist-server' + $CARGO check --target $target --all-targets --no-default-features --features 'dist-client dist-worker dist-tests' + $CARGO check --target $target --all-targets --no-default-features --features 'dist-client dist-worker' $CARGO check --target $target --all-targets --no-default-features --features 'dist-client dist-tests' - $CARGO check --target $target --all-targets --no-default-features --features 'dist-server dist-tests' + $CARGO check --target $target --all-targets --no-default-features --features 'dist-worker dist-tests' $CARGO check --target $target --all-targets --no-default-features --features 'dist-client' - $CARGO check --target $target --all-targets --no-default-features --features 'dist-server' + $CARGO check --target $target --all-targets --no-default-features --features 'dist-worker' $CARGO check --target $target --all-targets --no-default-features --features 'dist-tests' $CARGO check --target $target --all-targets --no-default-features --features '' wintarget @@ -89,7 +89,7 @@ elif [ "$1" = test ]; then fi set -x - RUST_BACKTRACE=1 $CARGO test $NORUN --target $target --features 'all dist-client dist-server dist-tests' $VERBOSE -- $NOCAPTURE $TESTTHREADS test_dist_nobuilder + RUST_BACKTRACE=1 $CARGO test $NORUN --target $target --features 'all dist-client dist-worker dist-tests' $VERBOSE -- $NOCAPTURE $TESTTHREADS test_dist_nobuilder else echo invalid command diff --git a/snap/hooks/configure b/snap/hooks/configure index 24ddd175..a4fd8be0 100755 --- a/snap/hooks/configure +++ b/snap/hooks/configure @@ -50,4 +50,4 @@ done mv -f ${CONFIG_FILE}.new $CONFIG_FILE || /bin/true # Restart the server to pick up new values -systemctl restart snap.cachepot.cachepot-server.service +systemctl restart snap.cachepot.cachepot-coordinator.service diff --git a/snap/snapcraft.yaml b/snap/snapcraft.yaml index 41108d99..395f0abd 100644 --- a/snap/snapcraft.yaml +++ b/snap/snapcraft.yaml @@ -17,7 +17,7 @@ apps: cachepot: command: bin/cachepot.wrapper - cachepot-server: + cachepot-coordinator: environment: CACHEPOT_START_SERVER: 1 CACHEPOT_NO_DAEMON: 1 diff --git a/src/bin/cachepot-dist/build.rs b/src/bin/cachepot-dist/build.rs index f6322fcd..d52f22e1 100644 --- a/src/bin/cachepot-dist/build.rs +++ b/src/bin/cachepot-dist/build.rs @@ -858,7 +858,7 @@ impl DockerBuilder { } impl BuilderIncoming for DockerBuilder { - // From Server + // From Coordinator fn run_build( &self, tc: Toolchain, diff --git a/src/bin/cachepot-dist/main.rs b/src/bin/cachepot-dist/main.rs index a75a1075..d1307a68 100644 --- a/src/bin/cachepot-dist/main.rs +++ b/src/bin/cachepot-dist/main.rs @@ -8,14 +8,13 @@ extern crate serde_derive; use anyhow::{bail, Context, Error, Result}; use async_trait::async_trait; use cachepot::config::{ - scheduler as scheduler_config, server as server_config, ServerUrl, INSECURE_DIST_CLIENT_TOKEN, + scheduler as scheduler_config, worker, WorkerUrl, INSECURE_DIST_CLIENT_TOKEN, }; use cachepot::dist::{ - self, AllocJobResult, AssignJobResult, BuilderIncoming, CompileCommand, HeartbeatServerResult, + self, AllocJobResult, AssignJobResult, BuilderIncoming, CompileCommand, HeartbeatWorkerResult, InputsReader, JobAlloc, JobAuthorizer, JobComplete, JobId, JobState, RunJobResult, - SchedulerIncoming, SchedulerOutgoing, SchedulerStatusResult, 
ServerIncoming, ServerNonce, - ServerOutgoing, SubmitToolchainResult, TcCache, Toolchain, ToolchainReader, - UpdateJobStateResult, + SchedulerIncoming, SchedulerOutgoing, SchedulerStatusResult, SubmitToolchainResult, TcCache, + Toolchain, ToolchainReader, UpdateJobStateResult, WorkerIncoming, WorkerNonce, WorkerOutgoing, }; use cachepot::util::daemonize; use jsonwebtoken as jwt; @@ -35,19 +34,19 @@ use syslog::Facility; mod build; mod token_check; -pub const INSECURE_DIST_SERVER_TOKEN: &str = "dangerously_insecure_server"; +pub const INSECURE_DIST_WORKER_TOKEN: &str = "dangerously_insecure_worker"; #[derive(StructOpt)] enum Command { Auth(AuthSubcommand), Scheduler(SchedulerSubcommand), - Server(ServerSubcommand), + Worker(WorkerSubcommand), } #[derive(StructOpt)] #[structopt(rename_all = "kebab-case")] struct SchedulerSubcommand { - /// Use the server config file at PATH + /// Use the scheduler config file at PATH #[structopt(long, value_name = "PATH")] config: PathBuf, @@ -58,8 +57,8 @@ struct SchedulerSubcommand { #[derive(StructOpt)] #[structopt(rename_all = "kebab-case")] -struct ServerSubcommand { - /// Use the server config file at PATH +struct WorkerSubcommand { + /// Use the worker config file at PATH #[structopt(long, value_name = "PATH")] config: PathBuf, @@ -78,7 +77,7 @@ struct GenerateSharedToken { #[derive(StructOpt)] #[structopt(rename_all = "kebab-case")] -struct GenerateJwtHS256ServerToken { +struct GenerateJwtHS256WorkerToken { /// Use the key from the scheduler config file #[structopt(long, value_name = "PATH")] config: Option, @@ -87,9 +86,9 @@ struct GenerateJwtHS256ServerToken { #[structopt(long, value_name = "KEY", required_unless = "config")] secret_key: Option, - /// Generate a key for the specified server - #[structopt(long, value_name = "SERVER_ADDR", required_unless = "secret_key")] - server: ServerUrl, + /// Generate a key for the specified worker + #[structopt(long, value_name = "WORKER_ADDR", required_unless = "secret_key")] + worker: WorkerUrl, } #[derive(StructOpt)] @@ -97,7 +96,7 @@ struct GenerateJwtHS256ServerToken { enum AuthSubcommand { GenerateSharedToken(GenerateSharedToken), GenerateJwtHS256Key, - GenerateJwtHS256ServerToken(GenerateJwtHS256ServerToken), + GenerateJwtHS256WorkerToken(GenerateJwtHS256WorkerToken), } // Only supported on x86_64 Linux machines @@ -130,45 +129,45 @@ fn check_init_syslog(name: &str, level: &str) -> Result<()> { Ok(()) } -fn create_server_token(server_id: ServerUrl, auth_token: &str) -> String { - format!("{} {}", server_id.to_string(), auth_token) +fn create_worker_token(worker_url: WorkerUrl, auth_token: &str) -> String { + format!("{} {}", worker_url, auth_token) } -fn check_server_token(server_token: &str, auth_token: &str) -> Option { - let mut split = server_token.splitn(2, |c| c == ' '); - let server_addr = split.next()?; +fn check_worker_token(worker_token: &str, auth_token: &str) -> Option { + let mut split = worker_token.splitn(2, |c| c == ' '); + let worker_addr = split.next()?; match split.next() { - Some(t) if t == auth_token => Some(ServerUrl::from_str(server_addr).ok()?), + Some(t) if t == auth_token => Some(WorkerUrl::from_str(worker_addr).ok()?), Some(_) | None => None, } } #[derive(Serialize, Deserialize)] #[serde(deny_unknown_fields)] -struct ServerJwt { - server_id: ServerUrl, +struct WorkerJwt { + worker_url: WorkerUrl, } -fn create_jwt_server_token( - server_id: ServerUrl, +fn create_jwt_worker_token( + worker_url: WorkerUrl, header: &jwt::Header, key: &[u8], ) -> Result { let key = 
jwt::EncodingKey::from_secret(key); - jwt::encode(header, &ServerJwt { server_id }, &key).map_err(Into::into) + jwt::encode(header, &WorkerJwt { worker_url }, &key).map_err(Into::into) } -fn dangerous_insecure_extract_jwt_server_token(server_token: &str) -> Option { - jwt::dangerous_insecure_decode::(server_token) - .map(|res| res.claims.server_id) +fn dangerous_insecure_extract_jwt_worker_token(worker_token: &str) -> Option { + jwt::dangerous_insecure_decode::(worker_token) + .map(|res| res.claims.worker_url) .ok() } -fn check_jwt_server_token( - server_token: &str, +fn check_jwt_worker_token( + worker_token: &str, key: &[u8], validation: &jwt::Validation, -) -> Option { +) -> Option { let key = jwt::DecodingKey::from_secret(key); - jwt::decode::(server_token, &key, validation) - .map(|res| res.claims.server_id) + jwt::decode::(worker_token, &key, validation) + .map(|res| res.claims.worker_url) .ok() } @@ -182,21 +181,21 @@ async fn run(command: Command) -> Result { println!("{}", base64::encode_config(&bytes, base64::URL_SAFE_NO_PAD)); Ok(0) } - Command::Auth(AuthSubcommand::GenerateJwtHS256ServerToken( - GenerateJwtHS256ServerToken { + Command::Auth(AuthSubcommand::GenerateJwtHS256WorkerToken( + GenerateJwtHS256WorkerToken { config, secret_key, - server, + worker, }, )) => { let header = jwt::Header::new(jwt::Algorithm::HS256); let secret_key = if let Some(config_path) = config { if let Some(config) = scheduler_config::from_path(&config_path)? { - match config.server_auth { - scheduler_config::ServerAuth::JwtHS256 { secret_key } => secret_key, - scheduler_config::ServerAuth::Insecure - | scheduler_config::ServerAuth::Token { token: _ } => { + match config.worker_auth { + scheduler_config::WorkerAuth::JwtHS256 { secret_key } => secret_key, + scheduler_config::WorkerAuth::Insecure + | scheduler_config::WorkerAuth::Token { token: _ } => { bail!("Scheduler not configured with JWT HS256") } } @@ -208,8 +207,8 @@ async fn run(command: Command) -> Result { }; let secret_key = base64::decode_config(&secret_key, base64::URL_SAFE_NO_PAD)?; - let token = create_jwt_server_token(server, &header, &secret_key) - .context("Failed to create server token")?; + let token = create_jwt_worker_token(worker, &header, &secret_key) + .context("Failed to create worker token")?; println!("{}", token); Ok(0) } @@ -226,7 +225,7 @@ async fn run(command: Command) -> Result { let scheduler_config::Config { public_addr, client_auth, - server_auth, + worker_auth, } = if let Some(config) = scheduler_config::from_path(&config)? 
{ config } else { @@ -234,43 +233,44 @@ async fn run(command: Command) -> Result { }; if let Some(syslog) = syslog { - check_init_syslog("cachepot-buildserver", &syslog)?; + check_init_syslog("cachepot", &syslog)?; } - let check_client_auth: Box = match client_auth { - scheduler_config::ClientAuth::Insecure => Box::new(token_check::EqCheck::new( - INSECURE_DIST_CLIENT_TOKEN.to_owned(), - )), - scheduler_config::ClientAuth::Token { token } => { - Box::new(token_check::EqCheck::new(token)) - } - scheduler_config::ClientAuth::JwtValidate { - audience, - issuer, - jwks_url, - } => Box::new( - token_check::ValidJWTCheck::new(audience, issuer, &jwks_url) - .await - .context("Failed to create a checker for valid JWTs")?, - ), - scheduler_config::ClientAuth::Mozilla { required_groups } => { - Box::new(token_check::MozillaCheck::new(required_groups)) - } - scheduler_config::ClientAuth::ProxyToken { url, cache_secs } => { - Box::new(token_check::ProxyTokenCheck::new(url, cache_secs)) - } - }; + let checker_coordinator_auth: Box = + match client_auth { + scheduler_config::ClientAuth::Insecure => Box::new(token_check::EqCheck::new( + INSECURE_DIST_CLIENT_TOKEN.to_owned(), + )), + scheduler_config::ClientAuth::Token { token } => { + Box::new(token_check::EqCheck::new(token)) + } + scheduler_config::ClientAuth::JwtValidate { + audience, + issuer, + jwks_url, + } => Box::new( + token_check::ValidJWTCheck::new(audience, issuer, &jwks_url) + .await + .context("Failed to create a checker for valid JWTs")?, + ), + scheduler_config::ClientAuth::Mozilla { required_groups } => { + Box::new(token_check::MozillaCheck::new(required_groups)) + } + scheduler_config::ClientAuth::ProxyToken { url, cache_secs } => { + Box::new(token_check::ProxyTokenCheck::new(url, cache_secs)) + } + }; - let check_server_auth: dist::http::ServerAuthCheck = match server_auth { - scheduler_config::ServerAuth::Insecure => { - warn!("Scheduler starting with DANGEROUSLY_INSECURE server authentication"); - let token = INSECURE_DIST_SERVER_TOKEN; - Arc::new(move |server_token| check_server_token(server_token, token)) + let check_worker_auth: dist::http::WorkerAuthCheck = match worker_auth { + scheduler_config::WorkerAuth::Insecure => { + warn!("Scheduler starting with DANGEROUSLY_INSECURE worker authentication"); + let token = INSECURE_DIST_WORKER_TOKEN; + Arc::new(move |worker_token| check_worker_token(worker_token, token)) } - scheduler_config::ServerAuth::Token { token } => { - Arc::new(move |server_token| check_server_token(server_token, &token)) + scheduler_config::WorkerAuth::Token { token } => { + Arc::new(move |worker_token| check_worker_token(worker_token, &token)) } - scheduler_config::ServerAuth::JwtHS256 { secret_key } => { + scheduler_config::WorkerAuth::JwtHS256 { secret_key } => { let secret_key = base64::decode_config(&secret_key, base64::URL_SAFE_NO_PAD) .context("Secret key base64 invalid")?; if secret_key.len() != 256 / 8 { @@ -285,8 +285,8 @@ async fn run(command: Command) -> Result { sub: None, algorithms: vec![jwt::Algorithm::HS256], }; - Arc::new(move |server_token| { - check_jwt_server_token(server_token, &secret_key, &validation) + Arc::new(move |worker_token| { + check_jwt_worker_token(worker_token, &secret_key, &validation) }) } }; @@ -296,35 +296,35 @@ async fn run(command: Command) -> Result { let http_scheduler = dist::http::Scheduler::new( public_addr.to_url().clone(), scheduler, - check_client_auth, - check_server_auth, + checker_coordinator_auth, + check_worker_auth, ); 
void::unreachable(http_scheduler.start().await?); } - Command::Server(ServerSubcommand { config, syslog }) => { - let server_config::Config { + Command::Worker(WorkerSubcommand { config, syslog }) => { + let worker::Config { builder, cache_dir, public_addr, scheduler_url, scheduler_auth, toolchain_cache_size, - } = if let Some(config) = server_config::from_path(&config)? { + } = if let Some(config) = worker::from_path(&config)? { config } else { bail!("Could not load config!"); }; if let Some(syslog) = syslog { - check_init_syslog("cachepot-build-server", &syslog)?; + check_init_syslog("cachepot-build-coordinator", &syslog)?; } let builder: Box = match builder { - server_config::BuilderType::Docker => { + worker::BuilderType::Docker => { Box::new(build::DockerBuilder::new().context("Docker builder failed to start")?) } - server_config::BuilderType::Overlay { + worker::BuilderType::Overlay { bwrap_path, build_dir, } => Box::new( @@ -333,37 +333,35 @@ async fn run(command: Command) -> Result { ), }; - let server_id = public_addr.clone(); + let worker_url = public_addr.clone(); let scheduler_auth = match scheduler_auth { - server_config::SchedulerAuth::Insecure => { - warn!("Server starting with DANGEROUSLY_INSECURE scheduler authentication"); - create_server_token(server_id, INSECURE_DIST_SERVER_TOKEN) - } - server_config::SchedulerAuth::Token { token } => { - create_server_token(server_id, &token) + worker::SchedulerAuth::Insecure => { + warn!("Worker starting with DANGEROUSLY_INSECURE scheduler authentication"); + create_worker_token(worker_url, INSECURE_DIST_WORKER_TOKEN) } - server_config::SchedulerAuth::JwtToken { token } => { - let token_server_id: ServerUrl = - dangerous_insecure_extract_jwt_server_token(&token) + worker::SchedulerAuth::Token { token } => create_worker_token(worker_url, &token), + worker::SchedulerAuth::JwtToken { token } => { + let token_worker_url: WorkerUrl = + dangerous_insecure_extract_jwt_worker_token(&token) .context("Could not decode scheduler auth jwt")?; - if token_server_id != server_id { + if token_worker_url != worker_url { bail!( "JWT server id ({:?}) did not match configured server id ({:?})", - token_server_id, - server_id + token_worker_url, + worker_url ) } token } }; - let server = Server::new(builder, &cache_dir, toolchain_cache_size) + let worker = Worker::new(builder, &cache_dir, toolchain_cache_size) .context("Failed to create cachepot server instance")?; - let http_server = dist::http::Server::new( + let http_server = dist::http::Worker::new( public_addr.0.to_url().clone(), scheduler_url.to_url().clone(), scheduler_auth, - server, + worker, ) .context("Failed to create cachepot HTTP server instance")?; void::unreachable(http_server.start().await?) @@ -381,13 +379,13 @@ fn init_logging() { } const MAX_PER_CORE_LOAD: f64 = 10f64; -const SERVER_REMEMBER_ERROR_TIMEOUT: Duration = Duration::from_secs(300); +const WORKER_REMEMBER_ERROR_TIMEOUT: Duration = Duration::from_secs(300); const UNCLAIMED_PENDING_TIMEOUT: Duration = Duration::from_secs(300); const UNCLAIMED_READY_TIMEOUT: Duration = Duration::from_secs(60); #[derive(Clone)] struct JobDetail { - server_id: ServerUrl, + worker_url: WorkerUrl, state: JobState, } @@ -399,10 +397,10 @@ pub struct Scheduler { // Currently running jobs, can never be Complete jobs: Mutex>, - servers: Mutex>, + workers: Mutex>, } -struct ServerDetails { +struct WorkerDetails { jobs_assigned: HashSet, // Jobs assigned that haven't seen a state change. Can only be pending // or ready. 
@@ -410,7 +408,7 @@ struct ServerDetails { last_seen: Instant, last_error: Option, num_cpus: usize, - server_nonce: ServerNonce, + worker_nonce: WorkerNonce, job_authorizer: Box, } @@ -419,36 +417,36 @@ impl Scheduler { Scheduler { job_count: AtomicUsize::new(0), jobs: Mutex::new(BTreeMap::new()), - servers: Mutex::new(HashMap::new()), + workers: Mutex::new(HashMap::new()), } } - fn prune_servers( + fn prune_workers( &self, - servers: &mut MutexGuard>, + workers: &mut MutexGuard>, jobs: &mut MutexGuard>, ) { let now = Instant::now(); let mut dead_servers = Vec::new(); - for (server_id, details) in servers.iter() { + for (worker_url, details) in workers.iter() { if now.duration_since(details.last_seen) > dist::http::HEARTBEAT_TIMEOUT { - dead_servers.push(server_id.clone()); + dead_servers.push(worker_url.clone()); } } - for server_id in dead_servers { + for worker_url in dead_servers { warn!( - "Server {} appears to be dead, pruning it in the scheduler", - server_id + "Worker {} appears to be dead, pruning it in the scheduler", + worker_url ); - let server_details = servers - .remove(&server_id) - .expect("server went missing from map"); - for job_id in server_details.jobs_assigned { + let worker_details = workers + .remove(&worker_url) + .expect("worker went missing from map"); + for job_id in worker_details.jobs_assigned { warn!( - "Non-terminated job {} was cleaned up in server pruning", + "Non-terminated job {} was cleaned up in worker pruning", job_id ); // A job may be missing here if it failed to allocate @@ -471,27 +469,27 @@ impl SchedulerIncoming for Scheduler { requester: &dyn SchedulerOutgoing, tc: Toolchain, ) -> Result { - let (job_id, server_id, auth) = { + let (job_id, worker_url, auth) = { // LOCKS - let mut servers = self.servers.lock().unwrap(); + let mut workers = self.workers.lock().unwrap(); let res = { let mut best = None; let mut best_err = None; let mut best_load: f64 = MAX_PER_CORE_LOAD; let now = Instant::now(); - for (server_id, details) in servers.iter_mut() { + for (worker_url, details) in workers.iter_mut() { let load = details.jobs_assigned.len() as f64 / details.num_cpus as f64; if let Some(last_error) = details.last_error { if load < MAX_PER_CORE_LOAD { - if now.duration_since(last_error) > SERVER_REMEMBER_ERROR_TIMEOUT { + if now.duration_since(last_error) > WORKER_REMEMBER_ERROR_TIMEOUT { details.last_error = None; } match best_err { Some(( _, - &mut ServerDetails { + &mut WorkerDetails { last_error: Some(best_last_err), .. 
}, @@ -499,25 +497,25 @@ impl SchedulerIncoming for Scheduler { if last_error < best_last_err { trace!( "Selected {:?}, its most recent error is {:?} ago", - server_id, + worker_url, now - last_error ); - best_err = Some((server_id.clone(), details)); + best_err = Some((worker_url.clone(), details)); } } _ => { trace!( "Selected {:?}, its most recent error is {:?} ago", - server_id, + worker_url, now - last_error ); - best_err = Some((server_id.clone(), details)); + best_err = Some((worker_url.clone(), details)); } } } } else if load < best_load { - best = Some((server_id.clone(), details)); - trace!("Selected {:?} as the server with the best load", server_id); + best = Some((worker_url.clone(), details)); + trace!("Selected {:?} as the worker with the best load", worker_url); best_load = load; if load == 0f64 { break; @@ -526,25 +524,25 @@ impl SchedulerIncoming for Scheduler { } // Assign the job to our best choice - if let Some((server_id, server_details)) = best.or(best_err) { + if let Some((worker_url, worker_details)) = best.or(best_err) { let job_count = self.job_count.fetch_add(1, Ordering::SeqCst) as u64; let job_id = JobId(job_count); - assert!(server_details.jobs_assigned.insert(job_id)); - assert!(server_details + assert!(worker_details.jobs_assigned.insert(job_id)); + assert!(worker_details .jobs_unclaimed .insert(job_id, Instant::now()) .is_none()); info!( - "Job {} created and will be assigned to server {:?}", - job_id, server_id + "Job {} created and will be assigned to server {}", + job_id, &worker_url ); - let auth = server_details + let auth = worker_details .job_authorizer .generate_token(job_id) .map_err(Error::from) .context("Could not create an auth token for this job")?; - Some((job_id, server_id, auth)) + Some((job_id, worker_url, auth)) } else { None } @@ -555,7 +553,7 @@ impl SchedulerIncoming for Scheduler { } else { let msg = format!( "Insufficient capacity across {} available servers", - servers.len() + workers.len() ); return Ok(AllocJobResult::Fail { msg }); } @@ -564,21 +562,21 @@ impl SchedulerIncoming for Scheduler { state, need_toolchain, } = requester - .do_assign_job(server_id.clone(), job_id, tc, auth.clone()) + .do_assign_job(worker_url.clone(), job_id, tc, auth.clone()) .await .with_context(|| { // LOCKS - let mut servers = self.servers.lock().unwrap(); - if let Some(entry) = servers.get_mut(&server_id) { + let mut workers = self.workers.lock().unwrap(); + if let Some(entry) = workers.get_mut(&worker_url) { entry.last_error = Some(Instant::now()); entry.jobs_unclaimed.remove(&job_id); if !entry.jobs_assigned.remove(&job_id) { - "assign job failed and job not known to the server" + "assign job failed and job not known to the worker" } else { - "assign job failed, job un-assigned from the server" + "assign job failed, job un-assigned from the worker" } } else { - "assign job failed and server not known" + "assign job failed and worker not known" } })?; { @@ -593,7 +591,7 @@ impl SchedulerIncoming for Scheduler { .insert( job_id, JobDetail { - server_id: server_id.clone(), + worker_url: worker_url.clone(), state } ) @@ -602,7 +600,7 @@ impl SchedulerIncoming for Scheduler { let job_alloc = JobAlloc { auth, job_id, - server_id: server_id.clone(), + worker_url: worker_url.clone(), }; Ok(AllocJobResult::Success { job_alloc, @@ -610,25 +608,25 @@ impl SchedulerIncoming for Scheduler { }) } - fn handle_heartbeat_server( + fn handle_heartbeat_worker( &self, - server_id: ServerUrl, - server_nonce: ServerNonce, + worker_url: WorkerUrl, + worker_nonce: 
WorkerNonce, num_cpus: usize, job_authorizer: Box, - ) -> Result { + ) -> Result { if num_cpus == 0 { bail!("Invalid number of CPUs (0) specified in heartbeat") } // LOCKS let mut jobs = self.jobs.lock().unwrap(); - let mut servers = self.servers.lock().unwrap(); + let mut workers = self.workers.lock().unwrap(); - self.prune_servers(&mut servers, &mut jobs); + self.prune_workers(&mut workers, &mut jobs); - match servers.get_mut(&server_id) { - Some(ref mut details) if details.server_nonce == server_nonce => { + match workers.get_mut(&worker_url) { + Some(ref mut details) if details.worker_nonce == worker_nonce => { let now = Instant::now(); details.last_seen = now; @@ -667,70 +665,70 @@ impl SchedulerIncoming for Scheduler { if !details.jobs_assigned.remove(&job_id) { warn!( "Stale job for server {} not assigned: {}", - server_id, job_id + &worker_url, job_id ); } if details.jobs_unclaimed.remove(&job_id).is_none() { - warn!("Unknown stale job for server {}: {}", server_id, job_id); + warn!("Unknown stale job for worker {}: {}", worker_url, job_id); } if jobs.remove(&job_id).is_none() { - warn!("Unknown stale job for server {}: {}", server_id, job_id); + warn!("Unknown stale job for worker {}: {}", worker_url, job_id); } } } - return Ok(HeartbeatServerResult { is_new: false }); + return Ok(HeartbeatWorkerResult { is_new: false }); } - Some(ref mut details) if details.server_nonce != server_nonce => { + Some(ref mut details) if details.worker_nonce != worker_nonce => { for job_id in details.jobs_assigned.iter() { if jobs.remove(job_id).is_none() { warn!( "Unknown job found when replacing server {}: {}", - server_id, job_id + worker_url, job_id ); } } } _ => (), } - info!("Registered new server {:?}", server_id); - servers.insert( - server_id, - ServerDetails { + info!("Registered new server {:?}", worker_url); + workers.insert( + worker_url, + WorkerDetails { last_seen: Instant::now(), last_error: None, jobs_assigned: HashSet::new(), jobs_unclaimed: HashMap::new(), num_cpus, - server_nonce, + worker_nonce, job_authorizer, }, ); - Ok(HeartbeatServerResult { is_new: true }) + Ok(HeartbeatWorkerResult { is_new: true }) } fn handle_update_job_state( &self, job_id: JobId, - server_id: ServerUrl, + worker_url: WorkerUrl, job_state: JobState, ) -> Result { // LOCKS let mut jobs = self.jobs.lock().unwrap(); - let mut servers = self.servers.lock().unwrap(); + let mut servers = self.workers.lock().unwrap(); if let btree_map::Entry::Occupied(mut entry) = jobs.entry(job_id) { let job_detail = entry.get(); - if job_detail.server_id != server_id { + if job_detail.worker_url != worker_url { bail!( "Job id {} is not registed on server {:?}", job_id, - server_id + worker_url ) } let now = Instant::now(); - let mut server_details = servers.get_mut(&server_id); + let mut server_details = servers.get_mut(&worker_url); if let Some(ref mut details) = server_details { details.last_seen = now; }; @@ -741,7 +739,7 @@ impl SchedulerIncoming for Scheduler { if let Some(details) = server_details { details.jobs_unclaimed.remove(&job_id); } else { - warn!("Job state updated, but server is not known to scheduler") + warn!("Job state updated, but worker is not known to scheduler") } entry.get_mut().state = job_state } @@ -750,7 +748,7 @@ impl SchedulerIncoming for Scheduler { if let Some(entry) = server_details { assert!(entry.jobs_assigned.remove(&job_id)) } else { - bail!("Job was marked as finished, but server is not known to scheduler") + bail!("Job was marked as finished, but worker is not known to scheduler") } } 
(from, to) => bail!("Invalid job state transition from {} to {}", from, to), @@ -765,9 +763,9 @@ impl SchedulerIncoming for Scheduler { fn handle_status(&self) -> Result { // LOCKS let mut jobs = self.jobs.lock().unwrap(); - let mut servers = self.servers.lock().unwrap(); + let mut servers = self.workers.lock().unwrap(); - self.prune_servers(&mut servers, &mut jobs); + self.prune_workers(&mut servers, &mut jobs); Ok(SchedulerStatusResult { num_servers: servers.len(), @@ -777,21 +775,21 @@ impl SchedulerIncoming for Scheduler { } } -pub struct Server { +pub struct Worker { builder: Box, cache: Mutex, job_toolchains: tokio::sync::Mutex>, } -impl Server { +impl Worker { pub fn new( builder: Box, cache_dir: &Path, toolchain_cache_size: u64, - ) -> Result { + ) -> Result { let cache = TcCache::new(&cache_dir.join("tc"), toolchain_cache_size) .context("Failed to create toolchain cache")?; - Ok(Server { + Ok(Worker { builder, cache: Mutex::new(cache), job_toolchains: tokio::sync::Mutex::new(HashMap::new()), @@ -800,7 +798,7 @@ impl Server { } #[async_trait] -impl ServerIncoming for Server { +impl WorkerIncoming for Worker { async fn handle_assign_job(&self, job_id: JobId, tc: Toolchain) -> Result { let need_toolchain = !self.cache.lock().unwrap().contains_toolchain(&tc); assert!(self @@ -822,7 +820,7 @@ impl ServerIncoming for Server { } async fn handle_submit_toolchain( &self, - requester: &dyn ServerOutgoing, + requester: &dyn WorkerOutgoing, job_id: JobId, tc_rdr: ToolchainReader<'_>, ) -> Result { @@ -850,7 +848,7 @@ impl ServerIncoming for Server { } async fn handle_run_job( &self, - requester: &dyn ServerOutgoing, + requester: &dyn WorkerOutgoing, job_id: JobId, command: CompileCommand, outputs: Vec, diff --git a/src/bin/cachepot-dist/token_check.rs b/src/bin/cachepot-dist/token_check.rs index 9d148462..48348080 100644 --- a/src/bin/cachepot-dist/token_check.rs +++ b/src/bin/cachepot-dist/token_check.rs @@ -1,7 +1,7 @@ use crate::jwt; use anyhow::{bail, Context, Result}; use async_trait::async_trait; -use cachepot::dist::http::{ClientAuthCheck, ClientVisibleMsg}; +use cachepot::dist::http::{CoordinatorAuthCheck, CoordinatorVisibleMsg}; use cachepot::util::RequestExt; use std::collections::HashMap; use std::result::Result as StdResult; @@ -53,13 +53,13 @@ pub struct EqCheck { } #[async_trait] -impl ClientAuthCheck for EqCheck { - async fn check(&self, token: &str) -> StdResult<(), ClientVisibleMsg> { +impl CoordinatorAuthCheck for EqCheck { + async fn check(&self, token: &str) -> StdResult<(), CoordinatorVisibleMsg> { if self.s == token { Ok(()) } else { warn!("User token {} != expected token {}", token, self.s); - Err(ClientVisibleMsg::from_nonsensitive( + Err(CoordinatorVisibleMsg::from_nonsensitive( "Fixed token mismatch".to_owned(), )) } @@ -84,11 +84,11 @@ pub struct MozillaCheck { } #[async_trait] -impl ClientAuthCheck for MozillaCheck { - async fn check(&self, token: &str) -> StdResult<(), ClientVisibleMsg> { +impl CoordinatorAuthCheck for MozillaCheck { + async fn check(&self, token: &str) -> StdResult<(), CoordinatorVisibleMsg> { self.check_mozilla(token).await.map_err(|e| { warn!("Mozilla token validation failed: {}", e); - ClientVisibleMsg::from_nonsensitive( + CoordinatorVisibleMsg::from_nonsensitive( "Failed to validate Mozilla OAuth token, run cachepot --dist-auth".to_owned(), ) }) @@ -165,7 +165,7 @@ impl MozillaCheck { } // The API didn't return a HTTP error code, let's check the response - let () = check_mozilla_profile(&user, &self.required_groups, &res_text) + 
check_mozilla_profile(&user, &self.required_groups, &res_text) .with_context(|| format!("Validation of the user profile failed for {}", user))?; // Validation success, cache the token @@ -248,13 +248,13 @@ pub struct ProxyTokenCheck { } #[async_trait] -impl ClientAuthCheck for ProxyTokenCheck { - async fn check(&self, token: &str) -> StdResult<(), ClientVisibleMsg> { +impl CoordinatorAuthCheck for ProxyTokenCheck { + async fn check(&self, token: &str) -> StdResult<(), CoordinatorVisibleMsg> { match self.check_token_with_forwarding(token).await { Ok(()) => Ok(()), Err(e) => { warn!("Proxying token validation failed: {}", e); - Err(ClientVisibleMsg::from_nonsensitive( + Err(CoordinatorVisibleMsg::from_nonsensitive( "Validation with token forwarding failed".to_owned(), )) } @@ -318,13 +318,13 @@ pub struct ValidJWTCheck { } #[async_trait] -impl ClientAuthCheck for ValidJWTCheck { - async fn check(&self, token: &str) -> StdResult<(), ClientVisibleMsg> { +impl CoordinatorAuthCheck for ValidJWTCheck { + async fn check(&self, token: &str) -> StdResult<(), CoordinatorVisibleMsg> { match self.check_jwt_validity(token) { Ok(()) => Ok(()), Err(e) => { warn!("JWT validation failed: {}", e); - Err(ClientVisibleMsg::from_nonsensitive( + Err(CoordinatorVisibleMsg::from_nonsensitive( "JWT could not be validated".to_owned(), )) } diff --git a/src/client.rs b/src/client.rs index 03cd5629..f6fa5164 100644 --- a/src/client.rs +++ b/src/client.rs @@ -21,34 +21,34 @@ use std::io::{self, BufReader, BufWriter, Read}; use std::net::TcpStream; /// A connection to an cachepot server. -pub struct ServerConnection { - /// A reader for the socket connected to the server. +pub struct CoordinatorConnection { + /// A reader for the socket connected to the coordinator. reader: BufReader, - /// A writer for the socket connected to the server. + /// A writer for the socket connected to the coordinator. writer: BufWriter, } -impl ServerConnection { +impl CoordinatorConnection { /// Create a new connection using `stream`. - pub fn new(stream: TcpStream) -> io::Result { + pub fn new(stream: TcpStream) -> io::Result { let writer = stream.try_clone()?; - Ok(ServerConnection { + Ok(CoordinatorConnection { reader: BufReader::new(stream), writer: BufWriter::new(writer), }) } - /// Send `request` to the server, read and return a `Response`. + /// Send `request` to the coordinator, read and return a `Response`. pub fn request(&mut self, request: Request) -> Result { - trace!("ServerConnection::request"); + trace!("CoordinatorConnection::request"); util::write_length_prefixed_bincode(&mut self.writer, request)?; - trace!("ServerConnection::request: sent request"); + trace!("CoordinatorConnection::request: sent request"); self.read_one_response() } - /// Read a single `Response` from the server. + /// Read a single `Response` from the coordinator. pub fn read_one_response(&mut self) -> Result { - trace!("ServerConnection::read_one_response"); + trace!("CoordinatorConnection::read_one_response"); let mut bytes = [0; 4]; self.reader .read_exact(&mut bytes) @@ -62,28 +62,30 @@ impl ServerConnection { } } -/// Establish a TCP connection to an cachepot server listening on `port`. -pub fn connect_to_server(port: u16) -> io::Result { - trace!("connect_to_server({})", port); +/// Establish a TCP connection to an cachepot coordinator listening on `port`. 
+pub fn connect_to_coordinator(port: u16) -> io::Result { + trace!("connect_to_coordinator({})", port); let stream = TcpStream::connect(("127.0.0.1", port))?; - ServerConnection::new(stream) + CoordinatorConnection::new(stream) } -/// Attempt to establish a TCP connection to an cachepot server listening on `port`. +/// Attempt to establish a TCP connection to an cachepot coordinator listening on `port`. /// /// If the connection fails, retry a few times. -pub fn connect_with_retry(port: u16) -> io::Result { +pub fn connect_with_retry(port: u16) -> io::Result { trace!("connect_with_retry({})", port); // TODOs: - // * Pass the server Child in here, so we can stop retrying + // * Pass the coordinator Child in here, so we can stop retrying // if the process exited. - // * Send a pipe handle to the server process so it can notify - // us once it starts the server instead of us polling. - match retry(Fixed::from_millis(500).take(10), || connect_to_server(port)) { + // * Send a pipe handle to the coordinator process so it can notify + // us once it starts the coordinator instead of us polling. + match retry(Fixed::from_millis(500).take(10), || { + connect_to_coordinator(port) + }) { Ok(conn) => Ok(conn), _ => Err(io::Error::new( io::ErrorKind::TimedOut, - "Connection to server timed out", + "Connection to coordinator timed out", )), } } diff --git a/src/cmdline.rs b/src/cmdline.rs index 1ecf4538..fe8506d3 100644 --- a/src/cmdline.rs +++ b/src/cmdline.rs @@ -60,11 +60,11 @@ pub struct Command2 { /// start background server #[structopt(long, group = "flags")] - start_server: bool, + start_coordinator: bool, /// stop background server #[structopt(long, group = "flags")] - stop_server: bool, + stop_coordinator: bool, /// show cache statistics #[structopt(short, long, group = "flags")] @@ -86,8 +86,13 @@ pub struct Command2 { )] package_toolchain: Vec, - #[structopt(long, hidden = true, group = "flags", env = "CACHEPOT_START_SERVER")] - internal_start_server: Option, + #[structopt( + long, + hidden = true, + group = "flags", + env = "CACHEPOT_START_COORDINATOR" + )] + internal_start_coordinator: Option, /// set output format of statistics #[structopt(long, default_value = "text", possible_values = StatsFormat::VARIANTS)] @@ -100,18 +105,18 @@ impl TryFrom for Command { type Error = anyhow::Error; fn try_from(cmd: Command2) -> Result { - if Some("1") == cmd.internal_start_server.as_deref() { - Ok(Command::InternalStartServer) + if Some("1") == cmd.internal_start_coordinator.as_deref() { + Ok(Command::InternalStartCoordinator) } else if cmd.show_stats { Ok(Command::ShowStats(cmd.stats_format)) } else if cmd.dist_status { Ok(Command::DistStatus) } else if cmd.zero_stats { Ok(Command::ZeroStats) - } else if cmd.start_server { - Ok(Command::StartServer) - } else if cmd.stop_server { - Ok(Command::StopServer) + } else if cmd.start_coordinator { + Ok(Command::StartCoordinator) + } else if cmd.stop_coordinator { + Ok(Command::StopCoordinator) } else if cmd.dist_auth { Ok(Command::DistAuth) } else if cmd.clear_cache { @@ -158,11 +163,11 @@ pub enum Command { /// Show cache statistics and exit. ShowStats(StatsFormat), /// Run background server. - InternalStartServer, - /// Start background server as a subprocess. - StartServer, + InternalStartCoordinator, + /// Start background coordinator as a subprocess. + StartCoordinator, /// Stop background server. - StopServer, + StopCoordinator, /// Zero cache statistics and exit. ZeroStats, /// Show the status of the distributed client. 
diff --git a/src/commands.rs b/src/commands.rs index 64f2a91f..d90b5ea0 100644 --- a/src/commands.rs +++ b/src/commands.rs @@ -12,14 +12,14 @@ // See the License for the specific language governing permissions and // limitations under the License. -use crate::client::{connect_to_server, connect_with_retry, ServerConnection}; +use crate::client::{connect_to_coordinator, connect_with_retry, CoordinatorConnection}; use crate::cmdline::{Command, StatsFormat}; use crate::compiler::ColorMode; use crate::config::Config; +use crate::coordinator::{self, CoordinatorInfo, CoordinatorStartup, DistInfo}; use crate::jobserver::Client; use crate::mock_command::{CommandChild, CommandCreatorSync, ProcessCommandCreator, RunCommand}; use crate::protocol::{Compile, CompileFinished, CompileResponse, Request, Response}; -use crate::server::{self, DistInfo, ServerInfo, ServerStartup}; use crate::util::daemonize; use crate::util::fs::{File, OpenOptions}; use atty::Stream; @@ -39,30 +39,30 @@ use which::which_in; use crate::errors::*; -/// The default cachepot server port. +/// The default cachepot coordinator port. pub const DEFAULT_PORT: u16 = 4226; -/// The number of milliseconds to wait for server startup. -const SERVER_STARTUP_TIMEOUT_MS: u32 = 10000; +/// The number of milliseconds to wait for coordinator startup. +const COORDINATOR_STARTUP_TIMEOUT_MS: u32 = 10000; -/// Get the port on which the server should listen. +/// Get the port on which the coordinator should listen. fn get_port() -> u16 { - env::var("CACHEPOT_SERVER_PORT") + env::var("CACHEPOT_COORDINATOR_PORT") .ok() .and_then(|s| s.parse().ok()) .unwrap_or(DEFAULT_PORT) } -async fn read_server_startup_status( - mut server: R, -) -> Result { - // This is an async equivalent of ServerConnection::read_one_response +async fn read_coordinator_startup_status( + mut coordinator: R, +) -> Result { + // This is an async equivalent of CoordinatorConnection::read_one_response let mut bytes = [0u8; 4]; - server.read_exact(&mut bytes[..]).await?; + coordinator.read_exact(&mut bytes[..]).await?; let len = BigEndian::read_u32(&bytes); let mut data = vec![0; len as usize]; - server.read_exact(data.as_mut_slice()).await?; + coordinator.read_exact(data.as_mut_slice()).await?; Ok(bincode::deserialize(&data)?) } @@ -70,10 +70,10 @@ async fn read_server_startup_status( /// Re-execute the current executable as a background server, and wait /// for it to start up. 
#[cfg(not(windows))] -fn run_server_process() -> Result { +fn run_coordinator_process() -> Result { use std::time::Duration; - trace!("run_server_process"); + trace!("run_coordinator_process"); let tempdir = tempfile::Builder::new().prefix("cachepot").tempdir()?; let socket_path = tempdir.path().join("sock"); let runtime = Runtime::new()?; @@ -81,7 +81,7 @@ fn run_server_process() -> Result { let workdir = exe_path.parent().expect("executable path has no parent?!"); let _child = process::Command::new(&exe_path) .current_dir(workdir) - .env("CACHEPOT_START_SERVER", "1") + .env("CACHEPOT_START_COORDINATOR", "1") .env("CACHEPOT_STARTUP_NOTIFY", &socket_path) .env("RUST_BACKTRACE", "1") .spawn()?; @@ -90,14 +90,14 @@ fn run_server_process() -> Result { let listener = tokio::net::UnixListener::bind(&socket_path)?; let (socket, _) = listener.accept().await?; - read_server_startup_status(socket).await + read_coordinator_startup_status(socket).await }; - let timeout = Duration::from_millis(SERVER_STARTUP_TIMEOUT_MS.into()); + let timeout = Duration::from_millis(COORDINATOR_STARTUP_TIMEOUT_MS.into()); runtime.block_on(async move { match tokio::time::timeout(timeout, startup).await { Ok(result) => result, - Err(_elapsed) => Ok(ServerStartup::TimedOut), + Err(_elapsed) => Ok(CoordinatorStartup::TimedOut), } }) } @@ -136,7 +136,7 @@ fn redirect_error_log() -> Result<()> { /// Re-execute the current executable as a background server. #[cfg(windows)] -fn run_server_process() -> Result { +fn run_coordinator_process() -> Result { use futures::StreamExt; use std::mem; use std::os::windows::ffi::OsStrExt; @@ -150,13 +150,13 @@ fn run_server_process() -> Result { CREATE_NEW_PROCESS_GROUP, CREATE_NO_WINDOW, CREATE_UNICODE_ENVIRONMENT, }; - trace!("run_server_process"); + trace!("run_coordinator_process"); // Create a mini event loop and register our named pipe server let runtime = Runtime::new()?; let pipe_name = format!(r"\\.\pipe\{}", Uuid::new_v4().to_simple_ref()); - // Spawn a server which should come back and connect to us + // Spawn a coordinator which should come back and connect to us let exe_path = env::current_exe()?; let mut exe = OsStr::new(&exe_path) .encode_wide() @@ -165,7 +165,10 @@ fn run_server_process() -> Result { let mut envp = { let mut v = vec![]; let extra_vars = vec![ - (OsString::from("CACHEPOT_START_SERVER"), OsString::from("1")), + ( + OsString::from("CACHEPOT_START_COORDINATOR"), + OsString::from("1"), + ), ( OsString::from("CACHEPOT_STARTUP_NOTIFY"), OsString::from(&pipe_name), @@ -230,22 +233,22 @@ fn run_server_process() -> Result { let socket = incoming.next().await; let socket = socket.unwrap(); // incoming() never returns None - read_server_startup_status(socket?).await + read_coordinator_startup_status(socket?).await }; - let timeout = Duration::from_millis(SERVER_STARTUP_TIMEOUT_MS.into()); + let timeout = Duration::from_millis(COORDINATOR_STARTUP_TIMEOUT_MS.into()); runtime.block_on(async move { match tokio::time::timeout(timeout, startup).await { Ok(result) => result, - Err(_elapsed) => Ok(ServerStartup::TimedOut), + Err(_elapsed) => Ok(CoordinatorStartup::TimedOut), } }) } -/// Attempt to connect to an cachepot server listening on `port`, or start one if no server is running. -fn connect_or_start_server(port: u16) -> Result { +/// Attempt to connect to an cachepot coordinator listening on `port`, or start one if no coordinator is running. 
+fn connect_or_start_server(port: u16) -> Result { trace!("connect_or_start_server({})", port); - match connect_to_server(port) { + match connect_to_coordinator(port) { Ok(server) => Ok(server), Err(ref e) if e.kind() == io::ErrorKind::ConnectionRefused @@ -253,8 +256,8 @@ fn connect_or_start_server(port: u16) -> Result { { // If the connection was refused we probably need to start // the server. - match run_server_process()? { - ServerStartup::Ok { port: actualport } => { + match run_coordinator_process()? { + CoordinatorStartup::Ok { port: actualport } => { if port != actualport { // bail as the next connect_with_retry will fail bail!( @@ -264,34 +267,36 @@ fn connect_or_start_server(port: u16) -> Result { ); } } - ServerStartup::AddrInUse => { - debug!("AddrInUse: possible parallel server bootstraps, retrying..") + CoordinatorStartup::AddrInUse => { + debug!("AddrInUse: possible parallel coordinator bootstraps, retrying..") + } + CoordinatorStartup::TimedOut => bail!("Timed out waiting for coordinator startup"), + CoordinatorStartup::Err { reason } => { + bail!("Coordinator startup failed: {}", reason) } - ServerStartup::TimedOut => bail!("Timed out waiting for server startup"), - ServerStartup::Err { reason } => bail!("Server startup failed: {}", reason), } - let server = connect_with_retry(port)?; - Ok(server) + let coordinator = connect_with_retry(port)?; + Ok(coordinator) } Err(e) => Err(e.into()), } } -/// Send a `ZeroStats` request to the server, and return the `ServerInfo` request if successful. -pub fn request_zero_stats(mut conn: ServerConnection) -> Result { +/// Send a `ZeroStats` request to the coordinator, and return the `CoordinatorInfo` request if successful. +pub fn request_zero_stats(mut conn: CoordinatorConnection) -> Result { debug!("request_stats"); - let response = conn - .request(Request::ZeroStats) - .context("failed to send zero statistics command to server or failed to receive respone")?; + let response = conn.request(Request::ZeroStats).context( + "failed to send zero statistics command to coordinator or failed to receive response", + )?; if let Response::Stats(stats) = response { Ok(*stats) } else { - bail!("Unexpected server response!") + bail!("Unexpected coordinator response!") } } -/// Send a `GetStats` request to the server, and return the `ServerInfo` request if successful. -pub fn request_stats(mut conn: ServerConnection) -> Result { +/// Send a `GetStats` request to the coordinator, and return the `CoordinatorInfo` request if successful. +pub fn request_stats(mut conn: CoordinatorConnection) -> Result { debug!("request_stats"); let response = conn .request(Request::GetStats) @@ -299,12 +304,12 @@ pub fn request_stats(mut conn: ServerConnection) -> Result { if let Response::Stats(stats) = response { Ok(*stats) } else { - bail!("Unexpected server response!") + bail!("Unexpected coordinator response!") } } /// Send a `DistStatus` request to the server, and return `DistStatus` if successful. -pub fn request_dist_status(mut conn: ServerConnection) -> Result { +pub fn request_dist_status(mut conn: CoordinatorConnection) -> Result { debug!("request_dist_status"); let response = conn .request(Request::DistStatus) @@ -312,12 +317,12 @@ pub fn request_dist_status(mut conn: ServerConnection) -> Result { if let Response::DistStatus(info) = response { Ok(info) } else { - bail!("Unexpected server response!") + bail!("Unexpected coordinator response!") } } -/// Send a `Shutdown` request to the server, and return the `ServerInfo` contained within the response if successful.
-pub fn request_shutdown(mut conn: ServerConnection) -> Result { +/// Send a `Shutdown` request to the server, and return the `CoordinatorInfo` contained within the response if successful. +pub fn request_shutdown(mut conn: CoordinatorConnection) -> Result { debug!("request_shutdown"); //TODO: better error mapping let response = conn @@ -326,13 +331,13 @@ pub fn request_shutdown(mut conn: ServerConnection) -> Result { if let Response::ShuttingDown(stats) = response { Ok(*stats) } else { - bail!("Unexpected server response!") + bail!("Unexpected coordinator response!") } } -/// Send a `Compile` request to the server, and return the server response if successful. +/// Send a `Compile` request to the server, and return the coordinator response if successful. fn request_compile( - conn: &mut ServerConnection, + conn: &mut CoordinatorConnection, exe: W, args: &[X], cwd: Y, @@ -361,7 +366,7 @@ where } } -pub fn request_clear_cache(mut conn: ServerConnection) -> Result<()> { +pub fn request_clear_cache(mut conn: CoordinatorConnection) -> Result<()> { debug!("clear_cache"); conn.request(Request::ClearCache) .context("Failed to send data to or receive data from server")?; @@ -413,7 +418,7 @@ fn handle_compile_finished( } Ok(()) } - // It might be nice if the server sent stdout/stderr as the process + // It might be nice if the coordinator sent stdout/stderr as the process // ran, but then it would have to also save them in the cache as // interleaved streams to really make it work. write_output( @@ -443,16 +448,16 @@ fn handle_compile_finished( /// Handle `response`, the response from sending a `Compile` request to the server. Return the compiler exit status. /// -/// If the server returned `CompileStarted`, wait for a `CompileFinished` and +/// If the coordinator returned `CompileStarted`, wait for a `CompileFinished` and /// print the results. /// -/// If the server returned `UnhandledCompile`, run the compilation command +/// If the coordinator returned `UnhandledCompile`, run the compilation command /// locally using `creator` and return the result. #[allow(clippy::too_many_arguments)] fn handle_compile_response( mut creator: T, runtime: &mut Runtime, - conn: &mut ServerConnection, + conn: &mut CoordinatorConnection, response: CompileResponse, exe: &Path, cmdline: Vec, @@ -465,7 +470,7 @@ where { match response { CompileResponse::CompileStarted => { - debug!("Server sent CompileStarted"); + debug!("Coordinator sent CompileStarted"); // Wait for CompileFinished. match conn.read_one_response() { Ok(Response::CompileFinished(result)) => { @@ -476,7 +481,7 @@ where match e.downcast_ref::() { Some(io_e) if io_e.kind() == io::ErrorKind::UnexpectedEof => { eprintln!( - "cachepot: warning: The server looks like it shut down \ + "cachepot: warning: The coordinator looks like it shut down \ unexpectedly, compiling locally instead" ); } @@ -489,11 +494,11 @@ where } } CompileResponse::UnsupportedCompiler(s) => { - debug!("Server sent UnsupportedCompiler: {:?}", s); + debug!("Coordinator sent UnsupportedCompiler: {:?}", s); bail!("Compiler not supported: {:?}", s); } CompileResponse::UnhandledCompile => { - debug!("Server sent UnhandledCompile"); + debug!("Coordinator sent UnhandledCompile"); } }; @@ -520,7 +525,7 @@ where })) } -/// Send a `Compile` request to the cachepot server `conn`, and handle the response. +/// Send a `Compile` request to the cachepot coordinator `conn`, and handle the response. /// /// The first entry in `cmdline` will be looked up in `path` if it is not /// an absolute path. 
@@ -529,7 +534,7 @@ where pub fn do_compile( creator: T, runtime: &mut Runtime, - mut conn: ServerConnection, + mut conn: CoordinatorConnection, exe: &Path, cmdline: Vec, cwd: &Path, @@ -565,33 +570,39 @@ pub fn run_command(cmd: Command) -> Result { StatsFormat::json => serde_json::to_writer(&mut io::stdout(), &stats)?, } } - Command::InternalStartServer => { - trace!("Command::InternalStartServer"); + Command::InternalStartCoordinator => { + trace!("Command::InternalStartCoordinator"); // Can't report failure here, we're already daemonized. daemonize()?; redirect_error_log()?; - server::start_server(config, get_port())?; + coordinator::start_coordinator(config, get_port())?; } - Command::StartServer => { - trace!("Command::StartServer"); + Command::StartCoordinator => { + trace!("Command::StartCoordinator"); println!("cachepot: Starting the server..."); - let startup = run_server_process().context("failed to start server process")?; + let startup = + run_coordinator_process().context("failed to start coordinator process")?; match startup { - ServerStartup::Ok { port } => { + CoordinatorStartup::Ok { port } => { if port != DEFAULT_PORT { println!("cachepot: Listening on port {}", port); } } - ServerStartup::TimedOut => bail!("Timed out waiting for server startup"), - ServerStartup::AddrInUse => bail!("Server startup failed: Address in use"), - ServerStartup::Err { reason } => bail!("Server startup failed: {}", reason), + CoordinatorStartup::TimedOut => bail!("Timed out waiting for coordinator startup"), + CoordinatorStartup::AddrInUse => { + bail!("Coordinator startup failed: Address in use") + } + CoordinatorStartup::Err { reason } => { + bail!("Coordinator startup failed: {}", reason) + } } } - Command::StopServer => { - trace!("Command::StopServer"); + Command::StopCoordinator => { + trace!("Command::StopCoordinator"); println!("Stopping cachepot server..."); - let server = connect_to_server(get_port()).context("couldn't connect to server")?; - let stats = request_shutdown(server)?; + let coordinator = + connect_to_coordinator(get_port()).context("couldn't connect to server")?; + let stats = request_shutdown(coordinator)?; stats.print(); } Command::ZeroStats => { diff --git a/src/compiler/compiler.rs b/src/compiler/compiler.rs index 433b6737..5c640fe9 100644 --- a/src/compiler/compiler.rs +++ b/src/compiler/compiler.rs @@ -515,9 +515,9 @@ where } }?; let job_id = job_alloc.job_id; - let server_id = job_alloc.server_id.clone(); + let worker_url = job_alloc.worker_url.clone(); debug!("[{}]: Running job", out_pretty); - let ((job_id, server_id), (jres, path_transformer)) = dist_client + let ((job_id, worker_url), (jres, path_transformer)) = dist_client .do_run_job( job_alloc, dist_compile_cmd, @@ -525,11 +525,11 @@ where inputs_packager, ) .await - .map(|res| ((job_id, server_id.clone()), res)) + .map(|res| ((job_id, worker_url.clone()), res)) .with_context(|| { format!( "could not run distributed compilation job on {:?}", - server_id.clone() + worker_url.clone() ) })?; @@ -588,7 +588,7 @@ where try_or_cleanup!(outputs_rewriter .handle_outputs(&path_transformer, &output_paths, &extra_inputs) .with_context(|| "failed to rewrite outputs from compile")); - Ok((DistType::Ok(server_id), jc.output.into())) + Ok((DistType::Ok(worker_url), jc.output.into())) }; use futures::TryFutureExt; @@ -722,7 +722,7 @@ pub enum DistType { /// Distribution was not enabled. NoDist, /// Distributed compile success. - Ok(crate::config::ServerUrl), + Ok(crate::config::WorkerUrl), /// Distributed compile failed. 
Error, } @@ -1950,7 +1950,7 @@ LLVM version: 6.0", #[cfg(test)] #[cfg(feature = "dist-client")] mod test_dist { - use crate::config::ServerUrl; + use crate::config::WorkerUrl; use crate::dist::pkg; use crate::dist::{ self, AllocJobResult, CompileCommand, JobAlloc, JobComplete, JobId, OutputData, @@ -2091,7 +2091,7 @@ mod test_dist { job_alloc: JobAlloc { auth: "abcd".to_owned(), job_id: JobId(0), - server_id: ServerUrl::from_str("0.0.0.0:1").unwrap(), + worker_url: WorkerUrl::from_str("0.0.0.0:1").unwrap(), }, need_toolchain: true, }) @@ -2160,7 +2160,7 @@ mod test_dist { job_alloc: JobAlloc { auth: "abcd".to_owned(), job_id: JobId(0), - server_id: ServerUrl::from_str("0.0.0.0:1").unwrap(), + worker_url: WorkerUrl::from_str("0.0.0.0:1").unwrap(), }, need_toolchain: true, }) @@ -2241,7 +2241,7 @@ mod test_dist { job_alloc: JobAlloc { auth: "abcd".to_owned(), job_id: JobId(0), - server_id: ServerUrl::from_str("0.0.0.0:1").unwrap(), + worker_url: WorkerUrl::from_str("0.0.0.0:1").unwrap(), }, need_toolchain: true, }) diff --git a/src/config.rs b/src/config.rs index 94fcd7cd..9e3ac0dd 100644 --- a/src/config.rs +++ b/src/config.rs @@ -16,7 +16,7 @@ use crate::util::fs::{self, File}; use directories::ProjectDirs; use regex::Regex; use serde::de::{Deserialize, DeserializeOwned, Deserializer}; -#[cfg(any(feature = "dist-client", feature = "dist-server"))] +#[cfg(any(feature = "dist-client", feature = "dist-worker"))] use serde::ser::{Serialize, Serializer}; use std::collections::HashMap; use std::env; @@ -89,10 +89,10 @@ pub fn parse_size(val: &str) -> Option { }) } -#[cfg(any(feature = "dist-client", feature = "dist-server"))] +#[cfg(any(feature = "dist-client", feature = "dist-worker"))] #[derive(Clone, Debug, PartialEq, Eq, Hash)] pub struct HTTPUrl(reqwest::Url); -#[cfg(any(feature = "dist-client", feature = "dist-server"))] +#[cfg(any(feature = "dist-client", feature = "dist-worker"))] impl Serialize for HTTPUrl { fn serialize(&self, serializer: S) -> StdResult where @@ -101,7 +101,7 @@ impl Serialize for HTTPUrl { serializer.serialize_str(self.0.as_str()) } } -#[cfg(any(feature = "dist-client", feature = "dist-server"))] +#[cfg(any(feature = "dist-client", feature = "dist-worker"))] impl<'a> Deserialize<'a> for HTTPUrl { fn deserialize(deserializer: D) -> StdResult where @@ -113,7 +113,7 @@ impl<'a> Deserialize<'a> for HTTPUrl { Ok(HTTPUrl(url)) } } -#[cfg(any(feature = "dist-client", feature = "dist-server"))] +#[cfg(any(feature = "dist-client", feature = "dist-worker"))] fn parse_http_url(url: &str) -> Result { let url = reqwest::Url::parse(url)?; @@ -126,7 +126,7 @@ fn parse_http_url(url: &str) -> Result { } Ok(url) } -#[cfg(any(feature = "dist-client", feature = "dist-server"))] +#[cfg(any(feature = "dist-client", feature = "dist-worker"))] impl HTTPUrl { pub fn from_url(u: reqwest::Url) -> Self { HTTPUrl(u) @@ -143,7 +143,7 @@ impl HTTPUrl { .expect("HTTPUrl always has a valid host; qed") } } -#[cfg(any(feature = "dist-client", feature = "dist-server"))] +#[cfg(any(feature = "dist-client", feature = "dist-worker"))] impl FromStr for HTTPUrl { type Err = anyhow::Error; @@ -156,7 +156,7 @@ impl FromStr for HTTPUrl { Ok(HTTPUrl(url)) } } -#[cfg(any(feature = "dist-client", feature = "dist-server"))] +#[cfg(any(feature = "dist-client", feature = "dist-worker"))] impl fmt::Display for HTTPUrl { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "{}", self.0) @@ -169,11 +169,11 @@ impl fmt::Display for HTTPUrl { // of such format is that it can be used in un-urlencoded form 
as params: // // `https://localhost:10500/api/v1/scheduler/server_certificate/localhost:10603/` -#[cfg(any(feature = "dist-client", feature = "dist-server"))] +#[cfg(any(feature = "dist-client", feature = "dist-worker"))] #[derive(Clone, Debug, PartialEq, Eq, Hash)] -pub struct ServerUrl(pub HTTPUrl); -#[cfg(any(feature = "dist-client", feature = "dist-server"))] -impl Serialize for ServerUrl { +pub struct WorkerUrl(pub HTTPUrl); +#[cfg(any(feature = "dist-client", feature = "dist-worker"))] +impl Serialize for WorkerUrl { fn serialize(&self, serializer: S) -> StdResult where S: Serializer, @@ -187,8 +187,8 @@ impl Serialize for ServerUrl { serializer.serialize_str(&helper) } } -#[cfg(any(feature = "dist-client", feature = "dist-server"))] -impl<'a> Deserialize<'a> for ServerUrl { +#[cfg(any(feature = "dist-client", feature = "dist-worker"))] +impl<'a> Deserialize<'a> for WorkerUrl { fn deserialize(deserializer: D) -> StdResult where D: Deserializer<'a>, @@ -197,21 +197,21 @@ impl<'a> Deserialize<'a> for ServerUrl { let helper: String = Deserialize::deserialize(deserializer)?; let helper = format!("https://{}", helper); let url = parse_http_url(&helper).map_err(D::Error::custom)?; - Ok(ServerUrl(HTTPUrl(url))) + Ok(WorkerUrl(HTTPUrl(url))) } } -#[cfg(any(feature = "dist-client", feature = "dist-server"))] -impl FromStr for ServerUrl { +#[cfg(any(feature = "dist-client", feature = "dist-worker"))] +impl FromStr for WorkerUrl { type Err = anyhow::Error; fn from_str(s: &str) -> ::std::result::Result { let helper = format!("https://{}", s); - Ok(ServerUrl(HTTPUrl::from_str(&helper)?)) + Ok(WorkerUrl(HTTPUrl::from_str(&helper)?)) } } -#[cfg(any(feature = "dist-client", feature = "dist-server"))] -impl fmt::Display for ServerUrl { +#[cfg(any(feature = "dist-client", feature = "dist-worker"))] +impl fmt::Display for WorkerUrl { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!( f, @@ -465,9 +465,9 @@ impl Default for DistAuth { #[serde(deny_unknown_fields)] pub struct DistConfig { pub auth: DistAuth, - #[cfg(any(feature = "dist-client", feature = "dist-server"))] + #[cfg(any(feature = "dist-client", feature = "dist-worker"))] pub scheduler_url: Option, - #[cfg(not(any(feature = "dist-client", feature = "dist-server")))] + #[cfg(not(any(feature = "dist-client", feature = "dist-worker")))] pub scheduler_url: Option, pub cache_dir: PathBuf, pub toolchains: Vec, @@ -779,7 +779,7 @@ impl CachedConfig { } } -#[cfg(feature = "dist-server")] +#[cfg(feature = "dist-worker")] pub mod scheduler { use super::HTTPUrl; use std::path::Path; @@ -812,7 +812,7 @@ pub mod scheduler { #[derive(Debug, Serialize, Deserialize)] #[serde(tag = "type")] #[serde(deny_unknown_fields)] - pub enum ServerAuth { + pub enum WorkerAuth { #[serde(rename = "DANGEROUSLY_INSECURE")] Insecure, #[serde(rename = "jwt_hs256")] @@ -826,7 +826,7 @@ pub mod scheduler { pub struct Config { pub public_addr: HTTPUrl, pub client_auth: ClientAuth, - pub server_auth: ServerAuth, + pub worker_auth: WorkerAuth, } pub fn from_path(conf_path: &Path) -> Result> { @@ -834,9 +834,9 @@ pub mod scheduler { } } -#[cfg(feature = "dist-server")] -pub mod server { - use super::{HTTPUrl, ServerUrl}; +#[cfg(feature = "dist-worker")] +pub mod worker { + use super::{HTTPUrl, WorkerUrl}; use std::path::{Path, PathBuf}; use crate::errors::*; @@ -876,7 +876,7 @@ pub mod server { pub struct Config { pub builder: BuilderType, pub cache_dir: PathBuf, - pub public_addr: ServerUrl, + pub public_addr: WorkerUrl, pub scheduler_url: HTTPUrl, pub 
scheduler_auth: SchedulerAuth, #[serde(default = "default_toolchain_cache_size")] @@ -884,7 +884,7 @@ } pub fn from_path(conf_path: &Path) -> Result> { - super::try_read_config_file(conf_path).context("Failed to load server config file") + super::try_read_config_file(conf_path).context("Failed to load coordinator config file") } } @@ -1057,13 +1057,13 @@ public = false auth: DistAuth::Token { token: "secrettoken".to_owned() }, - #[cfg(any(feature = "dist-client", feature = "dist-server"))] + #[cfg(any(feature = "dist-client", feature = "dist-worker"))] scheduler_url: Some( parse_http_url("http://1.2.3.4:10600") .map(|url| { HTTPUrl::from_url(url) }) .expect("Scheduler url must be valid url str") ), - #[cfg(not(any(feature = "dist-client", feature = "dist-server")))] + #[cfg(not(any(feature = "dist-client", feature = "dist-worker")))] scheduler_url: Some("http://1.2.3.4:10600".to_owned()), cache_dir: PathBuf::from("/home/user/.cache/cachepot-dist-client"), toolchains: vec![], diff --git a/src/server.rs b/src/coordinator.rs similarity index 94% rename from src/server.rs rename to src/coordinator.rs index 27a3e24c..b134822a 100644 --- a/src/server.rs +++ b/src/coordinator.rs @@ -66,7 +66,7 @@ use tower::Service; use crate::errors::*; -/// If the server is idle for this many seconds, shut down. +/// If the coordinator is idle for this many seconds, shut down. const DEFAULT_IDLE_TIMEOUT: u64 = 600; /// If the dist client couldn't be created, retry creation at this number @@ -74,20 +74,20 @@ const DEFAULT_IDLE_TIMEOUT: u64 = 600; #[cfg(feature = "dist-client")] const DIST_CLIENT_RECREATE_TIMEOUT: Duration = Duration::from_secs(30); -/// Result of background server startup. +/// Result of background coordinator startup. #[derive(Debug, Serialize, Deserialize)] -pub enum ServerStartup { - /// Server started successfully on `port`. +pub enum CoordinatorStartup { + /// Coordinator started successfully on `port`. Ok { port: u16 }, - /// Server Addr already in suse + /// Coordinator address already in use AddrInUse, - /// Timed out waiting for server startup. + /// Timed out waiting for coordinator startup. TimedOut, - /// Server encountered an error. + /// Coordinator encountered an error. Err { reason: String }, } -/// Get the time the server should idle for before shutting down. +/// Get the time the coordinator should idle for before shutting down. fn get_idle_timeout() -> u64 { // A value of 0 disables idle shutdown entirely.
env::var("CACHEPOT_IDLE_TIMEOUT") @@ -96,12 +96,12 @@ fn get_idle_timeout() -> u64 { .unwrap_or(DEFAULT_IDLE_TIMEOUT) } -fn notify_server_startup_internal(mut w: W, status: ServerStartup) -> Result<()> { +fn notify_server_startup_internal(mut w: W, status: CoordinatorStartup) -> Result<()> { util::write_length_prefixed_bincode(&mut w, status) } #[cfg(unix)] -fn notify_server_startup(name: &Option, status: ServerStartup) -> Result<()> { +fn notify_server_startup(name: &Option, status: CoordinatorStartup) -> Result<()> { use std::os::unix::net::UnixStream; let name = match *name { Some(ref s) => s, @@ -113,7 +113,7 @@ fn notify_server_startup(name: &Option, status: ServerStartup) -> Resu } #[cfg(windows)] -fn notify_server_startup(name: &Option, status: ServerStartup) -> Result<()> { +fn notify_server_startup(name: &Option, status: CoordinatorStartup) -> Result<()> { use crate::util::fs::OpenOptions; let name = match *name { @@ -142,7 +142,7 @@ pub struct DistClientContainer { #[cfg(feature = "dist-client")] struct DistClientConfig { - // Reusable items tied to an CachepotServer instance + // Reusable items tied to an CachepotCoordinator instance pool: tokio::runtime::Handle, // From the static dist configuration @@ -399,7 +399,7 @@ impl DistClientContainer { /// /// Spins an event loop handling client connections until a client /// requests a shutdown. -pub fn start_server(config: &Config, port: u16) -> Result<()> { +pub fn start_coordinator(config: &Config, port: u16) -> Result<()> { info!("start_server: port: {}", port); let client = unsafe { Client::new() }; let runtime = tokio::runtime::Builder::new_multi_thread() @@ -409,14 +409,19 @@ pub fn start_server(config: &Config, port: u16) -> Result<()> { let pool = runtime.handle().clone(); let dist_client = DistClientContainer::new(config, &pool); let storage = storage_from_config(config, &pool); - let res = - CachepotServer::::new(port, runtime, client, dist_client, storage); + let res = CachepotCoordinator::::new( + port, + runtime, + client, + dist_client, + storage, + ); let notify = env::var_os("CACHEPOT_STARTUP_NOTIFY"); match res { Ok(srv) => { let port = srv.port(); info!("server started, listening on port {}", port); - notify_server_startup(¬ify, ServerStartup::Ok { port })?; + notify_server_startup(¬ify, CoordinatorStartup::Ok { port })?; srv.run(future::pending::<()>())?; Ok(()) } @@ -424,11 +429,11 @@ pub fn start_server(config: &Config, port: u16) -> Result<()> { error!("failed to start server: {}", e); match e.downcast_ref::() { Some(io_err) if io::ErrorKind::AddrInUse == io_err.kind() => { - notify_server_startup(¬ify, ServerStartup::AddrInUse)?; + notify_server_startup(¬ify, CoordinatorStartup::AddrInUse)?; } _ => { let reason = e.to_string(); - notify_server_startup(¬ify, ServerStartup::Err { reason })?; + notify_server_startup(¬ify, CoordinatorStartup::Err { reason })?; } }; Err(e) @@ -436,23 +441,23 @@ pub fn start_server(config: &Config, port: u16) -> Result<()> { } } -pub struct CachepotServer { +pub struct CachepotCoordinator { runtime: Runtime, listener: TcpListener, - rx: mpsc::Receiver, + rx: mpsc::Receiver, timeout: Duration, service: CachepotService, wait: WaitUntilZero, } -impl CachepotServer { +impl CachepotCoordinator { pub fn new( port: u16, runtime: Runtime, client: Client, dist_client: DistClientContainer, storage: Arc, - ) -> Result> { + ) -> Result> { let addr = SocketAddrV4::new(Ipv4Addr::new(127, 0, 0, 1), port); let listener = runtime.block_on(TcpListener::bind(&SocketAddr::V4(addr)))?; @@ -463,7 +468,7 
@@ impl CachepotServer { let pool = runtime.handle().clone(); let service = CachepotService::new(dist_client, storage, &client, pool, tx, info); - Ok(CachepotServer { + Ok(CachepotCoordinator { runtime, listener, rx, @@ -473,13 +478,13 @@ impl CachepotServer { }) } - /// Configures how long this server will be idle before shutting down. + /// Configures how long this coordinator will be idle before shutting down. #[allow(dead_code)] pub fn set_idle_timeout(&mut self, timeout: Duration) { self.timeout = timeout; } - /// Set the storage this server will use. + /// Set the storage this coordinator will use. #[allow(dead_code)] pub fn set_storage(&mut self, storage: Arc) { self.service.storage = storage; @@ -491,29 +496,29 @@ impl CachepotServer { &self.service.rt } - /// Returns a reference to the command creator this server will use + /// Returns a reference to the command creator this coordinator will use #[allow(dead_code)] pub fn command_creator(&self) -> &C { &self.service.creator } - /// Returns the port that this server is bound to + /// Returns the port that this coordinator is bound to #[allow(dead_code)] pub fn port(&self) -> u16 { self.listener.local_addr().unwrap().port() } - /// Runs this server to completion. + /// Runs this coordinator to completion. /// - /// If the `shutdown` future resolves then the server will be shut down, - /// otherwise the server may naturally shut down if it becomes idle for too + /// If the `shutdown` future resolves then the coordinator will be shut down, + /// otherwise the coordinator may naturally shut down if it becomes idle for too /// long anyway. pub fn run(self, shutdown: F) -> io::Result<()> where F: Future, C: Send, { - let CachepotServer { + let CachepotCoordinator { runtime, listener, rx, @@ -524,7 +529,7 @@ impl CachepotServer { // Create our "server future" which will simply handle all incoming // connections in separate tasks. - let server = async move { + let coordinator = async move { loop { let (socket, _) = listener.accept().await?; trace!("incoming connection"); @@ -538,11 +543,11 @@ impl CachepotServer { } }; - // Right now there's a whole bunch of ways to shut down this server for + // Right now there's a whole bunch of ways to shut down this coordinator for // various purposes. These include: // // 1. The `shutdown` future above. - // 2. An RPC indicating the server should shut down + // 2. An RPC indicating the coordinator should shut down // 3. A period of inactivity (no requests serviced) // // These are all encapsulated wih the future that we're creating below. @@ -570,7 +575,7 @@ impl CachepotServer { runtime.block_on(async { futures::select! { - server = server.fuse() => server, + coordinator = coordinator.fuse() => coordinator, _res = shutdown.fuse() => Ok(()), _res = shutdown_idle.fuse() => Ok::<_, io::Error>(()), } @@ -583,7 +588,7 @@ impl CachepotServer { SHUTDOWN_TIMEOUT.as_secs() ); - // Once our server has shut down either due to inactivity or a manual + // Once our coordinator has shut down either due to inactivity or a manual // request we still need to give a bit of time for all active // connections to finish. This `wait` future will resolve once all // instances of `CachepotService` have been dropped. @@ -633,8 +638,8 @@ struct CachepotService where C: Send, { - /// Server statistics. - stats: Arc>, + /// Coordinator statistics. + stats: Arc>, /// Distributed cachepot client dist_client: Arc, @@ -664,10 +669,10 @@ where /// Message channel used to learn about requests received by this server. 
/// - /// Note that messages sent along this channel will keep the server alive + /// Note that messages sent along this channel will keep the coordinator alive /// (reset the idle timer) and this channel can also be used to shut down - /// the entire server immediately via a message. - tx: mpsc::Sender, + /// the entire coordinator immediately via a message. + tx: mpsc::Sender, /// Information tracking how many services (connected clients) are active. _info: ActiveInfo, @@ -680,8 +685,8 @@ type CachepotResponse = Message>; /// /// Whenever a request is receive a `Request` message is sent which will reset /// the idle shutdown timer, and otherwise a `Shutdown` message indicates that -/// a server shutdown was requested via an RPC. -pub enum ServerMessage { +/// a coordinator shutdown was requested via an RPC. +pub enum CoordinatorMessage { /// A message sent whenever a request is received. Request, /// Message sent whenever a shutdown request is received. @@ -702,7 +707,7 @@ where // Opportunistically let channel know that we've received a request. We // ignore failures here as well as backpressure as it's not imperative // that every message is received. - drop(self.tx.clone().start_send(ServerMessage::Request)); + drop(self.tx.clone().start_send(CoordinatorMessage::Request)); let me = self.clone(); Box::pin(async move { @@ -739,7 +744,7 @@ where let mut tx = me.tx.clone(); future::try_join( async { - let _ = tx.send(ServerMessage::Shutdown).await; + let _ = tx.send(CoordinatorMessage::Shutdown).await; Ok(()) }, me.get_info(), @@ -776,11 +781,11 @@ where storage: Arc, client: &Client, rt: tokio::runtime::Handle, - tx: mpsc::Sender, + tx: mpsc::Sender, info: ActiveInfo, ) -> CachepotService { CachepotService { - stats: Arc::new(RwLock::new(ServerStats::default())), + stats: Arc::new(RwLock::new(CoordinatorStats::default())), dist_client: Arc::new(dist_client), storage, compilers: Arc::new(RwLock::new(HashMap::new())), @@ -841,11 +846,11 @@ where } /// Get info and stats about the cache. - async fn get_info(&self) -> Result { + async fn get_info(&self) -> Result { let stats = self.stats.read().await.clone(); let cache_location = self.storage.location(); futures::try_join!(self.storage.current_size(), self.storage.max_size()).map( - move |(cache_size, max_cache_size)| ServerInfo { + move |(cache_size, max_cache_size)| CoordinatorInfo { stats, cache_location, cache_size, @@ -856,7 +861,7 @@ where /// Zero stats about the cache. async fn zero_stats(&self) { - *self.stats.write().await = ServerStats::default(); + *self.stats.write().await = CoordinatorStats::default(); } async fn clear_cache(&self) -> Result<()> { @@ -1149,9 +1154,10 @@ where match dist_type { DistType::NoDist => {} DistType::Ok(id) => { - let server_count = - stats.dist_compiles.entry(id.to_string()).or_insert(0); - *server_count += 1; + let coordinator = id.to_string(); + let coordinator_count = + stats.dist_compiles.entry(coordinator).or_insert(0); + *coordinator_count += 1; } DistType::Error => stats.dist_errors += 1, } @@ -1302,7 +1308,7 @@ impl PerLanguageCount { /// Statistics about the server. #[derive(Serialize, Deserialize, Clone, Debug)] -pub struct ServerStats { +pub struct CoordinatorStats { /// The count of client compile requests. pub compile_requests: u64, /// The count of client requests that used an unsupported compiler. @@ -1342,16 +1348,16 @@ pub struct ServerStats { /// Counts of reasons why compiles were not cached. 
pub not_cached: HashMap, /// The count of compilations that were successfully distributed indexed - /// by the server that ran those compilations. + /// by the coordinator that ran those compilations. pub dist_compiles: HashMap, /// The count of compilations that were distributed but failed and had to be re-run locally pub dist_errors: u64, } -/// Info and stats about the server. +/// Info and stats about the coordinator. #[derive(Serialize, Deserialize, Clone, Debug)] -pub struct ServerInfo { - pub stats: ServerStats, +pub struct CoordinatorInfo { + pub stats: CoordinatorStats, pub cache_location: String, pub cache_size: Option, pub max_cache_size: Option, @@ -1367,9 +1373,9 @@ pub enum DistInfo { SchedulerStatus(Option, dist::SchedulerStatusResult), } -impl Default for ServerStats { - fn default() -> ServerStats { - ServerStats { +impl Default for CoordinatorStats { + fn default() -> CoordinatorStats { + CoordinatorStats { compile_requests: u64::default(), requests_unsupported_compiler: u64::default(), requests_not_compile: u64::default(), @@ -1395,7 +1401,7 @@ impl Default for ServerStats { } } -impl ServerStats { +impl CoordinatorStats { /// Print stats to stdout in a human-readable format. /// /// Return the formatted width of each of the (name, value) columns. @@ -1541,7 +1547,7 @@ impl ServerStats { } } -impl ServerInfo { +impl CoordinatorInfo { /// Print info to stdout in a human-readable format. pub fn print(&self) { let (name_width, stat_width) = self.stats.print(); @@ -1707,7 +1713,7 @@ impl Sink> for Cach } struct ShutdownOrInactive { - rx: mpsc::Receiver, + rx: mpsc::Receiver, timeout: Option>>, timeout_dur: Duration, } @@ -1720,8 +1726,8 @@ impl Future for ShutdownOrInactive { match Pin::new(&mut self.rx).poll_next(cx) { Poll::Pending => break, // Shutdown received! - Poll::Ready(Some(ServerMessage::Shutdown)) => return Poll::Ready(()), - Poll::Ready(Some(ServerMessage::Request)) => { + Poll::Ready(Some(CoordinatorMessage::Shutdown)) => return Poll::Ready(()), + Poll::Ready(Some(CoordinatorMessage::Request)) => { if self.timeout_dur != Duration::new(0, 0) { self.timeout = Some(Box::pin(sleep(self.timeout_dur))); } diff --git a/src/dist/http.rs b/src/dist/http.rs index fd147c89..e26b82fd 100644 --- a/src/dist/http.rs +++ b/src/dist/http.rs @@ -13,21 +13,21 @@ // limitations under the License. 
#[cfg(feature = "dist-client")] pub use self::client::Client; -#[cfg(feature = "dist-server")] -pub use self::server::Server; -#[cfg(feature = "dist-server")] -pub use self::server::{ - ClientAuthCheck, ClientVisibleMsg, Scheduler, ServerAuthCheck, HEARTBEAT_TIMEOUT, +#[cfg(feature = "dist-worker")] +pub use self::worker::Worker; +#[cfg(feature = "dist-worker")] +pub use self::worker::{ + CoordinatorAuthCheck, CoordinatorVisibleMsg, Scheduler, WorkerAuthCheck, HEARTBEAT_TIMEOUT, }; mod common { - #[cfg(any(feature = "dist-client", feature = "dist-server"))] + #[cfg(any(feature = "dist-client", feature = "dist-worker"))] use hyperx::header; - #[cfg(feature = "dist-server")] + #[cfg(feature = "dist-worker")] use std::collections::HashMap; use std::fmt; - #[cfg(feature = "dist-server")] + #[cfg(feature = "dist-worker")] use crate::config; use crate::dist; @@ -56,7 +56,7 @@ mod common { } } - #[cfg(any(feature = "dist-client", feature = "dist-server"))] + #[cfg(any(feature = "dist-client", feature = "dist-worker"))] pub async fn bincode_req( req: reqwest::RequestBuilder, ) -> Result { @@ -99,17 +99,17 @@ mod common { }, } impl AllocJobHttpResponse { - #[cfg(feature = "dist-server")] + #[cfg(feature = "dist-worker")] pub fn from_alloc_job_result( res: dist::AllocJobResult, - certs: &HashMap, Vec)>, + certs: &HashMap, Vec)>, ) -> Self { match res { dist::AllocJobResult::Success { job_alloc, need_toolchain, } => { - if let Some((digest, _)) = certs.get(&job_alloc.server_id) { + if let Some((digest, _)) = certs.get(&job_alloc.worker_url) { AllocJobHttpResponse::Success { job_alloc, need_toolchain, @@ -117,7 +117,10 @@ mod common { } } else { AllocJobHttpResponse::Fail { - msg: format!("missing certificates for server {}", job_alloc.server_id), + msg: format!( + "missing certificates for server {}", + job_alloc.worker_url + ), } } } @@ -128,31 +131,31 @@ mod common { #[derive(Clone, Debug, Serialize, Deserialize)] #[serde(deny_unknown_fields)] - pub struct ServerCertificateHttpResponse { + pub struct WorkerCertificateHttpResponse { pub cert_digest: Vec, pub cert_pem: Vec, } #[derive(Clone, Serialize, Deserialize)] #[serde(deny_unknown_fields)] - pub struct HeartbeatServerHttpRequest { + pub struct HeartbeatWorkerHttpRequest { pub jwt_key: Vec, pub num_cpus: usize, - pub server_nonce: dist::ServerNonce, + pub worker_nonce: dist::WorkerNonce, pub cert_digest: Vec, pub cert_pem: Vec, } // cert_pem is quite long so elide it (you can retrieve it by hitting the server url anyway) - impl fmt::Debug for HeartbeatServerHttpRequest { + impl fmt::Debug for HeartbeatWorkerHttpRequest { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let HeartbeatServerHttpRequest { + let HeartbeatWorkerHttpRequest { jwt_key, num_cpus, - server_nonce, + worker_nonce, cert_digest, cert_pem, } = self; - write!(f, "HeartbeatServerHttpRequest {{ jwt_key: {:?}, num_cpus: {:?}, server_nonce: {:?}, cert_digest: {:?}, cert_pem: [...{} bytes...] }}", jwt_key, num_cpus, server_nonce, cert_digest, cert_pem.len()) + write!(f, "HeartbeatServerHttpRequest {{ jwt_key: {:?}, num_cpus: {:?}, worker_nonce: {:?}, cert_digest: {:?}, cert_pem: [...{} bytes...] 
}}", jwt_key, num_cpus, worker_nonce, cert_digest, cert_pem.len()) } } #[derive(Clone, Debug, Serialize, Deserialize)] @@ -164,7 +167,7 @@ mod common { } pub mod urls { - use crate::config::ServerUrl; + use crate::config::WorkerUrl; use crate::dist::JobId; pub fn scheduler_alloc_job(scheduler_url: &reqwest::Url) -> reqwest::Url { @@ -174,12 +177,12 @@ pub mod urls { } pub fn scheduler_server_certificate( scheduler_url: &reqwest::Url, - server_id: ServerUrl, + worker_url: WorkerUrl, ) -> reqwest::Url { scheduler_url .join(&format!( "/api/v1/scheduler/server_certificate/{}", - server_id + worker_url )) .expect("failed to create server certificate url") } @@ -199,29 +202,32 @@ pub mod urls { .expect("failed to create alloc job url") } - pub fn server_assign_job(server_id: ServerUrl, job_id: JobId) -> reqwest::Url { + pub fn server_assign_job(worker_url: WorkerUrl, job_id: JobId) -> reqwest::Url { let url = format!( - "https://{}/api/v1/distserver/assign_job/{}", - server_id, job_id + "https://{}/api/v1/distworker/assign_job/{}", + worker_url, job_id ); warn!("URL {}", url); reqwest::Url::parse(&url).expect("failed to create assign job url") } - pub fn server_submit_toolchain(server_id: ServerUrl, job_id: JobId) -> reqwest::Url { + pub fn server_submit_toolchain(worker_url: WorkerUrl, job_id: JobId) -> reqwest::Url { let url = format!( - "https://{}/api/v1/distserver/submit_toolchain/{}", - server_id, job_id + "https://{}/api/v1/distworker/submit_toolchain/{}", + worker_url, job_id ); reqwest::Url::parse(&url).expect("failed to create submit toolchain url") } - pub fn server_run_job(server_id: ServerUrl, job_id: JobId) -> reqwest::Url { - let url = format!("https://{}/api/v1/distserver/run_job/{}", server_id, job_id); + pub fn server_run_job(worker_url: WorkerUrl, job_id: JobId) -> reqwest::Url { + let url = format!( + "https://{}/api/v1/distworker/run_job/{}", + worker_url, job_id + ); reqwest::Url::parse(&url).expect("failed to create run job url") } } -#[cfg(feature = "dist-server")] -mod server { +#[cfg(feature = "dist-worker")] +mod worker { use crate::{config, jwt}; use rand::{rngs::OsRng, RngCore}; use std::collections::HashMap; @@ -231,13 +237,13 @@ mod server { use void::Void; use super::common::{ - bincode_req, AllocJobHttpResponse, HeartbeatServerHttpRequest, JobJwt, - ReqwestRequestBuilderExt, RunJobHttpRequest, ServerCertificateHttpResponse, + bincode_req, AllocJobHttpResponse, HeartbeatWorkerHttpRequest, JobJwt, + ReqwestRequestBuilderExt, RunJobHttpRequest, WorkerCertificateHttpResponse, }; use super::urls; use crate::dist::{ - self, AssignJobResult, HeartbeatServerResult, JobId, JobState, ServerNonce, Toolchain, - UpdateJobStateResult, + self, AssignJobResult, HeartbeatWorkerResult, JobId, JobState, Toolchain, + UpdateJobStateResult, WorkerNonce, WorkerUrl, }; use crate::errors::*; @@ -342,18 +348,18 @@ mod server { // Messages that are non-sensitive and can be sent to the client #[derive(Debug)] - pub struct ClientVisibleMsg(String); - impl ClientVisibleMsg { + pub struct CoordinatorVisibleMsg(String); + impl CoordinatorVisibleMsg { pub fn from_nonsensitive(s: String) -> Self { - ClientVisibleMsg(s) + CoordinatorVisibleMsg(s) } } #[async_trait] - pub trait ClientAuthCheck: Send + Sync { - async fn check(&self, token: &str) -> StdResult<(), ClientVisibleMsg>; + pub trait CoordinatorAuthCheck: Send + Sync { + async fn check(&self, token: &str) -> StdResult<(), CoordinatorVisibleMsg>; } - pub type ServerAuthCheck = Arc Option + Send + Sync>; + pub type WorkerAuthCheck = Arc 
Option + Send + Sync>; const JWT_KEY_LENGTH: usize = 256 / 8; lazy_static! { @@ -448,7 +454,7 @@ mod server { assert!(ja2.verify_token(job_id2, &token2).is_err()); } - mod distserver_api_v1 { + mod distworker_api_v1 { use thiserror::Error; pub use filters::api; @@ -482,8 +488,8 @@ mod server { use super::{handlers, Error}; use crate::dist::{ self, - http::server::{ClientVisibleMsg, ErrJson}, - JobAuthorizer, JobId, ServerIncoming, + http::worker::{CoordinatorVisibleMsg, ErrJson}, + JobAuthorizer, JobId, WorkerIncoming, }; fn bearer_http_auth(auth_header: &HeaderValue) -> Result { @@ -527,15 +533,15 @@ mod server { } fn with_requester( - requester: Arc, - ) -> impl Filter,), Error = Infallible> + Clone + requester: Arc, + ) -> impl Filter,), Error = Infallible> + Clone { warp::any().map(move || requester.clone()) } fn with_server_incoming_handler( - handler: Arc, - ) -> impl Filter,), Error = Infallible> + Clone + handler: Arc, + ) -> impl Filter,), Error = Infallible> + Clone { warp::any().map(move || handler.clone()) } @@ -559,16 +565,16 @@ mod server { } } - // POST /api/v1/distserver/assign_job/{job_id: JobId} + // POST /api/v1/distworker/assign_job/{job_id: JobId} fn assign_job( request_counter: Arc, job_authorizer: Arc, - handler: Arc, + handler: Arc, ) -> impl Filter + Clone { let with_request_id = warp::any().map(move || request_counter.fetch_add(1, atomic::Ordering::SeqCst)); - warp::path!("api" / "v1" / "distserver" / "assign_job" / JobId) + warp::path!("api" / "v1" / "distworker" / "assign_job" / JobId) .and(warp::post()) .and(with_job_authorizer(job_authorizer)) .and(warp::header::value(AUTHORIZATION.as_str())) @@ -581,14 +587,14 @@ mod server { .and_then(prepare_response) } - // POST /api/v1/distserver/submit_toolchain/{job_id: JobId} + // POST /api/v1/distworker/submit_toolchain/{job_id: JobId} fn submit_toolchain( _request_counter: Arc, job_authorizer: Arc, - handler: Arc, - requester: Arc, + handler: Arc, + requester: Arc, ) -> impl Filter + Clone { - warp::path!("api" / "v1" / "distserver" / "submit_toolchain" / JobId) + warp::path!("api" / "v1" / "distworker" / "submit_toolchain" / JobId) .and(warp::post()) .and(with_job_authorizer(job_authorizer)) .and(warp::header::value(AUTHORIZATION.as_str())) @@ -601,14 +607,14 @@ mod server { .and_then(prepare_response) } - // POST /api/v1/distserver/run_job/{job_id: JobId} + // POST /api/v1/distworker/run_job/{job_id: JobId} fn run_job( _request_counter: Arc, job_authorizer: Arc, - handler: Arc, - requester: Arc, + handler: Arc, + requester: Arc, ) -> impl Filter + Clone { - warp::path!("api" / "v1" / "distserver" / "run_job" / JobId) + warp::path!("api" / "v1" / "distworker" / "run_job" / JobId) .and(warp::post()) .and(with_job_authorizer(job_authorizer)) .and(warp::header::value(AUTHORIZATION.as_str())) @@ -623,8 +629,8 @@ mod server { pub fn api( job_authorizer: Arc, - server_incoming_handler: Arc, - requester: Arc, + server_incoming_handler: Arc, + requester: Arc, ) -> impl Filter + Clone { let request_count = Arc::new(atomic::AtomicUsize::new(0)); @@ -648,7 +654,10 @@ mod server { .recover(handle_rejection) } - fn make_401_with_body(short_err: &str, body: Option) -> Response { + fn make_401_with_body( + short_err: &str, + body: Option, + ) -> Response { let body = reply::with_status( body.map(|b| b.0).unwrap_or_default(), StatusCode::UNAUTHORIZED, @@ -688,7 +697,7 @@ mod server { Ok(make_401_with_body( "invalid_jwt", - Some(ClientVisibleMsg(json.into_data())), + Some(CoordinatorVisibleMsg(json.into_data())), ) .into_response()) 
} else { @@ -704,7 +713,7 @@ mod server { let json = ErrJson::from_err(&*err); Ok(make_401_with_body( "invalid_jwt", - Some(ClientVisibleMsg(json.into_data())), + Some(CoordinatorVisibleMsg(json.into_data())), ) .into_response()) } @@ -742,7 +751,7 @@ mod server { use crate::dist::{ AssignJobResult, InputsReader, RunJobResult, SubmitToolchainResult, ToolchainReader, }; - use crate::dist::{ServerIncoming, ServerOutgoing, Toolchain}; + use crate::dist::{Toolchain, WorkerIncoming, WorkerOutgoing}; use byteorder::{BigEndian, ReadBytesExt}; use flate2::read::ZlibDecoder as ZlibReadDecoder; use std::sync::Arc; @@ -751,7 +760,7 @@ mod server { pub async fn assign_job( job_id: JobId, toolchain: Toolchain, - handler: Arc, + handler: Arc, _req_id: usize, ) -> Result { let res = handler @@ -764,8 +773,8 @@ mod server { pub async fn submit_toolchain( job_id: JobId, - handler: Arc, - requester: Arc, + handler: Arc, + requester: Arc, body: bytes::Bytes, ) -> Result { let toolchain_rdr = ToolchainReader(Box::new(body.as_ref())); @@ -779,8 +788,8 @@ mod server { pub async fn run_job( job_id: JobId, - handler: Arc, - requester: Arc, + handler: Arc, + requester: Arc, body: bytes::Bytes, ) -> Result { use std::io::Read; @@ -856,7 +865,8 @@ mod server { pub(super) mod filters { use super::super::{ - ClientAuthCheck, ClientVisibleMsg, ErrJson, SchedulerRequester, ServerAuthCheck, + CoordinatorAuthCheck, CoordinatorVisibleMsg, ErrJson, SchedulerRequester, + WorkerAuthCheck, }; use super::{handlers, Error}; use crate::config; @@ -878,7 +888,7 @@ mod server { Filter, Rejection, Reply, }; - fn make_401_with_body(short_err: &str, body: ClientVisibleMsg) -> Response { + fn make_401_with_body(short_err: &str, body: CoordinatorVisibleMsg) -> Response { let body = reply::with_status(body.0, StatusCode::UNAUTHORIZED); reply::with_header( body, @@ -914,10 +924,11 @@ mod server { let err: Box = e.into(); let json = ErrJson::from_err(&*err); - Ok( - make_401_with_body("invalid_jwt", ClientVisibleMsg(json.into_data())) - .into_response(), + Ok(make_401_with_body( + "invalid_jwt", + CoordinatorVisibleMsg(json.into_data()), ) + .into_response()) } else { Ok( warp::reply::with_status(warp::reply(), StatusCode::NOT_FOUND) @@ -935,7 +946,7 @@ mod server { let json = ErrJson::from_err(&*err); Ok(make_401_with_body( "invalid_jwt", - ClientVisibleMsg(json.into_data()), + CoordinatorVisibleMsg(json.into_data()), ) .into_response()) } @@ -959,10 +970,10 @@ mod server { pub fn api( requester: Arc, - auth: Arc, + auth: Arc, s: Arc, - certificates: Arc, Vec)>>>, - check_server_auth: ServerAuthCheck, + certificates: Arc, Vec)>>>, + check_worker_auth: WorkerAuthCheck, ) -> impl Filter + Clone { alloc_job( requester.clone(), @@ -972,12 +983,12 @@ mod server { ) .or(server_certificate(certificates.clone())) .or(heartbeat_server( - check_server_auth.clone(), + check_worker_auth.clone(), s.clone(), certificates, requester, )) - .or(job_state(check_server_auth, s.clone())) + .or(job_state(check_worker_auth, s.clone())) .or(status(s)) .recover(handle_rejection) } @@ -985,9 +996,9 @@ mod server { // POST /api/v1/scheduler/alloc_job fn alloc_job( requester: Arc, - auth: Arc, + auth: Arc, s: Arc, - certificates: Arc, Vec)>>>, + certificates: Arc, Vec)>>>, ) -> impl Filter + Clone { warp::path!("api" / "v1" / "scheduler" / "alloc_job") .and(warp::post()) @@ -1004,12 +1015,12 @@ mod server { .and_then(prepare_response) } - // GET /api/v1/scheduler/server_certificate/{server_id: ServerId}) + // GET /api/v1/scheduler/server_certificate/{worker_url: 
ServerId}) fn server_certificate( - certificates: Arc, Vec)>>>, + certificates: Arc, Vec)>>>, ) -> impl Filter + Clone { - use config::ServerUrl; - warp::path!("api" / "v1" / "scheduler" / "server_certificate" / ServerUrl) + use config::WorkerUrl; + warp::path!("api" / "v1" / "scheduler" / "server_certificate" / WorkerUrl) .and(warp::get()) .and(with_certificates(certificates)) .and_then(handlers::server_certificate) @@ -1019,14 +1030,14 @@ mod server { // POST /api/v1/scheduler/heartbeat_server fn heartbeat_server( - check_server_auth: ServerAuthCheck, + check_worker_auth: WorkerAuthCheck, s: Arc, - certificates: Arc, Vec)>>>, + certificates: Arc, Vec)>>>, requester: Arc, ) -> impl Filter + Clone { warp::path!("api" / "v1" / "scheduler" / "heartbeat_server") .and(warp::post()) - .and(with_server_auth(check_server_auth)) + .and(with_server_auth(check_worker_auth)) .and(warp::header::headers_cloned()) .and(warp::addr::remote()) .and_then(auth_server) @@ -1041,13 +1052,13 @@ mod server { // POST /api/v1/scheduler/job_state/{job_id: JobId} fn job_state( - check_server_auth: ServerAuthCheck, + check_worker_auth: WorkerAuthCheck, s: Arc, ) -> impl Filter + Clone { warp::path!("api" / "v1" / "scheduler" / "job_state" / JobId) .and(warp::post()) .and( - with_server_auth(check_server_auth) + with_server_auth(check_worker_auth) .and(warp::header::headers_cloned()) .and(warp::addr::remote()) .and_then(auth_server), @@ -1121,23 +1132,23 @@ mod server { } fn with_certificates( - certificates: Arc, Vec)>>>, + certificates: Arc, Vec)>>>, ) -> impl Filter< - Extract = (Arc, Vec)>>>,), + Extract = (Arc, Vec)>>>,), Error = Infallible, > + Clone { warp::any().map(move || certificates.clone()) } fn with_server_auth( - check_server_auth: ServerAuthCheck, - ) -> impl Filter + Clone { - warp::any().map(move || check_server_auth.clone()) + check_worker_auth: WorkerAuthCheck, + ) -> impl Filter + Clone { + warp::any().map(move || check_worker_auth.clone()) } fn with_client_authorizer( - client_authorizer: Arc, - ) -> impl Filter,), Error = Infallible> + Clone + client_authorizer: Arc, + ) -> impl Filter,), Error = Infallible> + Clone { warp::any().map(move || client_authorizer.clone()) } @@ -1162,12 +1173,12 @@ mod server { } async fn authorize( - check_client_auth: Arc, + checker_coordinator_auth: Arc, auth_header: HeaderValue, ) -> Result<(), Rejection> { let bearer_auth = bearer_http_auth(&auth_header)?; - check_client_auth + checker_coordinator_auth .check(&bearer_auth) .await .map_err(|_| Error::BearerAuthFailed)?; @@ -1176,16 +1187,16 @@ mod server { } async fn auth_server( - check_server_auth: ServerAuthCheck, + check_worker_auth: WorkerAuthCheck, headers: HeaderMap, remote: Option, - ) -> Result { + ) -> Result { let auth_header = headers .get(AUTHORIZATION.as_str()) .ok_or(Error::NoAuthorizationHeader)?; - match check_server_auth(&bearer_http_auth(auth_header)?) { - Some(server_id) => { + match check_worker_auth(&bearer_http_auth(auth_header)?) 
{ + Some(worker_url) => { let origin_ip = if let Some(header_val) = headers.get("X-Real-IP") { trace!("X-Real-IP: {:?}", header_val); match header_val.to_str().unwrap().parse() { @@ -1202,7 +1213,7 @@ mod server { remote.unwrap().ip() }; - let url = server_id.0.to_url(); + let url = worker_url.0.to_url(); let addrs = url.socket_addrs(|| None).unwrap_or_default(); if addrs.iter().find(|addr| addr.ip() == origin_ip).is_none() { @@ -1212,7 +1223,7 @@ mod server { Error::InvalidBearerTokenMismatchedAddress, )) } else { - Ok(server_id) + Ok(worker_url) } } None => Err(warp::reject::custom(Error::InvalidBearerToken)), @@ -1237,13 +1248,13 @@ mod server { pub(super) mod handlers { use super::super::AllocJobHttpResponse; - use super::super::{HeartbeatServerHttpRequest, ServerCertificateHttpResponse}; + use super::super::{HeartbeatWorkerHttpRequest, WorkerCertificateHttpResponse}; use super::super::{JWTJobAuthorizer, JobId, SchedulerRequester}; use super::Error; use crate::config; use crate::dist::{self}; use crate::dist::{ - HeartbeatServerResult, JobState, SchedulerStatusResult, UpdateJobStateResult, + HeartbeatWorkerResult, JobState, SchedulerStatusResult, UpdateJobStateResult, }; use std::collections::HashMap; use std::sync::Arc; @@ -1254,7 +1265,7 @@ mod server { handler: Arc, toolchain: dist::Toolchain, requester: Arc, - certs: Arc, Vec)>>>, + certs: Arc, Vec)>>>, ) -> Result { let alloc_job_res = handler .handle_alloc_job(requester.as_ref(), toolchain) @@ -1271,13 +1282,13 @@ mod server { } pub async fn server_certificate( - server_id: config::ServerUrl, - certificates: Arc, Vec)>>>, - ) -> Result { + worker_url: config::WorkerUrl, + certificates: Arc, Vec)>>>, + ) -> Result { let certs = certificates.lock().await; - let (cert_digest, cert_pem) = certs.get(&server_id).cloned().unwrap(); - let res = ServerCertificateHttpResponse { + let (cert_digest, cert_pem) = certs.get(&worker_url).cloned().unwrap(); + let res = WorkerCertificateHttpResponse { cert_digest, cert_pem, }; @@ -1286,16 +1297,16 @@ mod server { } pub async fn heartbeat_server( - server_id: config::ServerUrl, + worker_url: config::WorkerUrl, handler: Arc, - heartbeat_server: HeartbeatServerHttpRequest, - server_certificates: Arc, Vec)>>>, + heartbeat_server: HeartbeatWorkerHttpRequest, + server_certificates: Arc, Vec)>>>, requester: Arc, - ) -> Result { - let HeartbeatServerHttpRequest { + ) -> Result { + let HeartbeatWorkerHttpRequest { num_cpus, jwt_key, - server_nonce, + worker_nonce, cert_digest, cert_pem, } = heartbeat_server; @@ -1305,7 +1316,7 @@ mod server { maybe_update_certs( &mut *client, &mut certs, - server_id.clone(), + worker_url.clone(), cert_digest, cert_pem, ) @@ -1313,10 +1324,10 @@ mod server { .map_err(|_| Error::UpdateCerts)?; let job_authorizer = Box::new(JWTJobAuthorizer::new(jwt_key)); - let res: HeartbeatServerResult = handler - .handle_heartbeat_server( - server_id.clone(), - server_nonce, + let res: HeartbeatWorkerResult = handler + .handle_heartbeat_worker( + worker_url.clone(), + worker_nonce, num_cpus, job_authorizer, ) @@ -1327,12 +1338,12 @@ mod server { pub async fn job_state( job_id: JobId, - server_id: config::ServerUrl, + worker_url: config::WorkerUrl, handler: Arc, job_state: JobState, ) -> Result { let res = handler - .handle_update_job_state(job_id, server_id, job_state) + .handle_update_job_state(job_id, worker_url, job_state) .map_err(|_| Error::UpdateJobState)?; Ok(res) @@ -1348,17 +1359,17 @@ mod server { async fn maybe_update_certs( client: &mut reqwest::Client, - certs: &mut HashMap, 
Vec)>, - server_id: config::ServerUrl, + certs: &mut HashMap, Vec)>, + worker_url: config::WorkerUrl, cert_digest: Vec, cert_pem: Vec, ) -> Result<(), Error> { - if let Some((saved_cert_digest, _)) = certs.get(&server_id) { + if let Some((saved_cert_digest, _)) = certs.get(&worker_url) { if saved_cert_digest == &cert_digest { return Ok(()); } } - info!("Adding new certificate for {} to scheduler", server_id); + info!("Adding new certificate for {} to scheduler", worker_url); let _ = native_tls::Certificate::from_pem(&cert_pem) .map_err(|_| Error::BadCertificate)?; @@ -1372,7 +1383,7 @@ mod server { let new_client = client_builder.build().map_err(|_| Error::NoHTTPClient)?; // Use the updated certificates *client = new_client; - certs.insert(server_id, (cert_digest, cert_pem)); + certs.insert(worker_url, (cert_digest, cert_pem)); Ok(()) } } @@ -1382,23 +1393,23 @@ mod server { public_addr: reqwest::Url, handler: S, // Is this client permitted to use the scheduler? - check_client_auth: Box, + checker_coordinator_auth: Box, // Do we believe the server is who they appear to be? - check_server_auth: ServerAuthCheck, + check_worker_auth: WorkerAuthCheck, } impl Scheduler { pub fn new( public_addr: reqwest::Url, handler: S, - check_client_auth: Box, - check_server_auth: ServerAuthCheck, + checker_coordinator_auth: Box, + check_worker_auth: WorkerAuthCheck, ) -> Self { Self { public_addr, handler, - check_client_auth, - check_server_auth, + checker_coordinator_auth, + check_worker_auth, } } @@ -1406,8 +1417,8 @@ mod server { let Self { public_addr, handler, - check_client_auth, - check_server_auth, + checker_coordinator_auth, + check_worker_auth, } = self; let client = @@ -1419,15 +1430,15 @@ mod server { client: Mutex::new(client), }); - let check_client_auth = Arc::from(check_client_auth); + let checker_coordinator_auth = Arc::from(checker_coordinator_auth); let handler = Arc::from(handler); let server_certificates = Arc::new(Mutex::new(HashMap::new())); let api = scheduler_api_v1::api( requester, - check_client_auth, + checker_coordinator_auth, handler, server_certificates, - check_server_auth, + check_worker_auth, ); info!("Scheduler listening for clients on {}", public_addr); @@ -1448,12 +1459,12 @@ mod server { impl dist::SchedulerOutgoing for SchedulerRequester { async fn do_assign_job( &self, - server_id: config::ServerUrl, + worker_url: config::WorkerUrl, job_id: JobId, tc: Toolchain, auth: String, ) -> Result { - let url = urls::server_assign_job(server_id, job_id); + let url = urls::server_assign_job(worker_url, job_id); let req = self.client.lock().await.post(url); bincode_req(req.bearer_auth(auth).bincode(&tc)?) 
.await @@ -1461,7 +1472,7 @@ mod server { } } - pub struct Server { + pub struct Worker { public_addr: reqwest::Url, scheduler_url: reqwest::Url, scheduler_auth: String, @@ -1472,11 +1483,11 @@ mod server { // Key used to sign any requests relating to jobs jwt_key: Vec, // Randomly generated nonce to allow the scheduler to detect server restarts - server_nonce: ServerNonce, + worker_nonce: WorkerNonce, handler: S, } - impl Server { + impl Worker { pub fn new( public_addr: reqwest::Url, scheduler_url: reqwest::Url, @@ -1487,7 +1498,7 @@ mod server { .context("failed to create HTTPS certificate for server")?; let mut jwt_key = vec![0; JWT_KEY_LENGTH]; OsRng.fill_bytes(&mut jwt_key); - let server_nonce = ServerNonce::new(); + let worker_nonce = WorkerNonce::new(); Ok(Self { public_addr, @@ -1497,7 +1508,7 @@ mod server { cert_pem, privkey_pem, jwt_key, - server_nonce, + worker_nonce, handler, }) } @@ -1511,16 +1522,16 @@ mod server { cert_pem, privkey_pem, jwt_key, - server_nonce, + worker_nonce, handler, } = self; let handler = Arc::new(handler); - let heartbeat_req = HeartbeatServerHttpRequest { + let heartbeat_req = HeartbeatWorkerHttpRequest { num_cpus: num_cpus::get(), jwt_key: jwt_key.clone(), - server_nonce, + worker_nonce, cert_digest, cert_pem: cert_pem.clone(), }; @@ -1532,7 +1543,7 @@ mod server { scheduler_auth: scheduler_auth.clone(), }); - let api = distserver_api_v1::api(job_authorizer, handler, requester); + let api = distworker_api_v1::api(job_authorizer, handler, requester); tokio::spawn(async move { use tokio::time; @@ -1549,7 +1560,7 @@ mod server { ) .await { - Ok(HeartbeatServerResult { is_new }) => { + Ok(HeartbeatWorkerResult { is_new }) => { trace!("Heartbeat success is_new={}", is_new); // TODO: if is_new, terminate all running jobs time::sleep(HEARTBEAT_INTERVAL).await; @@ -1583,7 +1594,7 @@ mod server { } #[async_trait] - impl dist::ServerOutgoing for ServerRequester { + impl dist::WorkerOutgoing for ServerRequester { async fn do_update_job_state( &self, job_id: JobId, @@ -1624,7 +1635,7 @@ mod client { use super::common::{ bincode_req, AllocJobHttpResponse, ReqwestRequestBuilderExt, RunJobHttpRequest, - ServerCertificateHttpResponse, + WorkerCertificateHttpResponse, }; use super::urls; use crate::errors::*; @@ -1720,7 +1731,7 @@ mod client { need_toolchain, cert_digest, } => { - let server_id = job_alloc.server_id.clone(); + let worker_url = job_alloc.worker_url.clone(); let alloc_job_res = Ok(AllocJobResult::Success { job_alloc, need_toolchain, @@ -1728,10 +1739,10 @@ mod client { if server_certs.lock().await.contains_key(&cert_digest) { return alloc_job_res; } - info!("Need to request new certificate for server {}", server_id); - let url = urls::scheduler_server_certificate(&scheduler_url, server_id); + info!("Need to request new certificate for server {}", worker_url); + let url = urls::scheduler_server_certificate(&scheduler_url, worker_url); let req = client_async.lock().await.get(url); - let res: ServerCertificateHttpResponse = bincode_req(req) + let res: WorkerCertificateHttpResponse = bincode_req(req) .await .context("GET to scheduler server_certificate failed")?; @@ -1764,7 +1775,7 @@ mod client { ) -> Result { match self.tc_cache.get_toolchain(&tc) { Ok(Some(toolchain_file)) => { - let url = urls::server_submit_toolchain(job_alloc.server_id, job_alloc.job_id); + let url = urls::server_submit_toolchain(job_alloc.worker_url, job_alloc.job_id); let req = self.client_async.lock().await.post(url); let _toolchain_file_exists = toolchain_file.metadata()?; @@ 
-1791,7 +1802,7 @@ mod client { outputs: Vec, inputs_packager: Box, ) -> Result<(RunJobResult, PathTransformer)> { - let url = urls::server_run_job(job_alloc.server_id, job_alloc.job_id); + let url = urls::server_run_job(job_alloc.worker_url, job_alloc.job_id); let req = self.client_async.lock().await.post(url); let (path_transformer, compressed_body) = self @@ -1864,12 +1875,11 @@ mod client { #[cfg(all(test, feature = "vs_openssl"))] mod tests { - use crate::dist::http::server::create_https_cert_and_privkey; - use crate::dist::SocketAddr; + use crate::dist::http::worker::create_https_cert_and_privkey; use anyhow::{Context, Result}; fn legacy_create_https_cert_and_privkey( - addr: SocketAddr, + addr: &reqwest::Url, ) -> Result<(Vec, Vec, Vec)> { let rsa_key = openssl::rsa::Rsa::::generate(2048) .context("failed to generate rsa privkey")?; @@ -1920,7 +1930,7 @@ mod tests { // Add the SubjectAlternativeName let extension = openssl::x509::extension::SubjectAlternativeName::new() - .ip(&addr.ip().to_string()) + .uri(&addr.to_string()) .build(&builder.x509v3_context(None, None)) .context("failed to build SAN extension for x509")?; builder @@ -1962,11 +1972,16 @@ mod tests { #[test] fn create_cert_and_sk() { - let addr = "242.11.9.38:29114".parse().unwrap(); + use std::str::FromStr; + + let addr = reqwest::Url::from_str("242.11.9.38:29114").unwrap(); + let addr = &addr; struct Triple { + #[allow(unused)] pub cert_digest: Vec, pub cert_pem: Vec, + #[allow(unused)] pub privkey_pem: Vec, } diff --git a/src/dist/mod.rs b/src/dist/mod.rs index 5ad2d366..e4f537f4 100644 --- a/src/dist/mod.rs +++ b/src/dist/mod.rs @@ -13,7 +13,7 @@ // limitations under the License. use crate::compiler; -use crate::config::ServerUrl; +use crate::config::WorkerUrl; use rand::{rngs::OsRng, RngCore}; use std::ffi::OsString; use std::fmt; @@ -21,21 +21,21 @@ use std::io::{self, Read}; use std::path::{Path, PathBuf}; use std::process; use std::str::FromStr; -#[cfg(feature = "dist-server")] +#[cfg(feature = "dist-worker")] use std::sync::Mutex; use crate::errors::*; -#[cfg(any(feature = "dist-client", feature = "dist-server"))] +#[cfg(any(feature = "dist-client", feature = "dist-worker"))] mod cache; #[cfg(feature = "dist-client")] pub mod client_auth; -#[cfg(any(feature = "dist-client", feature = "dist-server"))] +#[cfg(any(feature = "dist-client", feature = "dist-worker"))] pub mod http; #[cfg(test)] mod test; -#[cfg(any(feature = "dist-client", feature = "dist-server"))] +#[cfg(any(feature = "dist-client", feature = "dist-worker"))] pub use crate::dist::cache::TcCache; // TODO: paths (particularly outputs, which are accessed by an unsandboxed program) @@ -350,13 +350,12 @@ impl FromStr for JobId { u64::from_str(s).map(JobId) } } - #[derive(Eq, PartialEq, Clone, Debug, Serialize, Deserialize)] #[serde(deny_unknown_fields)] -pub struct ServerNonce(u64); -impl ServerNonce { +pub struct WorkerNonce(u64); +impl WorkerNonce { pub fn new() -> Self { - ServerNonce(OsRng.next_u64()) + WorkerNonce(OsRng.next_u64()) } } @@ -457,7 +456,7 @@ impl From for process::Output { #[serde(deny_unknown_fields)] pub struct OutputData(Vec, u64); impl OutputData { - #[cfg(any(feature = "dist-server", all(feature = "dist-client", test)))] + #[cfg(any(feature = "dist-worker", all(feature = "dist-client", test)))] pub fn try_from_reader(r: R) -> io::Result { use flate2::read::ZlibEncoder as ZlibReadEncoder; use flate2::Compression; @@ -499,7 +498,7 @@ impl fmt::Display for OutputDataLens { pub struct JobAlloc { pub auth: String, pub job_id: JobId, - 
pub server_id: ServerUrl, + pub worker_url: WorkerUrl, } #[derive(Clone, Serialize, Deserialize)] #[serde(deny_unknown_fields)] @@ -531,11 +530,11 @@ pub enum UpdateJobStateResult { Fail { msg: String }, } -// HeartbeatServer +// HeartbeatWorker #[derive(Clone, Serialize, Deserialize)] #[serde(deny_unknown_fields)] -pub struct HeartbeatServerResult { +pub struct HeartbeatWorkerResult { pub is_new: bool, } @@ -604,25 +603,25 @@ impl<'a> Read for InputsReader<'a> { } } -#[cfg(feature = "dist-server")] +#[cfg(feature = "dist-worker")] type ExtResult = ::std::result::Result; -#[cfg(feature = "dist-server")] +#[cfg(feature = "dist-worker")] #[async_trait] pub trait SchedulerOutgoing: Send + Sync { // To Server async fn do_assign_job( &self, - server_id: ServerUrl, + worker_url: WorkerUrl, job_id: JobId, tc: Toolchain, auth: String, ) -> Result; } -#[cfg(feature = "dist-server")] +#[cfg(feature = "dist-worker")] #[async_trait] -pub trait ServerOutgoing: Send + Sync { +pub trait WorkerOutgoing: Send + Sync { // To Scheduler async fn do_update_job_state( &self, @@ -632,13 +631,13 @@ pub trait ServerOutgoing: Send + Sync { } // Trait to handle the creation and verification of job authorization tokens -#[cfg(feature = "dist-server")] +#[cfg(feature = "dist-worker")] pub trait JobAuthorizer: Send + Sync { fn generate_token(&self, job_id: JobId) -> Result; fn verify_token(&self, job_id: JobId, token: &str) -> Result<()>; } -#[cfg(feature = "dist-server")] +#[cfg(feature = "dist-worker")] #[async_trait] pub trait SchedulerIncoming: Send + Sync { // From Client @@ -648,44 +647,44 @@ pub trait SchedulerIncoming: Send + Sync { tc: Toolchain, ) -> ExtResult; // From Server - fn handle_heartbeat_server( + fn handle_heartbeat_worker( &self, - server_id: ServerUrl, - server_nonce: ServerNonce, + worker_url: WorkerUrl, + worker_nonce: WorkerNonce, num_cpus: usize, job_authorizer: Box, - ) -> ExtResult; + ) -> ExtResult; // From Server fn handle_update_job_state( &self, job_id: JobId, - server_id: ServerUrl, + worker_url: WorkerUrl, job_state: JobState, ) -> ExtResult; // From anyone fn handle_status(&self) -> ExtResult; } -#[cfg(feature = "dist-server")] +#[cfg(feature = "dist-worker")] #[async_trait] -pub trait ServerIncoming: Send + Sync { +pub trait WorkerIncoming: Send + Sync { // From Scheduler async fn handle_assign_job( &self, job_id: JobId, tc: Toolchain, ) -> ExtResult; - // From Client + // From Coordinator async fn handle_submit_toolchain( &self, - requester: &dyn ServerOutgoing, + requester: &dyn WorkerOutgoing, job_id: JobId, tc_rdr: ToolchainReader<'_>, ) -> ExtResult; - // From Client + // From Coordinator async fn handle_run_job( &self, - requester: &dyn ServerOutgoing, + requester: &dyn WorkerOutgoing, job_id: JobId, command: CompileCommand, outputs: Vec, @@ -693,7 +692,7 @@ pub trait ServerIncoming: Send + Sync { ) -> ExtResult; } -#[cfg(feature = "dist-server")] +#[cfg(feature = "dist-worker")] pub trait BuilderIncoming: Send + Sync { // From Server fn run_build( diff --git a/src/lib.rs b/src/lib.rs index edd0fb75..fd691079 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -51,12 +51,12 @@ mod cmdline; mod commands; mod compiler; pub mod config; +pub mod coordinator; pub mod dist; mod jobserver; pub mod lru_disk_cache; mod mock_command; mod protocol; -pub mod server; #[doc(hidden)] pub mod util; diff --git a/src/protocol.rs b/src/protocol.rs index 3c74e594..0039e68f 100644 --- a/src/protocol.rs +++ b/src/protocol.rs @@ -1,5 +1,5 @@ use crate::compiler::ColorMode; -use crate::server::{DistInfo, 
ServerInfo}; +use crate::coordinator::{CoordinatorInfo, DistInfo}; use std::ffi::OsString; /// A client request. @@ -7,40 +7,40 @@ use std::ffi::OsString; pub enum Request { /// Zero the server's statistics. ZeroStats, - /// Get server statistics. + /// Get coordinator statistics. GetStats, /// Get dist status. DistStatus, - /// Shut the server down gracefully. + /// Shut the coordinator down gracefully. Shutdown, /// Execute a compile or fetch a cached compilation result. Compile(Compile), ClearCache, } -/// A server response. +/// A coordinator response. #[derive(Serialize, Deserialize, Debug)] pub enum Response { /// Response for `Request::Compile`. Compile(CompileResponse), - /// Response for `Request::GetStats`, containing server statistics. - Stats(Box), + /// Response for `Request::GetStats`, containing coordinator statistics. + Stats(Box), /// Response for `Request::DistStatus`, containing client info. DistStatus(DistInfo), - /// Response for `Request::Shutdown`, containing server statistics. - ShuttingDown(Box), + /// Response for `Request::Shutdown`, containing coordinator statistics. + ShuttingDown(Box), /// Second response for `Request::Compile`, containing the results of the compilation. CompileFinished(CompileFinished), /// Response for Request::ClearCache. ClearCacheComplete, } -/// Possible responses from the server for a `Compile` request. +/// Possible responses from the coordinator for a `Compile` request. #[derive(Serialize, Deserialize, Debug)] pub enum CompileResponse { /// The compilation was started. CompileStarted, - /// The server could not handle this compilation request. + /// The coordinator could not handle this compilation request. UnhandledCompile, /// The compiler was not supported. UnsupportedCompiler(OsString), diff --git a/src/test/tests.rs b/src/test/tests.rs index 67270ecf..81268555 100644 --- a/src/test/tests.rs +++ b/src/test/tests.rs @@ -13,11 +13,11 @@ // limitations under the License. use crate::cache::disk::DiskCache; -use crate::client::connect_to_server; +use crate::client::connect_to_coordinator; use crate::commands::{do_compile, request_shutdown, request_stats}; +use crate::coordinator::{CachepotCoordinator, CoordinatorMessage, DistClientContainer}; use crate::jobserver::Client; use crate::mock_command::*; -use crate::server::{CachepotServer, DistClientContainer, ServerMessage}; use crate::test::utils::*; use crate::util::fs::File; use futures::channel::oneshot::{self, Sender}; @@ -35,7 +35,7 @@ use tokio::runtime::Runtime; /// Options for running the server in tests. #[derive(Default)] -struct ServerOptions { +struct CoordinatorOptions { /// The server's idle shutdown timeout. idle_timeout: Option, /// The maximum size of the disk cache. @@ -44,23 +44,23 @@ struct ServerOptions { /// Run a server on a background thread, and return a tuple of useful things. /// -/// * The port on which the server is listening. +/// * The port on which the coordinator is listening. /// * A `Sender` which can be used to send messages to the server. -/// (Most usefully, ServerMessage::Shutdown.) +/// (Most usefully, CoordinatorMessage::Shutdown.) /// * An `Arc`-and-`Mutex`-wrapped `MockCommandCreator` which the server will /// use for all process creation. /// * The `JoinHandle` for the server thread. 
-fn run_server_thread( +fn run_coordinator_thread( cache_dir: &Path, options: T, ) -> ( u16, - Sender, + Sender, Arc>, thread::JoinHandle<()>, ) where - T: Into> + Send + 'static, + T: Into> + Send + 'static, { let options = options.into(); let cache_dir = cache_dir.to_path_buf(); @@ -79,8 +79,8 @@ where let storage = Arc::new(DiskCache::new(&cache_dir, cache_size, runtime.handle())); let client = unsafe { Client::new() }; - let srv = CachepotServer::new(0, runtime, client, dist_client, storage).unwrap(); - let mut srv: CachepotServer>> = srv; + let srv = CachepotCoordinator::new(0, runtime, client, dist_client, storage).unwrap(); + let mut srv: CachepotCoordinator>> = srv; assert!(srv.port() > 0); if let Some(options) = options { if let Some(timeout) = options.idle_timeout { @@ -97,11 +97,11 @@ where } #[test] -fn test_server_shutdown() { +fn test_coordinator_shutdown() { let f = TestFixture::new(); - let (port, _sender, _storage, child) = run_server_thread(f.tempdir.path(), None); + let (port, _sender, _storage, child) = run_coordinator_thread(f.tempdir.path(), None); // Connect to the server. - let conn = connect_to_server(port).unwrap(); + let conn = connect_to_coordinator(port).unwrap(); // Ask it to shut down request_shutdown(conn).unwrap(); // Ensure that it shuts down. @@ -110,29 +110,29 @@ fn test_server_shutdown() { /// The server will shutdown when requested when the idle timeout is disabled. #[test] -fn test_server_shutdown_no_idle() { +fn test_coordinator_shutdown_no_idle() { let f = TestFixture::new(); // Set a ridiculously low idle timeout. - let (port, _sender, _storage, child) = run_server_thread( + let (port, _sender, _storage, child) = run_coordinator_thread( f.tempdir.path(), - ServerOptions { + CoordinatorOptions { idle_timeout: Some(0), ..Default::default() }, ); - let conn = connect_to_server(port).unwrap(); + let conn = connect_to_coordinator(port).unwrap(); request_shutdown(conn).unwrap(); child.join().unwrap(); } #[test] -fn test_server_idle_timeout() { +fn test_coordinator_idle_timeout() { let f = TestFixture::new(); // Set a ridiculously low idle timeout. - let (_port, _sender, _storage, child) = run_server_thread( + let (_port, _sender, _storage, child) = run_coordinator_thread( f.tempdir.path(), - ServerOptions { + CoordinatorOptions { idle_timeout: Some(1), ..Default::default() }, @@ -147,14 +147,14 @@ fn test_server_idle_timeout() { #[test] fn test_server_stats() { let f = TestFixture::new(); - let (port, sender, _storage, child) = run_server_thread(f.tempdir.path(), None); + let (port, sender, _storage, child) = run_coordinator_thread(f.tempdir.path(), None); // Connect to the server. - let conn = connect_to_server(port).unwrap(); + let conn = connect_to_coordinator(port).unwrap(); // Ask it for stats. let info = request_stats(conn).unwrap(); assert_eq!(0, info.stats.compile_requests); // Now signal it to shut down. - sender.send(ServerMessage::Shutdown).ok().unwrap(); + sender.send(CoordinatorMessage::Shutdown).ok().unwrap(); // Ensure that it shuts down. child.join().unwrap(); } @@ -162,9 +162,9 @@ fn test_server_stats() { #[test] fn test_server_unsupported_compiler() { let f = TestFixture::new(); - let (port, sender, server_creator, child) = run_server_thread(f.tempdir.path(), None); + let (port, sender, server_creator, child) = run_coordinator_thread(f.tempdir.path(), None); // Connect to the server. 
- let conn = connect_to_server(port).unwrap(); + let conn = connect_to_coordinator(port).unwrap(); { let mut c = server_creator.lock().unwrap(); // The server will check the compiler, so pretend to be an unsupported @@ -202,7 +202,7 @@ fn test_server_unsupported_compiler() { // Make sure we ran the mock processes. assert_eq!(0, server_creator.lock().unwrap().children.len()); // Shut down the server. - sender.send(ServerMessage::Shutdown).ok().unwrap(); + sender.send(CoordinatorMessage::Shutdown).ok().unwrap(); // Ensure that it shuts down. child.join().unwrap(); } @@ -211,13 +211,13 @@ fn test_server_unsupported_compiler() { fn test_server_compile() { let _ = env_logger::Builder::new().is_test(true).try_init(); let f = TestFixture::new(); - let (port, sender, server_creator, child) = run_server_thread(f.tempdir.path(), None); + let (port, sender, server_creator, child) = run_coordinator_thread(f.tempdir.path(), None); // Connect to the server. const PREPROCESSOR_STDOUT: &[u8] = b"preprocessor stdout"; const PREPROCESSOR_STDERR: &[u8] = b"preprocessor stderr"; const STDOUT: &[u8] = b"some stdout"; const STDERR: &[u8] = b"some stderr"; - let conn = connect_to_server(port).unwrap(); + let conn = connect_to_coordinator(port).unwrap(); { let mut c = server_creator.lock().unwrap(); // The server will check the compiler. Pretend it's GCC. @@ -271,7 +271,7 @@ fn test_server_compile() { assert_eq!(STDOUT, stdout.into_inner().as_slice()); assert_eq!(STDERR, stderr.into_inner().as_slice()); // Shut down the server. - sender.send(ServerMessage::Shutdown).ok().unwrap(); + sender.send(CoordinatorMessage::Shutdown).ok().unwrap(); // Ensure that it shuts down. child.join().unwrap(); } @@ -280,14 +280,14 @@ fn test_server_compile() { // test fails intermittently on macos: // https://github.com/mozilla/sccache/issues/234 #[cfg(not(target_os = "macos"))] -fn test_server_port_in_use() { +fn test_coordinator_port_in_use() { // Bind an arbitrary free port. let listener = TcpListener::bind("127.0.0.1:0").unwrap(); let cachepot = find_cachepot_binary(); let output = Command::new(&cachepot) - .arg("--start-server") + .arg("--start-coordinator") .env( - "CACHEPOT_SERVER_PORT", + "CACHEPOT_COORDINATOR_PORT", listener.local_addr().unwrap().port().to_string(), ) .output() @@ -299,6 +299,6 @@ fn test_server_port_in_use() { "=====stdout=====\n{}\n=====stderr=====\n{}\n================", stdout, stderr, ); - const MSG: &str = "Server startup failed:"; + const MSG: &str = "Coordinator startup failed:"; assert!(stderr.contains(MSG), "stderr did not contain '{}':", MSG); } diff --git a/src/util.rs b/src/util.rs index 7cd16a27..99cbd96b 100644 --- a/src/util.rs +++ b/src/util.rs @@ -463,7 +463,7 @@ pub fn daemonize() -> Result<()> { // We don't have a parent process any more once we've reached this point, // which means that no one's probably listening for our exit status. - // In order to assist with debugging crashes of the server we configure our + // In order to assist with debugging crashes of the coordinator we configure our // rlimit to allow runtime dumps and we also install a signal handler for // segfaults which at least prints out what just happened. 
unsafe { @@ -544,7 +544,7 @@ pub fn daemonize() -> Result<()> { // More context: // https://github.com/seanmonstar/reqwest/issues/1328 // https://github.com/briansmith/webpki/issues/54 -#[cfg(any(feature = "dist-client", feature = "dist-server"))] +#[cfg(any(feature = "dist-client", feature = "dist-worker"))] pub fn native_tls_no_sni_client_builder<'a, I, T>(root_certs: I) -> Result where I: Iterator, diff --git a/systemd/Dockerfile.build.cachepot-dist b/systemd/Dockerfile.build.cachepot-dist index 40bd5b40..8b7493e9 100644 --- a/systemd/Dockerfile.build.cachepot-dist +++ b/systemd/Dockerfile.build.cachepot-dist @@ -11,7 +11,7 @@ RUN cd /bubblewrap-0.3.1 && \ FROM rust:1.57-buster as cachepot-build RUN git clone https://github.com/paritytech/cachepot.git --depth=1 && \ cd cachepot && \ - cargo build --bin cachepot-dist --release --features="dist-server" + cargo build --bin cachepot-dist --release --features="dist-worker" FROM ubuntu:20.04 RUN apt-get update && \ diff --git a/systemd/cachepot-server.service b/systemd/cachepot-worker.service similarity index 82% rename from systemd/cachepot-server.service rename to systemd/cachepot-worker.service index 6ebedebb..087839ed 100644 --- a/systemd/cachepot-server.service +++ b/systemd/cachepot-worker.service @@ -1,5 +1,5 @@ [Unit] -Description=cachepot server +Description=cachepot worker After=suspend.target After=hibernate.target @@ -20,7 +20,7 @@ Group=root Environment=CACHEPOT_NO_DAEMON=1 Environment=RUST_LOG=cachepot=trace,cachepot-dist=trace ExecPreStart=-/mnt/cache-dir -ExecStart=/usr/local/bin/cachepot-dist server --config /etc/cachepot/server.conf +ExecStart=/usr/local/bin/cachepot-dist worker --config /etc/cachepot/worker.conf SELinuxContext=system_u:object_r:unreserved_port_t:s0 [Install] diff --git a/systemd/config/client.conf b/systemd/config/client.conf index b946b277..b4388473 100644 --- a/systemd/config/client.conf +++ b/systemd/config/client.conf @@ -3,7 +3,7 @@ # setup of a HTTPS server in front of the scheduler) scheduler_url = "http://172.25.0.10:10600" # Used for mapping local toolchains to remote cross-compile toolchains. Empty in -# this example where the client and build server are both Linux. +# this example where the client and build worker are both Linux. toolchains = [] # Size of the local toolchain cache, in bytes (5GB here, 10GB if unspecified). toolchain_cache_size = 5368709120 diff --git a/systemd/config/server.conf b/systemd/config/worker.conf similarity index 100% rename from systemd/config/server.conf rename to systemd/config/worker.conf diff --git a/systemd/docker-compose.yaml b/systemd/docker-compose.yaml index 31707203..e60683cb 100644 --- a/systemd/docker-compose.yaml +++ b/systemd/docker-compose.yaml @@ -1,12 +1,12 @@ # An example setup for a distributed cachepot cluster that uses # - a single scheduler (172.25.0.10) -# - a single build server (172.25.0.11) +# - a single build worker (172.25.0.11) # both served from a local network. # To test this locally, make sure to set up `dist.scheduler_url` and `dist.auth` # correctly, as used by the `cachepot` binary (the "client") and verify that the # connection works by running `cachepot --dist-status`. # It's worth noting that the security is virtually none using this exact setup. -# To correctly set everything up (e.g. set up auth across scheduler <-> server +# To correctly set everything up (e.g. set up auth across scheduler <-> worker # and client <-> scheduler, use HTTPS for the scheduler) refer to the # `docs/Distributed.md` file. 
version: "3.6" @@ -21,7 +21,7 @@ services: CACHEPOT_NO_DAEMON: 1 CACHEPOT_CONFIG_TOML: | public_addr = "http://172.25.0.10:10600" - [server_auth] + [worker_auth] type = "DANGEROUSLY_INSECURE" [client_auth] type = "token" @@ -32,7 +32,7 @@ services: dist_network: ipv4_address: 172.25.0.10 command: /bin/sh -c "echo \"$$CACHEPOT_CONFIG_TOML\" > config.toml; cachepot-dist scheduler --config config.toml" - server: + worker: # XXX: For the time being, due to the usage of bubblewrap and overlayfs # we are required to run this in the privileged mode (as we do in the # integration tests). Please keep that in mind when running the container. @@ -71,8 +71,8 @@ services: ipv4_address: 172.25.0.11 tmpfs: - /cachepot-dirs - command: /bin/sh -c "echo \"$$CACHEPOT_CONFIG_TOML\" > config.toml; cachepot-dist server --config config.toml" -# We need to set static IPs for the services as scheduler/server always use it + command: /bin/sh -c "echo \"$$CACHEPOT_CONFIG_TOML\" > config.toml; cachepot-dist worker --config config.toml" +# We need to set static IPs for the services as scheduler/worker always use it # to authorize themselves (even when using the DANGEROUSLY_INSECURE scheme) networks: dist_network: diff --git a/tests/cachepot_cargo.rs b/tests/cachepot_cargo.rs index f423bc83..b1632cf3 100644 --- a/tests/cachepot_cargo.rs +++ b/tests/cachepot_cargo.rs @@ -40,10 +40,10 @@ fn test_rust_cargo_cmd(cmd: &str, extra_envs: &[(&str, &std::ffi::OsStr)]) { } fn stop() { - trace!("cachepot --stop-server"); + trace!("cachepot --stop-coordinator"); drop( cachepot_command() - .arg("--stop-server") + .arg("--stop-coordinator") .stdout(Stdio::null()) .stderr(Stdio::null()) .status(), @@ -80,9 +80,9 @@ fn test_rust_cargo_cmd(cmd: &str, extra_envs: &[(&str, &std::ffi::OsStr)]) { let cargo_dir = tempdir.path().join("cargo"); fs::create_dir(&cargo_dir).unwrap(); // Start a new cachepot server. 
- trace!("cachepot --start-server"); + trace!("cachepot --start-coordinator"); cachepot_command() - .arg("--start-server") + .arg("--start-coordinator") .env("CACHEPOT_DIR", &cache_dir) .assert() .success(); diff --git a/tests/dist.rs b/tests/dist.rs index 5ce67c39..f647229d 100644 --- a/tests/dist.rs +++ b/tests/dist.rs @@ -1,4 +1,4 @@ -#![cfg(all(feature = "dist-client", feature = "dist-server"))] +#![cfg(all(feature = "dist-client", feature = "dist-worker"))] extern crate assert_cmd; #[macro_use] @@ -13,8 +13,8 @@ use crate::harness::{ use async_trait::async_trait; use cachepot::config::HTTPUrl; use cachepot::dist::{ - AssignJobResult, CompileCommand, InputsReader, JobId, JobState, RunJobResult, ServerIncoming, - ServerOutgoing, SubmitToolchainResult, Toolchain, ToolchainReader, + AssignJobResult, CompileCommand, InputsReader, JobId, JobState, RunJobResult, + SubmitToolchainResult, Toolchain, ToolchainReader, WorkerIncoming, WorkerOutgoing, }; use serial_test::serial; use std::ffi::OsStr; @@ -165,7 +165,7 @@ async fn test_dist_nobuilder() { struct FailingServer; #[async_trait] -impl ServerIncoming for FailingServer { +impl WorkerIncoming for FailingServer { async fn handle_assign_job(&self, _job_id: JobId, _tc: Toolchain) -> Result { let need_toolchain = false; let state = JobState::Ready; @@ -176,7 +176,7 @@ impl ServerIncoming for FailingServer { } async fn handle_submit_toolchain( &self, - _requester: &dyn ServerOutgoing, + _requester: &dyn WorkerOutgoing, _job_id: JobId, _tc_rdr: ToolchainReader<'_>, ) -> Result { @@ -184,7 +184,7 @@ impl ServerIncoming for FailingServer { } async fn handle_run_job( &self, - requester: &dyn ServerOutgoing, + requester: &dyn WorkerOutgoing, job_id: JobId, _command: CompileCommand, _outputs: Vec, diff --git a/tests/harness/mod.rs b/tests/harness/mod.rs index 1846dfe8..c65934eb 100644 --- a/tests/harness/mod.rs +++ b/tests/harness/mod.rs @@ -1,7 +1,7 @@ -#[cfg(any(feature = "dist-client", feature = "dist-server"))] -use cachepot::config::{HTTPUrl, ServerUrl}; +#[cfg(any(feature = "dist-client", feature = "dist-worker"))] +use cachepot::config::{HTTPUrl, WorkerUrl}; +use cachepot::coordinator::CoordinatorInfo; use cachepot::dist::{self, SchedulerStatusResult}; -use cachepot::server::ServerInfo; use cachepot::util::fs; use std::env; use std::io::Write; @@ -16,7 +16,7 @@ use std::time::Duration; use assert_cmd::prelude::*; use predicates::prelude::*; use serde::Serialize; -#[cfg(feature = "dist-server")] +#[cfg(feature = "dist-worker")] use tokio::task::JoinHandle; use uuid::Uuid; @@ -26,7 +26,7 @@ const DIST_DOCKERFILE: &str = include_str!("Dockerfile.cachepot-dist"); const DIST_IMAGE_BWRAP_PATH: &str = "/bwrap"; const MAX_STARTUP_WAIT: Duration = Duration::from_secs(5); -const DIST_SERVER_TOKEN: &str = "THIS IS THE TEST TOKEN"; +const DIST_WORKER_TOKEN: &str = "THIS IS THE TEST TOKEN"; const CONFIGS_CONTAINER_PATH: &str = "/cachepot-bits"; const BUILD_DIR_CONTAINER_PATH: &str = "/cachepot-bits/build-dir"; @@ -38,9 +38,9 @@ const TC_CACHE_SIZE: u64 = 1024 * 1024 * 1024; // 1 gig pub fn start_local_daemon(cfg_path: &Path, cached_cfg_path: &Path) { // Don't run this with run() because on Windows `wait_with_output` // will hang because the internal server process is not detached. 
- trace!("cachepot --start-server"); + trace!("cachepot --start-coordinator"); let _status = cachepot_command() - .arg("--start-server") + .arg("--start-coordinator") .env("CACHEPOT_CONF", cfg_path) .env("CACHEPOT_CACHED_CONF", cached_cfg_path) .status() @@ -48,17 +48,17 @@ pub fn start_local_daemon(cfg_path: &Path, cached_cfg_path: &Path) { .success(); } pub fn stop_local_daemon() { - trace!("cachepot --stop-server"); + trace!("cachepot --stop-coordinator"); drop( cachepot_command() - .arg("--stop-server") + .arg("--stop-coordinator") .stdout(Stdio::null()) .stderr(Stdio::null()) .status(), ); } -pub fn get_stats(f: F) { +pub fn get_stats(f: F) { cachepot_command() .args(&["--show-stats", "--stats-format=json"]) .assert() @@ -107,7 +107,7 @@ pub fn cachepot_command() -> Command { cmd } -#[cfg(feature = "dist-server")] +#[cfg(feature = "dist-worker")] pub fn cachepot_dist_path() -> PathBuf { assert_cmd::cargo::cargo_bin("cachepot-dist") } @@ -141,35 +141,35 @@ pub fn cachepot_client_cfg(tmpdir: &Path) -> cachepot::config::FileConfig { }, } } -#[cfg(feature = "dist-server")] +#[cfg(feature = "dist-worker")] fn cachepot_scheduler_cfg() -> cachepot::config::scheduler::Config { cachepot::config::scheduler::Config { public_addr: HTTPUrl::from_str(&format!("http://0.0.0.0:{}", SCHEDULER_PORT)).unwrap(), client_auth: cachepot::config::scheduler::ClientAuth::Insecure, - server_auth: cachepot::config::scheduler::ServerAuth::Token { - token: DIST_SERVER_TOKEN.to_owned(), + worker_auth: cachepot::config::scheduler::WorkerAuth::Token { + token: DIST_WORKER_TOKEN.to_owned(), }, } } -#[cfg(feature = "dist-server")] +#[cfg(feature = "dist-worker")] fn cachepot_server_cfg( tmpdir: &Path, scheduler_url: HTTPUrl, server_ip: IpAddr, -) -> cachepot::config::server::Config { +) -> cachepot::config::worker::Config { let relpath = "server-cache"; fs::create_dir(tmpdir.join(relpath)).unwrap(); - cachepot::config::server::Config { - builder: cachepot::config::server::BuilderType::Overlay { + cachepot::config::worker::Config { + builder: cachepot::config::worker::BuilderType::Overlay { build_dir: BUILD_DIR_CONTAINER_PATH.into(), bwrap_path: DIST_IMAGE_BWRAP_PATH.into(), }, cache_dir: Path::new(CONFIGS_CONTAINER_PATH).join(relpath), - public_addr: ServerUrl::from_str(&format!("{}:{}", server_ip, SERVER_PORT)).unwrap(), + public_addr: WorkerUrl::from_str(&format!("{}:{}", server_ip, SERVER_PORT)).unwrap(), scheduler_url, - scheduler_auth: cachepot::config::server::SchedulerAuth::Token { - token: DIST_SERVER_TOKEN.to_owned(), + scheduler_auth: cachepot::config::worker::SchedulerAuth::Token { + token: DIST_WORKER_TOKEN.to_owned(), }, toolchain_cache_size: TC_CACHE_SIZE, } @@ -177,12 +177,12 @@ fn cachepot_server_cfg( // TODO: this is copied from the cachepot-dist binary - it's not clear where would be a better place to put the // code so that it can be included here -#[cfg(feature = "dist-server")] -fn create_server_token(server_id: ServerUrl, auth_token: &str) -> String { - format!("{} {}", server_id, auth_token) +#[cfg(feature = "dist-worker")] +fn create_server_token(worker_url: WorkerUrl, auth_token: &str) -> String { + format!("{} {}", worker_url, auth_token) } -#[cfg(feature = "dist-server")] +#[cfg(feature = "dist-worker")] pub enum ServerHandle { Container { cid: String, @@ -194,7 +194,7 @@ pub enum ServerHandle { }, } -#[cfg(feature = "dist-server")] +#[cfg(feature = "dist-worker")] pub struct DistSystem { cachepot_dist: PathBuf, tmpdir: PathBuf, @@ -205,7 +205,7 @@ pub struct DistSystem { client: 
reqwest::Client, } -#[cfg(feature = "dist-server")] +#[cfg(feature = "dist-worker")] impl DistSystem { pub fn new(cachepot_dist: &Path, tmpdir: &Path) -> Self { // Make sure the docker image is available, building it if necessary @@ -360,7 +360,7 @@ impl DistSystem { r#" set -o errexit && while [ ! -f {cfg}.ready ]; do sleep 0.1; done && - exec /cachepot-dist server --config {cfg} + exec /cachepot-dist worker --config {cfg} "#, cfg = server_cfg_container_path.to_str().unwrap() ), @@ -390,7 +390,7 @@ impl DistSystem { handle } - pub async fn add_custom_server( + pub async fn add_custom_server( &mut self, handler: S, ) -> ServerHandle { @@ -399,10 +399,10 @@ impl DistSystem { let listener = tokio::net::TcpListener::bind(SocketAddr::from((ip, 0))) .await .unwrap(); - ServerUrl::from_str(&format!("{}", listener.local_addr().unwrap())).unwrap() + WorkerUrl::from_str(&format!("{}", listener.local_addr().unwrap())).unwrap() }; - let token = create_server_token(server_addr.clone(), DIST_SERVER_TOKEN); - let server = dist::http::Server::new( + let token = create_server_token(server_addr.clone(), DIST_WORKER_TOKEN); + let server = dist::http::Worker::new( server_addr.0.to_url().clone(), self.scheduler_url().to_url().clone(), token, @@ -522,7 +522,7 @@ impl DistSystem { } // If you want containers to hang around (e.g. for debugging), commend out the "rm -f" lines -#[cfg(feature = "dist-server")] +#[cfg(feature = "dist-worker")] impl Drop for DistSystem { fn drop(&mut self) { let mut did_err = false; @@ -635,7 +635,7 @@ fn check_output(output: &Output) { } } -#[cfg(feature = "dist-server")] +#[cfg(feature = "dist-worker")] fn native_tls_no_sni_client_builder_danger() -> reqwest::ClientBuilder { let tls = native_tls::TlsConnector::builder() .danger_accept_invalid_certs(true) @@ -649,7 +649,7 @@ fn native_tls_no_sni_client_builder_danger() -> reqwest::ClientBuilder { .use_preconfigured_tls(tls) } -#[cfg(feature = "dist-server")] +#[cfg(feature = "dist-worker")] async fn wait_for_http( client: &reqwest::Client, url: HTTPUrl,