Skip to content

Commit

Permalink
Patch cuda compat mounting on TF 2.18 Training SM (#4562)
Browse files (browse the repository at this point in the history)
* Patch cuda compat mounting on TF 2.18

* disable autopatch

* fix import

* build test 2.18

* fix if statement

* fix allowlist

* revert toml
  • Loading branch information
sirutBuasai authored Feb 16, 2025
1 parent 29e4d1e commit b3891aa
Show file tree
Hide file tree
Showing 6 changed files with 1,400 additions and 206 deletions.
16 changes: 8 additions & 8 deletions tensorflow/training/buildspec-2-18-sm.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ framework: &FRAMEWORK tensorflow
version: &VERSION 2.18.0
short_version: &SHORT_VERSION "2.18"
arch_type: x86
autopatch_build: "True"
# autopatch_build: "True"

repository_info:
training_repository: &TRAINING_REPOSITORY
Expand All @@ -19,6 +19,9 @@ repository_info:

context:
training_context: &TRAINING_CONTEXT
start_cuda_compat:
source: docker/build_artifacts/start_cuda_compat.sh
target: start_cuda_compat.sh
dockerd-entrypoint:
source: docker/build_artifacts/dockerd-entrypoint.py
target: dockerd-entrypoint.py
Expand All @@ -36,7 +39,7 @@ images:
tag_python_version: &TAG_PYTHON_VERSION py310
os_version: &OS_VERSION ubuntu22.04
tag: !join [ *VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *OS_VERSION, "-sagemaker" ]
latest_release_tag: !join [ *VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *OS_VERSION, "-sagemaker" ]
# latest_release_tag: !join [ *VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *OS_VERSION, "-sagemaker" ]
docker_file: !join [ docker/, *SHORT_VERSION, /, *DOCKER_PYTHON_VERSION, /Dockerfile., *DEVICE_TYPE ]
# build_tag_override: "pr:2.16.2-cpu-py310-ubuntu20.04-sagemaker-pr-4362-autopatch"
target: sagemaker
Expand All @@ -52,12 +55,9 @@ images:
tag_python_version: &TAG_PYTHON_VERSION py310
cuda_version: &CUDA_VERSION cu125
os_version: &OS_VERSION ubuntu22.04
tag: !join [ *VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *CUDA_VERSION, "-", *OS_VERSION,
"-sagemaker" ]
latest_release_tag: !join [ *VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *CUDA_VERSION, "-",
*OS_VERSION, "-sagemaker" ]
docker_file: !join [ docker/, *SHORT_VERSION, /, *DOCKER_PYTHON_VERSION, /, *CUDA_VERSION,
/Dockerfile., *DEVICE_TYPE ]
tag: !join [ *VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *CUDA_VERSION, "-", *OS_VERSION, "-sagemaker" ]
# latest_release_tag: !join [ *VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *CUDA_VERSION, "-", *OS_VERSION, "-sagemaker" ]
docker_file: !join [ docker/, *SHORT_VERSION, /, *DOCKER_PYTHON_VERSION, /, *CUDA_VERSION, /Dockerfile., *DEVICE_TYPE ]
# build_tag_override: "pr:2.16.2-gpu-py310-cu123-ubuntu20.04-sagemaker-pr-4362-autopatch"
target: sagemaker
enable_test_promotion: true
Expand Down
Loading

0 comments on commit b3891aa

Please sign in to comment.