Skip to content

Commit

Permalink
Patch CUDA Compat mounting for TF inference (#4567)
Browse files Browse the repository at this point in the history
* Patch CUDA Compat mounting for TF inference

* build test tf 2.18

* formatting

* update pip version pin

* fix pip

* test 216 sm

* remove dlc.py

* add allowlist

* fix allowlist

* add back deep_learning_containers.py

* test no entrypoint

* fix allowlist 216

* build test tf 2.18

* fix allowlist 218

* add entrypoint

* add entrypoint

* test without deep_learning_containers.py

* use dockerd_entrypoint.sh

* correct chmod

* print telemetry logs

* run logger

* add more logging

* use bin env

* preserve quoting

* test 218

* build test 216

* revert toml
  • Loading branch information
sirutBuasai authored Feb 19, 2025
1 parent 1dad848 commit 7c9758f
Show file tree
Hide file tree
Showing 9 changed files with 1,989 additions and 167 deletions.
20 changes: 11 additions & 9 deletions tensorflow/inference/buildspec-2-16-sm.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ framework: &FRAMEWORK tensorflow
version: &VERSION 2.16.1
short_version: &SHORT_VERSION 2.16
arch_type: x86
autopatch_build: "True"
# autopatch_build: "True"

repository_info:
inference_repository: &INFERENCE_REPOSITORY
Expand All @@ -19,6 +19,12 @@ repository_info:

context:
inference_context: &INFERENCE_CONTEXT
start_cuda_compat:
source: docker/build_artifacts/start_cuda_compat.sh
target: start_cuda_compat.sh
dockerd_entrypoint:
source: docker/build_artifacts/dockerd_entrypoint.sh
target: dockerd_entrypoint.sh
sagemaker_package_name:
source: docker/build_artifacts/sagemaker
target: sagemaker
Expand All @@ -43,8 +49,7 @@ images:
tag_python_version: &TAG_PYTHON_VERSION py310
os_version: &OS_VERSION ubuntu20.04
tag: !join [ *FRAMEWORK_VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *OS_VERSION, "-sagemaker" ]
latest_release_tag: !join [ *FRAMEWORK_VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *OS_VERSION,
"-sagemaker" ]
latest_release_tag: !join [ *FRAMEWORK_VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *OS_VERSION, "-sagemaker" ]
docker_file: !join [ docker/, *SHORT_VERSION, /, *DOCKER_PYTHON_VERSION, /Dockerfile., *DEVICE_TYPE ]
target: sagemaker
enable_test_promotion: true
Expand All @@ -60,12 +65,9 @@ images:
tag_python_version: &TAG_PYTHON_VERSION py310
cuda_version: &CUDA_VERSION cu122
os_version: &OS_VERSION ubuntu20.04
tag: !join [ *FRAMEWORK_VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *CUDA_VERSION, "-", *OS_VERSION,
"-sagemaker" ]
latest_release_tag: !join [ *FRAMEWORK_VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *CUDA_VERSION,
"-", *OS_VERSION, "-sagemaker" ]
docker_file: !join [ docker/, *SHORT_VERSION, /, *DOCKER_PYTHON_VERSION, /, *CUDA_VERSION, /Dockerfile.,
*DEVICE_TYPE ]
tag: !join [ *FRAMEWORK_VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *CUDA_VERSION, "-", *OS_VERSION, "-sagemaker" ]
latest_release_tag: !join [ *FRAMEWORK_VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *CUDA_VERSION, "-", *OS_VERSION, "-sagemaker" ]
docker_file: !join [ docker/, *SHORT_VERSION, /, *DOCKER_PYTHON_VERSION, /, *CUDA_VERSION, /Dockerfile., *DEVICE_TYPE ]
target: sagemaker
enable_test_promotion: true
context:
Expand Down
20 changes: 11 additions & 9 deletions tensorflow/inference/buildspec-2-18-sm.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ framework: &FRAMEWORK tensorflow
version: &VERSION 2.18.0
short_version: &SHORT_VERSION 2.18
arch_type: x86
autopatch_build: "True"
# autopatch_build: "True"

repository_info:
inference_repository: &INFERENCE_REPOSITORY
Expand All @@ -19,6 +19,12 @@ repository_info:

context:
inference_context: &INFERENCE_CONTEXT
start_cuda_compat:
source: docker/build_artifacts/start_cuda_compat.sh
target: start_cuda_compat.sh
dockerd_entrypoint:
source: docker/build_artifacts/dockerd_entrypoint.sh
target: dockerd_entrypoint.sh
sagemaker_package_name:
source: docker/build_artifacts/sagemaker
target: sagemaker
Expand All @@ -43,8 +49,7 @@ images:
tag_python_version: &TAG_PYTHON_VERSION py310
os_version: &OS_VERSION ubuntu20.04
tag: !join [ *FRAMEWORK_VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *OS_VERSION, "-sagemaker" ]
latest_release_tag: !join [ *FRAMEWORK_VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *OS_VERSION,
"-sagemaker" ]
latest_release_tag: !join [ *FRAMEWORK_VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *OS_VERSION, "-sagemaker" ]
docker_file: !join [ docker/, *SHORT_VERSION, /, *DOCKER_PYTHON_VERSION, /Dockerfile., *DEVICE_TYPE ]
target: sagemaker
enable_test_promotion: true
Expand All @@ -60,12 +65,9 @@ images:
tag_python_version: &TAG_PYTHON_VERSION py310
cuda_version: &CUDA_VERSION cu122
os_version: &OS_VERSION ubuntu20.04
tag: !join [ *FRAMEWORK_VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *CUDA_VERSION, "-", *OS_VERSION,
"-sagemaker" ]
latest_release_tag: !join [ *FRAMEWORK_VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *CUDA_VERSION,
"-", *OS_VERSION, "-sagemaker" ]
docker_file: !join [ docker/, *SHORT_VERSION, /, *DOCKER_PYTHON_VERSION, /, *CUDA_VERSION, /Dockerfile.,
*DEVICE_TYPE ]
tag: !join [ *FRAMEWORK_VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *CUDA_VERSION, "-", *OS_VERSION, "-sagemaker" ]
latest_release_tag: !join [ *FRAMEWORK_VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *CUDA_VERSION, "-", *OS_VERSION, "-sagemaker" ]
docker_file: !join [ docker/, *SHORT_VERSION, /, *DOCKER_PYTHON_VERSION, /, *CUDA_VERSION, /Dockerfile., *DEVICE_TYPE ]
target: sagemaker
enable_test_promotion: true
context:
Expand Down
Loading

0 comments on commit 7c9758f

Please sign in to comment.