Skip to content

Commit 92e493c

Browse files
benieric and jmahlik authored
fix: remove deprecated distutils (#4837)
* fix: remove deprecated distutils imports by local mode
* fix: remove deprecated distutils api calls in workflows
  Also remove from tests. Closes #4534
* fix pytorch
* use latest py_version
* fix format
* use latest default sklearn
* unit test
* unit test
---------
Co-authored-by: Justin Mahlik <[email protected]>
1 parent ec89f7d commit 92e493c

File tree

11 files changed

+26
-34
lines changed

11 files changed

+26
-34
lines changed

.pylintrc

+1-1
Original file line numberDiff line numberDiff line change
@@ -310,7 +310,7 @@ ignore-mixin-members=yes
310310
# (useful for modules/projects where namespaces are manipulated during runtime
311311
# and thus existing member attributes cannot be deduced by static analysis. It
312312
# supports qualified module names, as well as Unix pattern matching.
313-
ignored-modules=distutils
313+
ignored-modules=
314314

315315
# List of class names for which member attributes should not be checked (useful
316316
# for classes with dynamically set attributes). This supports the use of

src/sagemaker/local/image.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,6 @@
3030
import tarfile
3131
import tempfile
3232

33-
from distutils.spawn import find_executable
3433
from threading import Thread
3534
from typing import Dict, List
3635
from six.moves.urllib.parse import urlparse
@@ -170,7 +169,7 @@ def _get_compose_cmd_prefix():
170169
compose_cmd_prefix.extend(["docker", "compose"])
171170
return compose_cmd_prefix
172171

173-
if find_executable("docker-compose") is not None:
172+
if shutil.which("docker-compose") is not None:
174173
logger.info("'Docker Compose' found using Docker Compose CLI.")
175174
compose_cmd_prefix.extend(["docker-compose"])
176175
return compose_cmd_prefix

src/sagemaker/local/utils.py

+2-3
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@
2121
import re
2222
import errno
2323

24-
from distutils.dir_util import copy_tree
2524
from six.moves.urllib.parse import urlparse
2625

2726
from sagemaker import s3
@@ -102,7 +101,7 @@ def move_to_destination(source, destination, job_name, sagemaker_session, prefix
102101

103102

104103
def recursive_copy(source, destination):
105-
"""A wrapper around distutils.dir_util.copy_tree.
104+
"""A wrapper around shutil.copytree.
106105
107106
This won't throw any exception when the source directory does not exist.
108107
@@ -111,7 +110,7 @@ def recursive_copy(source, destination):
111110
destination (str): destination path
112111
"""
113112
if os.path.isdir(source):
114-
copy_tree(source, destination)
113+
shutil.copytree(source, destination, dirs_exist_ok=True)
115114

116115

117116
def kill_child_processes(pid):

src/sagemaker/workflow/_repack_model.py

+1-8
Original file line numberDiff line numberDiff line change
@@ -27,13 +27,6 @@
2727
# is unpacked for inference, the custom entry point will be used.
2828
# Reference: https://docs.aws.amazon.com/sagemaker/latest/dg/amazon-sagemaker-toolkits.html
2929

30-
# distutils.dir_util.copy_tree works way better than the half-baked
31-
# shutil.copytree which bombs on previously existing target dirs...
32-
# alas ... https://bugs.python.org/issue10948
33-
# we'll go ahead and use the copy_tree function anyways because this
34-
# repacking is some short-lived hackery, right??
35-
from distutils.dir_util import copy_tree
36-
3730
from os.path import abspath, realpath, dirname, normpath, join as joinpath
3831

3932
logger = logging.getLogger(__name__)
@@ -188,7 +181,7 @@ def repack(inference_script, model_archive, dependencies=None, source_dir=None):
188181

189182
# copy the "src" dir, which includes the previous training job's model and the
190183
# custom inference script, to the output of this training job
191-
copy_tree(src_dir, "/opt/ml/model")
184+
shutil.copytree(src_dir, "/opt/ml/model", dirs_exist_ok=True)
192185

193186

194187
if __name__ == "__main__": # pragma: no cover

src/sagemaker/workflow/_utils.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@
4646

4747
logger = logging.getLogger(__name__)
4848

49-
FRAMEWORK_VERSION = "0.23-1"
49+
FRAMEWORK_VERSION = "1.2-1"
5050
INSTANCE_TYPE = "ml.m5.large"
5151
REPACK_SCRIPT = "_repack_model.py"
5252
REPACK_SCRIPT_LAUNCHER = "_repack_script_launcher.sh"

tests/data/_repack_model.py

+1-8
Original file line numberDiff line numberDiff line change
@@ -26,13 +26,6 @@
2626
# is unpacked for inference, the custom entry point will be used.
2727
# Reference: https://docs.aws.amazon.com/sagemaker/latest/dg/amazon-sagemaker-toolkits.html
2828

29-
# distutils.dir_util.copy_tree works way better than the half-baked
30-
# shutil.copytree which bombs on previously existing target dirs...
31-
# alas ... https://bugs.python.org/issue10948
32-
# we'll go ahead and use the copy_tree function anyways because this
33-
# repacking is some short-lived hackery, right??
34-
from distutils.dir_util import copy_tree
35-
3629

3730
def repack(inference_script, model_archive, dependencies=None, source_dir=None): # pragma: no cover
3831
"""Repack custom dependencies and code into an existing model TAR archive
@@ -92,7 +85,7 @@ def repack(inference_script, model_archive, dependencies=None, source_dir=None):
9285

9386
# copy the "src" dir, which includes the previous training job's model and the
9487
# custom inference script, to the output of this training job
95-
copy_tree(src_dir, "/opt/ml/model")
88+
shutil.copytree(src_dir, "/opt/ml/model", dirs_exist_ok=True)
9689

9790

9891
if __name__ == "__main__": # pragma: no cover

tests/integ/sagemaker/workflow/test_retry.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,8 @@ def test_model_registration_with_model_repack(
148148
role,
149149
pipeline_name,
150150
region_name,
151+
pytorch_training_latest_version,
152+
pytorch_training_latest_py_version,
151153
):
152154
base_dir = os.path.join(DATA_DIR, "pytorch_mnist")
153155
entry_point = os.path.join(base_dir, "mnist.py")
@@ -166,8 +168,8 @@ def test_model_registration_with_model_repack(
166168
pytorch_estimator = PyTorch(
167169
entry_point=entry_point,
168170
role=role,
169-
framework_version="1.5.0",
170-
py_version="py3",
171+
framework_version=pytorch_training_latest_version,
172+
py_version=pytorch_training_latest_py_version,
171173
instance_count=instance_count,
172174
instance_type=instance_type,
173175
sagemaker_session=pipeline_session,

tests/unit/sagemaker/local/test_local_image.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,7 @@ def test_get_compose_cmd_prefix_with_docker_cli():
160160
"subprocess.check_output",
161161
side_effect=subprocess.CalledProcessError(returncode=1, cmd="docker compose version"),
162162
)
163-
@patch("sagemaker.local.image.find_executable", Mock(return_value="/usr/bin/docker-compose"))
163+
@patch("sagemaker.local.image.shutil.which", Mock(return_value="/usr/bin/docker-compose"))
164164
def test_get_compose_cmd_prefix_with_docker_compose_cli(check_output):
165165
compose_cmd_prefix = _SageMakerContainer._get_compose_cmd_prefix()
166166
assert compose_cmd_prefix == ["docker-compose"]
@@ -170,7 +170,7 @@ def test_get_compose_cmd_prefix_with_docker_compose_cli(check_output):
170170
"subprocess.check_output",
171171
side_effect=subprocess.CalledProcessError(returncode=1, cmd="docker compose version"),
172172
)
173-
@patch("sagemaker.local.image.find_executable", Mock(return_value=None))
173+
@patch("sagemaker.local.image.shutil.which", Mock(return_value=None))
174174
def test_get_compose_cmd_prefix_raises_import_error(check_output):
175175
with pytest.raises(ImportError) as e:
176176
_SageMakerContainer._get_compose_cmd_prefix()

tests/unit/sagemaker/local/test_local_utils.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -85,11 +85,11 @@ def test_move_to_destination_illegal_destination():
8585

8686

8787
@patch("sagemaker.local.utils.os.path")
88-
@patch("sagemaker.local.utils.copy_tree")
88+
@patch("sagemaker.local.utils.shutil.copytree")
8989
def test_recursive_copy(copy_tree, m_os_path):
9090
m_os_path.isdir.return_value = True
9191
sagemaker.local.utils.recursive_copy("source", "destination")
92-
copy_tree.assert_called_with("source", "destination")
92+
copy_tree.assert_called_with("source", "destination", dirs_exist_ok=True)
9393

9494

9595
@patch("sagemaker.local.utils.os")

tests/unit/sagemaker/workflow/test_step_collections.py

+2-3
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@
5555

5656
MODEL_NAME = "gisele"
5757
MODEL_REPACKING_IMAGE_URI = (
58-
"246618743249.dkr.ecr.us-west-2.amazonaws.com/sagemaker-scikit-learn:0.23-1-cpu-py3"
58+
"246618743249.dkr.ecr.us-west-2.amazonaws.com/sagemaker-scikit-learn:1.2-1-cpu-py3"
5959
)
6060

6161

@@ -1219,8 +1219,7 @@ def test_estimator_transformer_with_model_repack_with_estimator(estimator, sourc
12191219
assert arguments == {
12201220
"AlgorithmSpecification": {
12211221
"TrainingInputMode": "File",
1222-
"TrainingImage": "246618743249.dkr.ecr.us-west-2.amazonaws.com/"
1223-
+ "sagemaker-scikit-learn:0.23-1-cpu-py3",
1222+
"TrainingImage": MODEL_REPACKING_IMAGE_URI,
12241223
},
12251224
"ProfilerConfig": {"DisableProfiler": True},
12261225
"OutputDataConfig": {"S3OutputPath": "s3://my-bucket/"},

tests/unit/test_chainer.py

+9-2
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@
1515
import logging
1616
import json
1717
import os
18-
from distutils.util import strtobool
1918

2019
import pytest
2120
from mock import MagicMock, Mock, ANY
@@ -174,7 +173,15 @@ def test_additional_hyperparameters(sagemaker_session, chainer_version, chainer_
174173
framework_version=chainer_version,
175174
py_version=chainer_py_version,
176175
)
177-
assert bool(strtobool(chainer.hyperparameters()["sagemaker_use_mpi"]))
176+
177+
assert chainer.hyperparameters()["sagemaker_use_mpi"].lower() in (
178+
"y",
179+
"yes",
180+
"t",
181+
"true",
182+
"on",
183+
"1",
184+
)
178185
assert int(chainer.hyperparameters()["sagemaker_num_processes"]) == 4
179186
assert int(chainer.hyperparameters()["sagemaker_process_slots_per_host"]) == 10
180187
assert (

0 commit comments

Comments
 (0)