Skip to content

Commit 3ceaddb

Browse files
danabensDan Choi
authored and
Dan Choi
committed
feature: add SageMaker lineage, workflow and pipelines support (#448) (#461) (#479) (#485) (#504) (#508) (#513)
1 parent 5905c00 commit 3ceaddb

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

82 files changed

+9757
-40
lines changed

.dictionary

+38
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
args
2+
arn
3+
autoscaling
4+
aws
5+
bool
6+
boolean
7+
boto
8+
botocore
9+
clienterror
10+
cloudwatch
11+
cron
12+
config
13+
dataset
14+
datasets
15+
datetime
16+
desc
17+
docstring
18+
entrypoint
19+
env
20+
iam
21+
hyperparameter
22+
hyperparameters
23+
jupyter
24+
kms
25+
kwargs
26+
neo
27+
noqa
28+
rc
29+
runtime
30+
sagemaker
31+
stdout
32+
str
33+
subdirectories
34+
subnet
35+
subnets
36+
unexpectedstatusexception
37+
uri
38+
vpc

.pylintrc

+1-1
Original file line numberDiff line numberDiff line change
@@ -292,7 +292,7 @@ spelling-dict=
292292
spelling-ignore-words=
293293

294294
# A path to a file that contains private dictionary; one word per line.
295-
spelling-private-dict-file=
295+
spelling-private-dict-file=.dictionary
296296

297297
# Tells whether to store unknown words to indicated private dictionary in
298298
# --spelling-private-dict-file option instead of raising a message.

doc/api/utility/inputs.rst

+1
Original file line numberDiff line numberDiff line change
@@ -5,3 +5,4 @@ Inputs
55
:members:
66
:undoc-members:
77
:show-inheritance:
8+
:noindex:

setup.py

+2
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ def read_version():
3333

3434
# Declare minimal set for installation
3535
required_packages = [
36+
"attrs",
3637
"boto3>=1.14.12",
3738
"google-pasta",
3839
"numpy>=1.9.0",
@@ -73,6 +74,7 @@ def read_version():
7374
"apache-airflow==1.10.11",
7475
"fabric>=2.0",
7576
"requests>=2.20.0, <3",
77+
"sagemaker-experiments",
7678
],
7779
)
7880

src/sagemaker/__init__.py

+1
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@
4949
from sagemaker.local.local_session import LocalSession # noqa: F401
5050

5151
from sagemaker.model import Model, ModelPackage # noqa: F401
52+
from sagemaker.model_metrics import ModelMetrics, MetricsSource # noqa: F401
5253
from sagemaker.pipeline import PipelineModel # noqa: F401
5354
from sagemaker.predictor import Predictor # noqa: F401
5455
from sagemaker.processing import Processor, ScriptProcessor # noqa: F401

src/sagemaker/analytics.py

+95-31
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
1111
# ANY KIND, either express or implied. See the License for the specific
1212
# language governing permissions and limitations under the License.
13-
"""Placeholder docstring"""
13+
"""Placeholder docstring."""
1414
from __future__ import print_function, absolute_import
1515

1616
from abc import ABCMeta, abstractmethod
@@ -22,6 +22,7 @@
2222

2323
from sagemaker.session import Session
2424
from sagemaker.utils import DeferredError
25+
from sagemaker.lineage import artifact
2526

2627
logger = logging.getLogger(__name__)
2728

@@ -36,8 +37,8 @@
3637

3738

3839
class AnalyticsMetricsBase(with_metaclass(ABCMeta, object)):
39-
"""Base class for tuning job or training job analytics classes. Understands
40-
common functionality like persistence and caching.
40+
"""Base class for tuning job or training job analytics classes.
41+
Understands common functionality like persistence and caching.
4142
"""
4243

4344
def __init__(self):
@@ -52,8 +53,8 @@ def export_csv(self, filename):
5253
self.dataframe().to_csv(filename)
5354

5455
def dataframe(self, force_refresh=False):
55-
"""A pandas dataframe with lots of interesting results about this
56-
object. Created by calling SageMaker List and Describe APIs and
56+
"""A pandas dataframe with lots of interesting results about this object.
57+
Created by calling SageMaker List and Describe APIs and
5758
converting them into a convenient tabular summary.
5859
5960
Args:
@@ -71,17 +72,15 @@ def _fetch_dataframe(self):
7172
"""Sub-class must calculate the dataframe and return it."""
7273

7374
def clear_cache(self):
74-
"""Clear the object of all local caches of API methods, so that the next
75-
time any properties are accessed they will be refreshed from the
75+
"""Clear the object of all local caches of API methods.
76+
The next time any properties are accessed, they will be refreshed from the
7677
service.
7778
"""
7879
self._dataframe = None
7980

8081

8182
class HyperparameterTuningJobAnalytics(AnalyticsMetricsBase):
82-
"""Fetch results about a hyperparameter tuning job and make them accessible
83-
for analytics.
84-
"""
83+
"""Fetch results about a hyperparameter tuning job and make them accessible for analytics."""
8584

8685
def __init__(self, hyperparameter_tuning_job_name, sagemaker_session=None):
8786
"""Initialize a ``HyperparameterTuningJobAnalytics`` instance.
@@ -104,7 +103,7 @@ def __init__(self, hyperparameter_tuning_job_name, sagemaker_session=None):
104103

105104
@property
106105
def name(self):
107-
"""Name of the HyperparameterTuningJob being analyzed"""
106+
"""Name of the HyperparameterTuningJob being analyzed."""
108107
return self._tuning_job_name
109108

110109
def __repr__(self):
@@ -156,8 +155,8 @@ def reshape(training_summary):
156155

157156
@property
158157
def tuning_ranges(self):
159-
"""A dictionary describing the ranges of all tuned hyperparameters. The
160-
keys are the names of the hyperparameter, and the values are the ranges.
158+
"""A dictionary describing the ranges of all tuned hyperparameters.
159+
The keys are the names of the hyperparameter, and the values are the ranges.
161160
162161
The output can take one of two forms:
163162
@@ -208,16 +207,15 @@ def tuning_ranges(self):
208207
}
209208

210209
def _prepare_parameter_ranges(self, parameter_ranges):
211-
"""Convert parameter ranges a dictionary using the parameter range names as the keys"""
210+
"""Convert parameter ranges a dictionary using the parameter range names as the keys."""
212211
out = {}
213212
for _, ranges in parameter_ranges.items():
214213
for param in ranges:
215214
out[param["Name"]] = param
216215
return out
217216

218217
def description(self, force_refresh=False):
219-
"""Call ``DescribeHyperParameterTuningJob`` for the hyperparameter
220-
tuning job.
218+
"""Call ``DescribeHyperParameterTuningJob`` for the hyperparameter tuning job.
221219
222220
Args:
223221
force_refresh (bool): Set to True to fetch the latest data from
@@ -236,8 +234,7 @@ def description(self, force_refresh=False):
236234
return self._tuning_job_describe_result
237235

238236
def training_job_summaries(self, force_refresh=False):
239-
"""A (paginated) list of everything from
240-
``ListTrainingJobsForTuningJob``.
237+
"""A (paginated) list of everything from ``ListTrainingJobsForTuningJob``.
241238
242239
Args:
243240
force_refresh (bool): Set to True to fetch the latest data from
@@ -270,9 +267,7 @@ def training_job_summaries(self, force_refresh=False):
270267

271268

272269
class TrainingJobAnalytics(AnalyticsMetricsBase):
273-
"""Fetch training curve data from CloudWatch Metrics for a specific training
274-
job.
275-
"""
270+
"""Fetch training curve data from CloudWatch Metrics for a specific training job."""
276271

277272
CLOUDWATCH_NAMESPACE = "/aws/sagemaker/TrainingJobs"
278273

@@ -318,7 +313,7 @@ def __init__(
318313

319314
@property
320315
def name(self):
321-
"""Name of the TrainingJob being analyzed"""
316+
"""Name of the TrainingJob being analyzed."""
322317
return self._training_job_name
323318

324319
def __repr__(self):
@@ -365,7 +360,7 @@ def _fetch_dataframe(self):
365360
return pd.DataFrame(self._data)
366361

367362
def _fetch_metric(self, metric_name):
368-
"""Fetch all the values of a named metric, and add them to _data
363+
"""Fetch all the values of a named metric, and add them to _data.
369364
370365
Args:
371366
metric_name: The metric name to fetch.
@@ -425,6 +420,75 @@ def _metric_names_for_training_job(self):
425420
return metric_names
426421

427422

423+
class ArtifactAnalytics(AnalyticsMetricsBase):
    """Expose lineage artifact listings as a pandas dataframe for analytics."""

    def __init__(
        self,
        sort_by=None,
        sort_order=None,
        source_uri=None,
        artifact_type=None,
        sagemaker_session=None,
    ):
        """Initialize an ``ArtifactAnalytics`` instance.

        Args:
            sort_by (str, optional): The name of the resource property used to
                sort the set of artifacts. Only ``"Name"`` is supported; any
                other value is replaced with ``None``.
            sort_order (str, optional): How the artifacts are ordered. Valid
                values are Ascending and Descending. The default is Descending.
            source_uri (optional): The artifact source uri used for filtering.
            artifact_type (optional): The artifact type used for filtering.
            sagemaker_session (obj, optional): Sagemaker session. Defaults to None.
        """
        # "Name" is the only sort key that is forwarded to the listing call.
        if sort_by == "Name":
            self._sort_by = sort_by
        else:
            self._sort_by = None
        self._sort_order = sort_order
        self._source_uri = source_uri
        self._artifact_type = artifact_type
        self._sagemaker_session = sagemaker_session
        super(ArtifactAnalytics, self).__init__()
        self.clear_cache()

    def __repr__(self):
        """Return the fixed human-readable representation of this object."""
        return "<sagemaker.ArtifactAnalytics>"

    def _reshape_source_type(self, artifact_source_types):
        """Reshape artifact source types into an ordered mapping.

        Args:
            artifact_source_types: Iterable of artifact source types.

        Returns:
            OrderedDict: Mapping with at most one ``"ArtifactSourceType"`` entry.
        """
        reshaped = OrderedDict()
        for source_type in artifact_source_types:
            # The key is the same on every pass, so the last item wins.
            reshaped["ArtifactSourceType"] = source_type
        return reshaped

    def _reshape(self, artifact_summary):
        """Reshape one artifact summary into an ordered row of columns.

        Args:
            artifact_summary: Summary object exposing ``artifact_name``,
                ``artifact_arn``, ``artifact_type``, ``source.source_uri``,
                ``creation_time`` and ``last_modified_time``.

        Returns:
            OrderedDict: Column name to value, in display order.
        """
        return OrderedDict(
            [
                ("ArtifactName", artifact_summary.artifact_name),
                ("ArtifactArn", artifact_summary.artifact_arn),
                ("ArtifactType", artifact_summary.artifact_type),
                ("ArtifactSourceUri", artifact_summary.source.source_uri),
                ("CreationTime", artifact_summary.creation_time),
                ("LastModifiedTime", artifact_summary.last_modified_time),
            ]
        )

    def _fetch_dataframe(self):
        """Return a pandas dataframe with one row per listed artifact."""
        rows = [self._reshape(summary) for summary in self._get_list_artifacts()]
        return pd.DataFrame(rows)

    def _get_list_artifacts(self):
        """List artifacts using the filters and ordering stored on this instance."""
        return artifact.Artifact.list(
            source_uri=self._source_uri,
            artifact_type=self._artifact_type,
            sort_by=self._sort_by,
            sort_order=self._sort_order,
            sagemaker_session=self._sagemaker_session,
        )
490+
491+
428492
class ExperimentAnalytics(AnalyticsMetricsBase):
429493
"""Fetch trial component data and make them accessible for analytics."""
430494

@@ -486,7 +550,7 @@ def __init__(
486550

487551
@property
488552
def name(self):
489-
"""Name of the Experiment being analyzed"""
553+
"""Name of the Experiment being analyzed."""
490554
return self._experiment_name
491555

492556
def __repr__(self):
@@ -499,7 +563,7 @@ def clear_cache(self):
499563
self._trial_components = None
500564

501565
def _reshape_parameters(self, parameters):
502-
"""Reshape trial component parameters to a pandas column
566+
"""Reshape trial component parameters to a pandas column.
503567
Args:
504568
parameters: trial component parameters
505569
Returns:
@@ -513,7 +577,7 @@ def _reshape_parameters(self, parameters):
513577
return out
514578

515579
def _reshape_metrics(self, metrics):
516-
"""Reshape trial component metrics to a pandas column
580+
"""Reshape trial component metrics to a pandas column.
517581
Args:
518582
metrics: trial component metrics
519583
Returns:
@@ -533,7 +597,7 @@ def _reshape_metrics(self, metrics):
533597
return out
534598

535599
def _reshape_artifacts(self, artifacts, _artifact_names):
536-
"""Reshape trial component input/output artifacts to a pandas column
600+
"""Reshape trial component input/output artifacts to a pandas column.
537601
Args:
538602
artifacts: trial component input/output artifacts
539603
Returns:
@@ -548,7 +612,8 @@ def _reshape_artifacts(self, artifacts, _artifact_names):
548612
return out
549613

550614
def _reshape_parents(self, parents):
551-
"""Reshape trial component parents to a pandas column
615+
"""Reshape trial component parents to a pandas column.
616+
552617
Args:
553618
parents: trial component parents (trials and experiments)
554619
Returns:
@@ -565,7 +630,7 @@ def _reshape_parents(self, parents):
565630
return out
566631

567632
def _reshape(self, trial_component):
568-
"""Reshape trial component data to pandas columns
633+
"""Reshape trial component data to pandas columns.
569634
Args:
570635
trial_component: dict representing a trial component
571636
Returns:
@@ -633,8 +698,7 @@ def _get_trial_components(self, force_refresh=False):
633698
return self._search(self._search_expression, self._sort_by, self._sort_order)
634699

635700
def _search(self, search_expression, sort_by, sort_order):
636-
"""
637-
Perform a search query using SageMaker Search and return the matching trial components
701+
"""Perform a search query using SageMaker Search and return the matching trial components.
638702
639703
Args:
640704
search_expression: Search expression to filter trial components.

src/sagemaker/apiutils/.pydocstylerc

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
[pydocstyle]
2+
inherit = false

src/sagemaker/apiutils/__init__.py

+13
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License"). You
4+
# may not use this file except in compliance with the License. A copy of
5+
# the License is located at
6+
#
7+
# http://aws.amazon.com/apache2.0/
8+
#
9+
# or in the "license" file accompanying this file. This file is
10+
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
11+
# ANY KIND, either express or implied. See the License for the specific
12+
# language governing permissions and limitations under the License.
13+
"""Utilities for providing a more pythonic style wrapper of boto."""

0 commit comments

Comments
 (0)