10
10
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
11
11
# ANY KIND, either express or implied. See the License for the specific
12
12
# language governing permissions and limitations under the License.
13
- """Placeholder docstring"""
13
+ """Placeholder docstring. """
14
14
from __future__ import print_function , absolute_import
15
15
16
16
from abc import ABCMeta , abstractmethod
22
22
23
23
from sagemaker .session import Session
24
24
from sagemaker .utils import DeferredError
25
+ from sagemaker .lineage import artifact
25
26
26
27
logger = logging .getLogger (__name__ )
27
28
36
37
37
38
38
39
class AnalyticsMetricsBase (with_metaclass (ABCMeta , object )):
39
- """Base class for tuning job or training job analytics classes. Understands
40
- common functionality like persistence and caching.
40
+ """Base class for tuning job or training job analytics classes.
41
+ Understands common functionality like persistence and caching.
41
42
"""
42
43
43
44
def __init__ (self ):
@@ -52,8 +53,8 @@ def export_csv(self, filename):
52
53
self .dataframe ().to_csv (filename )
53
54
54
55
def dataframe (self , force_refresh = False ):
55
- """A pandas dataframe with lots of interesting results about this
56
- object. Created by calling SageMaker List and Describe APIs and
56
+ """A pandas dataframe with lots of interesting results about this object.
57
+ Created by calling SageMaker List and Describe APIs and
57
58
converting them into a convenient tabular summary.
58
59
59
60
Args:
@@ -71,17 +72,15 @@ def _fetch_dataframe(self):
71
72
"""Sub-class must calculate the dataframe and return it."""
72
73
73
74
def clear_cache(self):
    """Discard the locally cached dataframe.

    Resets ``_dataframe`` to None so that the cached result is no longer
    reused and has to be rebuilt from the service on the next access.
    """
    self._dataframe = None
79
80
80
81
81
82
class HyperparameterTuningJobAnalytics (AnalyticsMetricsBase ):
82
- """Fetch results about a hyperparameter tuning job and make them accessible
83
- for analytics.
84
- """
83
+ """Fetch results about a hyperparameter tuning job and make them accessible for analytics."""
85
84
86
85
def __init__ (self , hyperparameter_tuning_job_name , sagemaker_session = None ):
87
86
"""Initialize a ``HyperparameterTuningJobAnalytics`` instance.
@@ -104,7 +103,7 @@ def __init__(self, hyperparameter_tuning_job_name, sagemaker_session=None):
104
103
105
104
@property
def name(self):
    """Return the name of the HyperparameterTuningJob being analyzed."""
    return self._tuning_job_name
109
108
110
109
def __repr__ (self ):
@@ -156,8 +155,8 @@ def reshape(training_summary):
156
155
157
156
@property
158
157
def tuning_ranges (self ):
159
- """A dictionary describing the ranges of all tuned hyperparameters. The
160
- keys are the names of the hyperparameter, and the values are the ranges.
158
+ """A dictionary describing the ranges of all tuned hyperparameters.
159
+ The keys are the names of the hyperparameter, and the values are the ranges.
161
160
162
161
The output can take one of two forms:
163
162
@@ -208,16 +207,15 @@ def tuning_ranges(self):
208
207
}
209
208
210
209
def _prepare_parameter_ranges (self , parameter_ranges ):
211
- """Convert parameter ranges a dictionary using the parameter range names as the keys"""
210
+ """Convert parameter ranges a dictionary using the parameter range names as the keys. """
212
211
out = {}
213
212
for _ , ranges in parameter_ranges .items ():
214
213
for param in ranges :
215
214
out [param ["Name" ]] = param
216
215
return out
217
216
218
217
def description (self , force_refresh = False ):
219
- """Call ``DescribeHyperParameterTuningJob`` for the hyperparameter
220
- tuning job.
218
+ """Call ``DescribeHyperParameterTuningJob`` for the hyperparameter tuning job.
221
219
222
220
Args:
223
221
force_refresh (bool): Set to True to fetch the latest data from
@@ -236,8 +234,7 @@ def description(self, force_refresh=False):
236
234
return self ._tuning_job_describe_result
237
235
238
236
def training_job_summaries (self , force_refresh = False ):
239
- """A (paginated) list of everything from
240
- ``ListTrainingJobsForTuningJob``.
237
+ """A (paginated) list of everything from ``ListTrainingJobsForTuningJob``.
241
238
242
239
Args:
243
240
force_refresh (bool): Set to True to fetch the latest data from
@@ -270,9 +267,7 @@ def training_job_summaries(self, force_refresh=False):
270
267
271
268
272
269
class TrainingJobAnalytics (AnalyticsMetricsBase ):
273
- """Fetch training curve data from CloudWatch Metrics for a specific training
274
- job.
275
- """
270
+ """Fetch training curve data from CloudWatch Metrics for a specific training job."""
276
271
277
272
CLOUDWATCH_NAMESPACE = "/aws/sagemaker/TrainingJobs"
278
273
@@ -318,7 +313,7 @@ def __init__(
318
313
319
314
@property
def name(self):
    """Return the name of the TrainingJob being analyzed."""
    return self._training_job_name
323
318
324
319
def __repr__ (self ):
@@ -365,7 +360,7 @@ def _fetch_dataframe(self):
365
360
return pd .DataFrame (self ._data )
366
361
367
362
def _fetch_metric (self , metric_name ):
368
- """Fetch all the values of a named metric, and add them to _data
363
+ """Fetch all the values of a named metric, and add them to _data.
369
364
370
365
Args:
371
366
metric_name: The metric name to fetch.
@@ -425,6 +420,75 @@ def _metric_names_for_training_job(self):
425
420
return metric_names
426
421
427
422
423
class ArtifactAnalytics(AnalyticsMetricsBase):
    """Fetch artifact data and make them accessible for analytics.

    Artifacts are listed through ``artifact.Artifact.list`` and exposed as a
    pandas dataframe with one row per artifact summary.
    """

    def __init__(
        self,
        sort_by=None,
        sort_order=None,
        source_uri=None,
        artifact_type=None,
        sagemaker_session=None,
    ):
        """Initialize an ``ArtifactAnalytics`` instance.

        Args:
            sort_by (str, optional): The name of the resource property used to sort
                the set of artifacts. Only ``"Name"`` is honoured; any other value
                is silently discarded and stored as None.
            sort_order (str, optional): How artifacts are ordered, valid values are
                Ascending and Descending. The default is Descending.
            source_uri (optional): The artifact source uri for filtering. Forwarded
                unchanged to ``Artifact.list``.
                NOTE(review): previously documented as ``dict``; presumably a str,
                confirm against ``Artifact.list``.
            artifact_type (optional): The artifact type for filtering. Forwarded
                unchanged to ``Artifact.list``.
                NOTE(review): previously documented as ``dict``; presumably a str,
                confirm against ``Artifact.list``.
            sagemaker_session (obj, optional): Sagemaker session. Defaults to None.
        """
        # Unsupported sort keys are dropped rather than rejected.
        self._sort_by = sort_by if sort_by == "Name" else None
        self._sort_order = sort_order
        self._source_uri = source_uri
        self._artifact_type = artifact_type
        self._sagemaker_session = sagemaker_session
        super(ArtifactAnalytics, self).__init__()
        self.clear_cache()

    def __repr__(self):
        """Human-readable representation override."""
        return "<sagemaker.ArtifactAnalytics>"

    def _reshape_source_type(self, artifact_source_types):
        """Reshape artifact source type.

        Args:
            artifact_source_types: Iterable of artifact source types.

        Returns:
            OrderedDict: Empty when the iterable is empty, otherwise a single
            ``"ArtifactSourceType"`` entry.
        """
        out = OrderedDict()
        for artifact_source_type in artifact_source_types:
            # NOTE(review): every iteration writes the same key, so only the
            # last source type survives. Confirm this is intended.
            out["ArtifactSourceType"] = artifact_source_type
        return out

    def _reshape(self, artifact_summary):
        """Reshape artifact summary.

        Args:
            artifact_summary: Object exposing ``artifact_name``, ``artifact_arn``,
                ``artifact_type``, ``source.source_uri``, ``creation_time`` and
                ``last_modified_time`` attributes.

        Returns:
            OrderedDict: One dataframe row, with columns in insertion order.
        """
        out = OrderedDict()
        out["ArtifactName"] = artifact_summary.artifact_name
        out["ArtifactArn"] = artifact_summary.artifact_arn
        out["ArtifactType"] = artifact_summary.artifact_type
        out["ArtifactSourceUri"] = artifact_summary.source.source_uri
        out["CreationTime"] = artifact_summary.creation_time
        out["LastModifiedTime"] = artifact_summary.last_modified_time
        return out

    def _fetch_dataframe(self):
        """Return a pandas dataframe with all artifacts."""
        # The loop variable is deliberately not called ``artifact``: that name
        # is the imported lineage module used by ``_get_list_artifacts``.
        rows = [self._reshape(summary) for summary in self._get_list_artifacts()]
        return pd.DataFrame(rows)

    def _get_list_artifacts(self):
        """List artifacts using the filters and ordering given at construction."""
        return artifact.Artifact.list(
            source_uri=self._source_uri,
            artifact_type=self._artifact_type,
            sort_by=self._sort_by,
            sort_order=self._sort_order,
            sagemaker_session=self._sagemaker_session,
        )
490
+
491
+
428
492
class ExperimentAnalytics (AnalyticsMetricsBase ):
429
493
"""Fetch trial component data and make them accessible for analytics."""
430
494
@@ -486,7 +550,7 @@ def __init__(
486
550
487
551
@property
def name(self):
    """Return the name of the Experiment being analyzed."""
    return self._experiment_name
491
555
492
556
def __repr__ (self ):
@@ -499,7 +563,7 @@ def clear_cache(self):
499
563
self ._trial_components = None
500
564
501
565
def _reshape_parameters (self , parameters ):
502
- """Reshape trial component parameters to a pandas column
566
+ """Reshape trial component parameters to a pandas column.
503
567
Args:
504
568
parameters: trial component parameters
505
569
Returns:
@@ -513,7 +577,7 @@ def _reshape_parameters(self, parameters):
513
577
return out
514
578
515
579
def _reshape_metrics (self , metrics ):
516
- """Reshape trial component metrics to a pandas column
580
+ """Reshape trial component metrics to a pandas column.
517
581
Args:
518
582
metrics: trial component metrics
519
583
Returns:
@@ -533,7 +597,7 @@ def _reshape_metrics(self, metrics):
533
597
return out
534
598
535
599
def _reshape_artifacts (self , artifacts , _artifact_names ):
536
- """Reshape trial component input/output artifacts to a pandas column
600
+ """Reshape trial component input/output artifacts to a pandas column.
537
601
Args:
538
602
artifacts: trial component input/output artifacts
539
603
Returns:
@@ -548,7 +612,8 @@ def _reshape_artifacts(self, artifacts, _artifact_names):
548
612
return out
549
613
550
614
def _reshape_parents (self , parents ):
551
- """Reshape trial component parents to a pandas column
615
+ """Reshape trial component parents to a pandas column.
616
+
552
617
Args:
553
618
parents: trial component parents (trials and experiments)
554
619
Returns:
@@ -565,7 +630,7 @@ def _reshape_parents(self, parents):
565
630
return out
566
631
567
632
def _reshape (self , trial_component ):
568
- """Reshape trial component data to pandas columns
633
+ """Reshape trial component data to pandas columns.
569
634
Args:
570
635
trial_component: dict representing a trial component
571
636
Returns:
@@ -633,8 +698,7 @@ def _get_trial_components(self, force_refresh=False):
633
698
return self ._search (self ._search_expression , self ._sort_by , self ._sort_order )
634
699
635
700
def _search (self , search_expression , sort_by , sort_order ):
636
- """
637
- Perform a search query using SageMaker Search and return the matching trial components
701
+ """Perform a search query using SageMaker Search and return the matching trial components.
638
702
639
703
Args:
640
704
search_expression: Search expression to filter trial components.
0 commit comments