1
1
import logging
2
- from typing import Dict , List , Optional
2
+ from typing import Dict , Iterable , List , Optional
3
3
4
+ from datahub .emitter .mce_builder import get_sys_time
5
+ from datahub .emitter .mcp import MetadataChangeProposalWrapper
6
+ from datahub .ingestion .api .workunit import MetadataWorkUnit
4
7
from datahub .ingestion .source .snowflake .constants import SnowflakeObjectDomain
5
8
from datahub .ingestion .source .snowflake .snowflake_config import (
6
9
SnowflakeV2Config ,
12
15
SnowflakeTag ,
13
16
_SnowflakeTagCache ,
14
17
)
15
- from datahub .ingestion .source .snowflake .snowflake_utils import SnowflakeCommonMixin
18
+ from datahub .ingestion .source .snowflake .snowflake_utils import (
19
+ SnowflakeCommonMixin ,
20
+ SnowflakeIdentifierBuilder ,
21
+ )
22
+ from datahub .metadata ._urns .urn_defs import (
23
+ ContainerUrn ,
24
+ DatasetUrn ,
25
+ DataTypeUrn ,
26
+ EntityTypeUrn ,
27
+ SchemaFieldUrn ,
28
+ StructuredPropertyUrn ,
29
+ )
30
+ from datahub .metadata .com .linkedin .pegasus2avro .common import AuditStamp
31
+ from datahub .metadata .com .linkedin .pegasus2avro .structured import (
32
+ StructuredPropertyDefinition ,
33
+ )
16
34
17
35
logger : logging .Logger = logging .getLogger (__name__ )
18
36
@@ -23,11 +41,12 @@ def __init__(
23
41
config : SnowflakeV2Config ,
24
42
data_dictionary : SnowflakeDataDictionary ,
25
43
report : SnowflakeV2Report ,
44
+ snowflake_identifiers : SnowflakeIdentifierBuilder ,
26
45
) -> None :
27
46
self .config = config
28
47
self .data_dictionary = data_dictionary
29
48
self .report = report
30
-
49
+ self . snowflake_identifiers = snowflake_identifiers
31
50
self .tag_cache : Dict [str , _SnowflakeTagCache ] = {}
32
51
33
52
def _get_tags_on_object_without_propagation (
@@ -59,6 +78,45 @@ def _get_tags_on_object_without_propagation(
59
78
raise ValueError (f"Unknown domain { domain } " )
60
79
return tags
61
80
81
def create_structured_property_templates(self) -> Iterable[MetadataWorkUnit]:
    """Yield structured-property definition work units for allowed tags.

    A tag is emitted only when its identifier passes ``config.tag_pattern``,
    its database passes ``config.database_pattern`` and its
    ``<database>.<schema>`` name passes ``config.schema_pattern``.
    ``report.num_structured_property_templates_created`` is incremented once
    for every tag that is emitted.

    When ``config.extract_tags_as_structured_properties`` is falsy nothing is
    yielded and the tag listing is never requested from the data dictionary.

    Yields:
        The work units produced by
        ``gen_tag_as_structured_property_workunits`` for each allowed tag.
    """
    # The flag does not depend on the tag, so test it once up front instead
    # of fetching and filtering every tag only to discard all of them.
    if not self.config.extract_tags_as_structured_properties:
        return

    for tag in self.data_dictionary.get_all_tags():
        if not self.config.tag_pattern.allowed(tag.tag_identifier()):
            continue
        if not self.config.database_pattern.allowed(tag.database):
            continue
        if not self.config.schema_pattern.allowed(f"{tag.database}.{tag.schema}"):
            continue

        self.report.num_structured_property_templates_created += 1
        yield from self.gen_tag_as_structured_property_workunits(tag)
94
+
95
def gen_tag_as_structured_property_workunits(
    self, tag: SnowflakeTag
) -> Iterable[MetadataWorkUnit]:
    """Yield one work unit defining a structured property for ``tag``.

    The property's qualified name is the tag's structured-property
    identifier passed through ``self.snowflake_identifiers``; its display
    name is ``tag.name``. The definition uses the ``datahub.string`` value
    type and lists the container, dataset and schema-field entity types.

    Args:
        tag: The Snowflake tag to describe as a structured property.

    Yields:
        A single work unit wrapping the ``StructuredPropertyDefinition``
        aspect for the property URN.
    """
    qualified_name = self.snowflake_identifiers.snowflake_identifier(
        tag.structured_property_identifier()
    )
    property_urn = StructuredPropertyUrn(qualified_name).urn()

    value_type = DataTypeUrn("datahub.string").urn()
    applicable_entity_types = [
        EntityTypeUrn(f"datahub.{urn_cls.ENTITY_TYPE}").urn()
        for urn_cls in (ContainerUrn, DatasetUrn, SchemaFieldUrn)
    ]
    # Stamped with the current system time and the fixed DataHub actor.
    modified_stamp = AuditStamp(time=get_sys_time(), actor="urn:li:corpuser:datahub")

    definition = StructuredPropertyDefinition(
        qualifiedName=qualified_name,
        displayName=tag.name,
        valueType=value_type,
        entityTypes=applicable_entity_types,
        lastModified=modified_stamp,
    )
    proposal = MetadataChangeProposalWrapper(entityUrn=property_urn, aspect=definition)
    yield proposal.as_workunit()
119
+
62
120
def _get_tags_on_object_with_propagation (
63
121
self ,
64
122
domain : str ,
0 commit comments