51
51
BooleanTypeClass ,
52
52
BytesTypeClass ,
53
53
DataPlatformInstanceClass ,
54
+ DatasetLineageTypeClass ,
54
55
DatasetProfileClass ,
55
56
DatasetPropertiesClass ,
56
57
DateTypeClass ,
69
70
StringTypeClass ,
70
71
SubTypesClass ,
71
72
TagAssociationClass ,
73
+ UpstreamClass ,
74
+ UpstreamLineageClass ,
72
75
)
73
76
from datahub .utilities import config_clean
74
77
from datahub .utilities .lossy_collections import LossyList
@@ -151,7 +154,8 @@ class SalesforceConfig(
151
154
description = "Regex patterns for profiles to filter in ingestion, allowed by the `object_pattern`." ,
152
155
)
153
156
154
- set_referenced_entities_as_upstream : bool = Field (
157
+ # There is no ERD-style visual graph view, so exposing referenced entities as upstreams is a useful alternative.
158
+ use_referenced_entities_as_upstreams : bool = Field (
155
159
default = False ,
156
160
description = "If enabled, referenced entities will be treated as upstream entities." ,
157
161
)
@@ -170,6 +174,10 @@ def remove_trailing_slash(cls, v):
170
174
class SalesforceSourceReport (StaleEntityRemovalSourceReport ):
171
175
filtered : LossyList [str ] = dataclass_field (default_factory = LossyList )
172
176
177
+ objects_with_calculated_field : LossyList [str ] = dataclass_field (
178
+ default_factory = LossyList
179
+ )
180
+
173
181
def report_dropped (self , ent_name : str ) -> None :
174
182
self .filtered .append (ent_name )
175
183
@@ -343,7 +351,7 @@ def get_custom_object_details(self, sObjectDeveloperName: str) -> dict:
343
351
customObject = custom_objects_response ["records" ][0 ]
344
352
return customObject
345
353
346
- def get_fields_for_object (self , sObjectName , sObject ) :
354
+ def get_fields_for_object (self , sObjectName : str , sObjectDurableId : str ) -> list :
347
355
sObject_fields_query_url = (
348
356
self .base_url
349
357
+ "tooling/query?q=SELECT "
@@ -353,7 +361,7 @@ def get_fields_for_object(self, sObjectName, sObject):
353
361
+ "IsCompound, IsComponent, ReferenceTo, FieldDefinition.ComplianceGroup,"
354
362
+ "RelationshipName, IsNillable, FieldDefinition.Description, InlineHelpText, "
355
363
+ "IsCalculated FROM EntityParticle WHERE EntityDefinitionId='{}'" .format (
356
- sObject [ "DurableId" ]
364
+ sObjectDurableId
357
365
)
358
366
)
359
367
@@ -532,10 +540,33 @@ def get_salesforce_object_workunits(
532
540
533
541
yield self .get_properties_workunit (sObject , customObject , datasetUrn )
534
542
543
+ allFields = self .sf_api .get_fields_for_object (sObjectName , sObject ["DurableId" ])
544
+
545
+ customFields = self .sf_api .get_custom_fields_for_object (
546
+ sObjectName , sObject ["DurableId" ]
547
+ )
548
+
549
+ if any (field ["IsCalculated" ] for field in allFields ):
550
+ # Although formula is present in Metadata column of CustomField entity,
551
+ # we can not use it as it allows querying only for one field at a time
552
+ # and that would not be performant
553
+ self .report .objects_with_calculated_field .append (sObjectName )
554
+ calculated_field_formulae = self .get_calculated_field_formulae (sObjectName )
555
+ else :
556
+ calculated_field_formulae = {}
557
+
535
558
yield from self .get_schema_metadata_workunit (
536
- sObjectName , sObject , customObject , datasetUrn
559
+ sObjectName ,
560
+ allFields ,
561
+ customFields ,
562
+ customObject ,
563
+ datasetUrn ,
564
+ calculated_field_formulae ,
537
565
)
538
566
567
+ if self .config .use_referenced_entities_as_upstreams :
568
+ yield from self .get_upstream_workunit (datasetUrn , allFields )
569
+
539
570
yield self .get_subtypes_workunit (sObjectName , datasetUrn )
540
571
541
572
if self .config .platform_instance is not None :
@@ -549,6 +580,30 @@ def get_salesforce_object_workunits(
549
580
):
550
581
yield from self .get_profile_workunit (sObjectName , datasetUrn )
551
582
583
def get_upstream_workunit(
    self, datasetUrn: str, allFields: List[dict]
) -> Iterable[MetadataWorkUnit]:
    """Emit upstream lineage from this object to the objects its fields reference.

    Every field whose ``DataType`` is ``"reference"`` contributes the object
    names listed under ``field["ReferenceTo"]["referenceTo"]``. Each distinct
    referenced object becomes one ``UpstreamClass`` of type ``TRANSFORMED``.

    Args:
        datasetUrn: URN of the dataset the lineage aspect is attached to.
        allFields: Field records for the object; each must carry the
            ``DataType`` and ``ReferenceTo`` keys.

    Yields:
        A single work unit carrying an ``UpstreamLineageClass`` aspect, or
        nothing when no field references another object.
    """
    # Several reference fields can point at the same object. Keying by the
    # referenced name keeps exactly one upstream per object and (dicts being
    # insertion-ordered) preserves the order in which objects were first seen.
    upstream_by_object_name: Dict[str, UpstreamClass] = {}
    for field in allFields:
        if field["DataType"] != "reference":
            continue
        # A falsy ``referenceTo`` (None or empty) means nothing is referenced.
        for referenced_sObjectName in field["ReferenceTo"]["referenceTo"] or []:
            if referenced_sObjectName in upstream_by_object_name:
                continue
            upstream_by_object_name[referenced_sObjectName] = UpstreamClass(
                dataset=builder.make_dataset_urn_with_platform_instance(
                    self.platform,
                    referenced_sObjectName,
                    self.config.platform_instance,
                    self.config.env,
                ),
                type=DatasetLineageTypeClass.TRANSFORMED,
            )

    if upstream_by_object_name:
        yield MetadataChangeProposalWrapper(
            entityUrn=datasetUrn,
            aspect=UpstreamLineageClass(
                upstreams=list(upstream_by_object_name.values())
            ),
        ).as_workunit()

+
552
607
def get_domain_workunit (
553
608
self , dataset_name : str , datasetUrn : str
554
609
) -> Iterable [MetadataWorkUnit ]:
@@ -806,37 +861,39 @@ def get_audit_stamp(self, date: str, username: str) -> AuditStampClass:
806
861
actor = builder .make_user_urn (username ),
807
862
)
808
863
809
def get_calculated_field_formulae(self, sObjectName: str) -> Dict[str, str]:
    """Map each calculated field of an object to its formula.

    The object is described through ``self.sf_api.describe_object`` and only
    fields with a truthy ``calculatedFormula`` are included in the result.

    Any exception raised while describing the object or reading its fields
    is reported through ``self.report.warning`` instead of propagating; the
    entries gathered before the failure are still returned.

    Args:
        sObjectName: Name of the object to describe.

    Returns:
        Field name -> formula, for calculated fields only.
    """
    formula_by_field_name: Dict[str, str] = {}
    try:
        described_fields = self.sf_api.describe_object(sObjectName)["fields"]
        for described_field in described_fields:
            formula = described_field["calculatedFormula"]
            if not formula:
                # Not a calculated field.
                continue
            formula_by_field_name[described_field["name"]] = formula
    except Exception as e:
        # Best effort: formulae are optional enrichment, so a failure here
        # is surfaced as a warning rather than aborting the caller.
        self.report.warning(
            message="Failed to get calculated field formulae",
            context=sObjectName,
            exc=e,
        )
    return formula_by_field_name
817
881
818
882
def get_schema_metadata_workunit (
819
883
self ,
820
884
sObjectName : str ,
821
- sObject : dict ,
885
+ all_fields : List [dict ],
886
+ custom_fields : dict ,
822
887
customObject : dict ,
823
888
datasetUrn : str ,
889
+ calculated_field_formulae : Dict [str , str ],
824
890
) -> Iterable [MetadataWorkUnit ]:
825
- all_fields = self .sf_api .get_fields_for_object (sObjectName , sObject )
826
-
827
- customFields = self .sf_api .get_custom_fields_for_object (
828
- sObjectName , sObject ["DurableId" ]
829
- )
830
-
831
- describe_object_result = self .sf_api .describe_object (sObjectName )
832
- calculated_fields = self .get_field_formulae (describe_object_result )
833
-
834
891
fields : List [SchemaFieldClass ] = []
835
892
primaryKeys : List [str ] = []
836
893
foreignKeys : List [ForeignKeyConstraintClass ] = []
837
894
838
895
for field in all_fields :
839
- customField = customFields .get (field ["DeveloperName" ], {})
896
+ customField = custom_fields .get (field ["DeveloperName" ], {})
840
897
841
898
fieldName = field ["QualifiedApiName" ]
842
899
fieldType = field ["DataType" ]
@@ -851,7 +908,7 @@ def get_schema_metadata_workunit(
851
908
fieldType ,
852
909
field ,
853
910
customField ,
854
- calculated_fields .get (fieldName ),
911
+ calculated_field_formulae .get (fieldName ),
855
912
)
856
913
fields .append (schemaField )
857
914
0 commit comments