80
80
OwnerClass ,
81
81
OwnershipClass ,
82
82
OwnershipTypeClass ,
83
+ SchemaFieldDataTypeClass ,
83
84
TagAssociationClass ,
84
85
UpstreamClass ,
85
86
UpstreamLineageClass ,
@@ -738,33 +739,37 @@ def generate_virtual_dataset_lineage(
738
739
def generate_physical_dataset_lineage (
739
740
self ,
740
741
dataset_response : dict ,
741
- upstream_warehouse_platform : str ,
742
+ upstream_dataset : str ,
742
743
datasource_urn : str ,
743
744
) -> UpstreamLineageClass :
744
745
# To generate column level lineage, we can manually decode the metadata
745
746
# to produce the ColumnLineageInfo
746
747
columns = dataset_response .get ("result" , {}).get ("columns" , [])
748
+ print (f"\n \n columns are: { columns } \n \n " )
747
749
cll : List [ColumnLineageInfo ] = []
748
750
749
751
for column in columns :
752
+ print ("\n was here\n " )
750
753
cll .append (
751
754
ColumnLineageInfo (
752
755
downstream = DownstreamColumnRef (
753
756
table = None ,
754
757
column = column .get ("column_name" , "" ),
755
- column_type = column .get ("type" , "" ),
758
+ column_type = SchemaFieldDataTypeClass ( column .get ("type" ) , "" ),
756
759
native_column_type = column .get ("type" , "" ),
757
760
),
758
761
upstreams = [
759
762
ColumnRef (
760
- table = upstream_warehouse_platform ,
763
+ table = upstream_dataset ,
761
764
column = column .get ("column_name" , "" ),
762
765
)
763
766
],
764
767
logic = None ,
765
768
)
766
769
)
770
+ print (f"\n \n cll now is: { cll } \n \n " )
767
771
772
+ print (f"\n \n cll is; { cll } \n \n " )
768
773
fine_grained_lineages : List [FineGrainedLineageClass ] = []
769
774
770
775
for cll_info in cll :
@@ -786,10 +791,6 @@ def generate_physical_dataset_lineage(
786
791
)
787
792
)
788
793
789
- upstream_dataset = self .get_datasource_urn_from_id (
790
- dataset_response , upstream_warehouse_platform
791
- )
792
-
793
794
upstream_lineage = UpstreamLineageClass (
794
795
upstreams = [
795
796
UpstreamClass (
@@ -799,6 +800,7 @@ def generate_physical_dataset_lineage(
799
800
],
800
801
fineGrainedLineages = fine_grained_lineages ,
801
802
)
803
+ print (f"\n \n at the end, column level lineage is; { upstream_lineage } \n \n " )
802
804
return upstream_lineage
803
805
804
806
def construct_dataset_from_dataset_data (
@@ -841,11 +843,15 @@ def construct_dataset_from_dataset_data(
841
843
if upstream_warehouse_platform in warehouse_naming :
842
844
upstream_warehouse_platform = warehouse_naming [upstream_warehouse_platform ]
843
845
846
+ upstream_dataset = self .get_datasource_urn_from_id (
847
+ dataset_response , upstream_warehouse_platform
848
+ )
849
+
844
850
# Sometimes the field will be null instead of not existing
845
851
if sql == "null" or not sql :
846
852
tag_urn = f"urn:li:tag:{ self .platform } :physical"
847
853
upstream_lineage = self .generate_physical_dataset_lineage (
848
- dataset_response , upstream_warehouse_platform , datasource_urn
854
+ dataset_response , upstream_dataset , datasource_urn
849
855
)
850
856
else :
851
857
tag_urn = f"urn:li:tag:{ self .platform } :virtual"
@@ -856,6 +862,7 @@ def construct_dataset_from_dataset_data(
856
862
platform_instance = None ,
857
863
env = self .config .env ,
858
864
)
865
+ print (f"\n \n \n parsed query object: { parsed_query_object .column_lineage } " )
859
866
upstream_lineage = self .generate_virtual_dataset_lineage (
860
867
parsed_query_object , datasource_urn
861
868
)
0 commit comments