1
+ import logging
2
+ import pathlib
3
+ from typing import List
4
+
1
5
import pytest
2
6
3
- from datahub .metadata .urns import (
4
- CorpUserUrn ,
5
- DashboardUrn ,
6
- DataPlatformUrn ,
7
- DatasetUrn ,
8
- Urn ,
9
- )
7
+ from datahub .metadata .urns import CorpUserUrn , DatasetUrn , Urn
10
8
from datahub .utilities .urns .error import InvalidUrnError
11
9
12
10
pytestmark = pytest .mark .filterwarnings ("ignore::DeprecationWarning" )
13
11
12
+ _CURRENT_DIR = pathlib .Path (__file__ ).parent
13
+ logger = logging .getLogger (__name__ )
14
+
14
15
15
16
def test_parse_urn () -> None :
16
17
simple_urn_str = "urn:li:dataPlatform:abc"
@@ -40,38 +41,12 @@ def test_url_encode_urn() -> None:
40
41
)
41
42
42
43
43
- def test_invalid_urn () -> None :
44
- with pytest .raises (InvalidUrnError ):
45
- Urn .from_string ("urn:li:abc" )
46
-
47
- with pytest .raises (InvalidUrnError ):
48
- Urn .from_string ("urn:li:abc:" )
49
-
50
- with pytest .raises (InvalidUrnError ):
51
- Urn .from_string ("urn:li:abc:()" )
52
-
53
- with pytest .raises (InvalidUrnError ):
54
- Urn .from_string ("urn:li:abc:(abc,)" )
55
-
56
- with pytest .raises (InvalidUrnError ):
57
- Urn .from_string ("urn:li:corpuser:abc)" )
58
-
59
-
60
44
def test_urn_colon () -> None :
61
- # Colon characters are valid in urns, and should not mess up parsing.
62
-
63
- urn = Urn .from_string (
64
- "urn:li:dashboard:(looker,dashboards.thelook::customer_lookup)"
65
- )
66
- assert isinstance (urn , DashboardUrn )
67
-
68
- assert DataPlatformUrn .from_string ("urn:li:dataPlatform:abc:def" )
69
- assert DatasetUrn .from_string (
70
- "urn:li:dataset:(urn:li:dataPlatform:abc:def,table_name,PROD)"
71
- )
72
- assert Urn .
from_string (
"urn:li:corpuser:foo:[email protected] " )
45
+ # There's a bunch of other, simpler tests for special characters in the valid_urns test.
73
46
47
+ # This test ensures that the type dispatch and fields work fine here.
74
48
# I'm not sure why you'd ever want this, but technically it's a valid urn.
49
+
75
50
urn = Urn .from_string ("urn:li:corpuser::" )
76
51
assert isinstance (urn , CorpUserUrn )
77
52
assert urn .username == ":"
@@ -85,9 +60,48 @@ def test_urn_coercion() -> None:
85
60
assert urn == Urn .from_string (urn .urn ())
86
61
87
62
88
- def test_urn_type_dispatch () -> None :
63
def test_urn_type_dispatch_1() -> None:
    """Parsing a dataset urn via ``Urn`` yields a ``DatasetUrn``; a mismatched type is rejected."""
    # The generic entry point should hand back the entity-specific class.
    dataset_urn = Urn.from_string("urn:li:dataset:(urn:li:dataPlatform:abc,def,PROD)")
    assert isinstance(dataset_urn, DatasetUrn)

    # Asking a specific urn class to parse a different entity type must fail,
    # and the error message should name the type that was actually passed.
    expected_error = pytest.raises(
        InvalidUrnError, match="Passed an urn of type corpuser"
    )
    with expected_error:
        DatasetUrn.from_string("urn:li:corpuser:foo")
69
+
70
+
71
def test_urn_type_dispatch_2() -> None:
    """A nested dataJob urn round-trips through ``Urn`` and is rejected by ``CorpUserUrn``."""
    data_job_urn_str = "urn:li:dataJob:(urn:li:dataFlow:(airflow,flow_id,prod),job_id)"

    # Parsing and re-serializing must reproduce the input string exactly.
    round_tripped = Urn.from_string(data_job_urn_str).urn()
    assert round_tripped == data_job_urn_str

    # A corpuser-specific parser must refuse a dataJob urn and say so.
    expected_error = pytest.raises(
        InvalidUrnError, match="Passed an urn of type dataJob"
    )
    with expected_error:
        CorpUserUrn.from_string(data_job_urn_str)
77
+
78
+
79
def _load_urns(file_name: pathlib.Path) -> List[str]:
    """Read a URN fixture file and return its non-blank, non-comment lines.

    Every line is stripped of surrounding whitespace. Blank lines and lines
    whose first non-whitespace character is ``#`` are skipped.

    Args:
        file_name: Path of the text file to read; it is decoded as UTF-8.

    Returns:
        The remaining lines, stripped, in file order.

    Raises:
        AssertionError: If no lines remain after filtering.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's locale default encoding.
    stripped_lines = (
        raw_line.strip()
        for raw_line in file_name.read_text(encoding="utf-8").splitlines()
    )
    # Filter on the stripped text so that indented comment lines are skipped
    # too, instead of being returned as if they were urns.
    urns = [line for line in stripped_lines if line and not line.startswith("#")]
    assert len(urns) > 0, f"No urns found in { file_name } "
    return urns
87
+
88
+
89
def test_valid_urns() -> None:
    """Every urn listed in ``valid_urns.txt`` must parse and serialize back unchanged."""
    fixture_path = _CURRENT_DIR / "valid_urns.txt"

    for valid_urn in _load_urns(fixture_path):
        # Log first so the urn being checked is visible if parsing blows up.
        logger.info(f"Testing valid URN: { valid_urn } ")
        round_tripped = Urn.from_string(valid_urn).urn()
        assert round_tripped == valid_urn
97
+
98
+
99
def test_invalid_urns() -> None:
    """Every urn listed in ``invalid_urns.txt`` must be rejected by ``Urn.from_string``.

    Raises:
        AssertionError: Via pytest, if any listed urn parses without raising
            ``InvalidUrnError``.
    """
    invalid_urns_file = _CURRENT_DIR / "invalid_urns.txt"
    invalid_urns = _load_urns(invalid_urns_file)

    for invalid_urn in invalid_urns:
        # Log outside the raises-context: only the parse call itself is the
        # statement expected to raise, so nothing else belongs in that scope.
        logger.info(f"Testing invalid URN: { invalid_urn } ")
        with pytest.raises(InvalidUrnError):
            Urn.from_string(invalid_urn)
0 commit comments