Skip to content

Commit d5ab001

Browse files
hsheth2, devin-ai-integration[bot], and harshal.sheth@acryl.io
authored
feat(ingest/git): add subdir support to GitReference (#12131)
Co-authored-by: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Co-authored-by: harshal.sheth@acryl.io <harshal.sheth@acryl.io>
1 parent 42cad3d commit d5ab001

File tree

2 files changed

+38
-5
lines changed

2 files changed

+38
-5
lines changed

metadata-ingestion/src/datahub/configuration/git.py

+7-1
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,11 @@ class GitReference(ConfigModel):
2424
"main",
2525
description="Branch on which your files live by default. Typically main or master. This can also be a commit hash.",
2626
)
27-
27+
url_subdir: Optional[str] = Field(
28+
default=None,
29+
description="Prefix to prepend when generating URLs for files - useful when files are in a subdirectory. "
30+
"Only affects URL generation, not git operations.",
31+
)
2832
url_template: Optional[str] = Field(
2933
None,
3034
description=f"Template for generating a URL to a file in the repo e.g. '{_GITHUB_URL_TEMPLATE}'. We can infer this for GitHub and GitLab repos, and it is otherwise required."
@@ -68,6 +72,8 @@ def infer_url_template(cls, url_template: Optional[str], values: dict) -> str:
6872

6973
def get_url_for_file_path(self, file_path: str) -> str:
    """Return the URL for ``file_path`` built from ``url_template``.

    When ``url_subdir`` is set, it is prepended to ``file_path`` before the
    path is substituted into the template. Leading/trailing slashes on the
    subdirectory and a leading slash on the file path are dropped first, so
    the joined path never contains an empty ``//`` segment (e.g. a
    ``url_subdir`` of ``"dbt/"`` behaves the same as ``"dbt"``).

    Args:
        file_path: Path of the file to link to. When ``url_subdir`` is
            configured, this path is taken relative to that subdirectory.

    Returns:
        ``url_template`` formatted with ``repo_url``, ``branch`` and the
        (possibly prefixed) ``file_path``.

    Raises:
        AssertionError: If ``url_template`` is unset or empty.
    """
    assert self.url_template
    # A subdir that is None, empty, or made only of slashes adds no prefix.
    subdir = (self.url_subdir or "").strip("/")
    if subdir:
        file_path = f"{subdir}/{file_path.lstrip('/')}"
    return self.url_template.format(
        repo_url=self.repo, branch=self.branch, file_path=file_path
    )

metadata-ingestion/tests/integration/git/test_git_clone.py

+31-4
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import os
2+
import pathlib
23

34
import pytest
45
from pydantic import SecretStr
@@ -12,7 +13,7 @@
1213
LOOKML_TEST_SSH_KEY = os.environ.get("DATAHUB_LOOKML_GIT_TEST_SSH_KEY")
1314

1415

15-
def test_base_url_guessing():
16+
def test_base_url_guessing() -> None:
1617
# Basic GitHub repo.
1718
config = GitInfo(repo="https://github.com/datahub-project/datahub", branch="master")
1819
assert config.repo_ssh_locator == "git@github.com:datahub-project/datahub.git"
@@ -70,7 +71,7 @@ def test_base_url_guessing():
7071
)
7172

7273

73-
def test_github_branch():
74+
def test_github_branch() -> None:
7475
config = GitInfo(
7576
repo="owner/repo",
7677
)
@@ -83,11 +84,37 @@ def test_github_branch():
8384
assert config.branch_for_clone == "main"
8485

8586

87+
def test_url_subdir() -> None:
    """Check that ``url_subdir`` is prefixed onto the file path in generated URLs."""
    # Each case: (repo, url_subdir, expected URL for "model.sql").
    cases = [
        (
            "https://github.com/org/repo",
            "dbt",
            "https://github.com/org/repo/blob/main/dbt/model.sql",
        ),
        (
            "https://gitlab.com/org/repo",
            "dbt",
            "https://gitlab.com/org/repo/-/blob/main/dbt/model.sql",
        ),
        # An empty subdir must leave the file path unprefixed.
        (
            "https://github.com/org/repo",
            "",
            "https://github.com/org/repo/blob/main/model.sql",
        ),
        # A nested subdir is inserted as-is.
        (
            "https://github.com/org/repo",
            "dbt/models",
            "https://github.com/org/repo/blob/main/dbt/models/model.sql",
        ),
    ]
    for repo, subdir, expected_url in cases:
        git_ref = GitReference(repo=repo, url_subdir=subdir)
        assert git_ref.get_url_for_file_path("model.sql") == expected_url
111+
112+
86113
def test_sanitize_repo_url() -> None:
    """Hand the ``git_import`` module to ``assert_doctest``.

    NOTE(review): presumably this runs the module's doctests; confirm against
    the ``assert_doctest`` helper, which is defined outside this view.
    """
    git_import_module = datahub.ingestion.source.git.git_import
    assert_doctest(git_import_module)
88115

89116

90-
def test_git_clone_public(tmp_path):
117+
def test_git_clone_public(tmp_path: pathlib.Path) -> None:
91118
git_clone = GitClone(str(tmp_path))
92119
checkout_dir = git_clone.clone(
93120
ssh_key=None,
@@ -107,7 +134,7 @@ def test_git_clone_public(tmp_path):
107134
LOOKML_TEST_SSH_KEY is None,
108135
reason="DATAHUB_LOOKML_GIT_TEST_SSH_KEY env variable is not configured",
109136
)
110-
def test_git_clone_private(tmp_path):
137+
def test_git_clone_private(tmp_path: pathlib.Path) -> None:
111138
git_clone = GitClone(str(tmp_path))
112139
secret_key = SecretStr(LOOKML_TEST_SSH_KEY) if LOOKML_TEST_SSH_KEY else None
113140

0 commit comments

Comments
 (0)