Skip to content

Commit 8f9659f

Browse files
authored
fix(ingest/s3): incorrectly parsing path in s3_uri (#12135)
1 parent 67cdbb0 commit 8f9659f

File tree

1 file changed

+2
-3
lines changed
  • metadata-ingestion/src/datahub/ingestion/source/s3

1 file changed

+2
-3
lines changed

metadata-ingestion/src/datahub/ingestion/source/s3/source.py

+2 -3
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,7 @@
 9  9   from itertools import groupby
10 10   from pathlib import PurePath
11 11   from typing import Any, Dict, Iterable, List, Optional, Tuple
   12 + from urllib.parse import urlparse
12 13
13 14   import smart_open.compression as so_compression
14 15   from more_itertools import peekable
@@ -993,9 +994,7 @@ def s3_browser(self, path_spec: PathSpec, sample_size: int) -> Iterable[BrowsePa
 993  994   folders = []
 994  995   for dir in dirs_to_process:
 995  996       logger.info(f"Getting files from folder: {dir}")
 996      -     prefix_to_process = dir.rstrip("\\").lstrip(
 997      -         self.create_s3_path(bucket_name, "/")
 998      -     )
      997 +     prefix_to_process = urlparse(dir).path.lstrip("/")
 999  998
1000  999       folders.extend(
1001 1000           self.get_folder_info(

0 commit comments

Comments
 (0)