Skip to content

Commit 0fb16e3

Browse files
✨feat(source-microsoft-sharepoint): Provide ability to sync other sites than Main sharepoint site (#54658)
1 parent b517356 commit 0fb16e3

File tree

8 files changed

+74
-11
lines changed

8 files changed

+74
-11
lines changed

airbyte-integrations/connectors/source-microsoft-sharepoint/integration_tests/spec.json

+7
Original file line numberDiff line numberDiff line change
@@ -512,6 +512,13 @@
512512
"default": ".",
513513
"order": 4,
514514
"type": "string"
515+
},
516+
"site_url": {
517+
"title": "Site URL",
518+
"description": "Url of SharePoint site to search for files. Leave empty to search in the main site.",
519+
"default": "",
520+
"order": 5,
521+
"type": "string"
515522
}
516523
},
517524
"required": ["streams", "credentials"]

airbyte-integrations/connectors/source-microsoft-sharepoint/metadata.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ data:
2020
connectorSubtype: file
2121
connectorType: source
2222
definitionId: 59353119-f0f2-4e5a-a8ba-15d887bc34f6
23-
dockerImageTag: 0.7.2
23+
dockerImageTag: 0.8.0
2424
dockerRepository: airbyte/source-microsoft-sharepoint
2525
githubIssueLabel: source-microsoft-sharepoint
2626
icon: microsoft-sharepoint.svg

airbyte-integrations/connectors/source-microsoft-sharepoint/pyproject.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ requires = [ "poetry-core>=1.0.0",]
33
build-backend = "poetry.core.masonry.api"
44

55
[tool.poetry]
6-
version = "0.7.2"
6+
version = "0.8.0"
77
name = "source-microsoft-sharepoint"
88
description = "Source implementation for Microsoft SharePoint."
99
authors = [ "Airbyte <[email protected]>",]

airbyte-integrations/connectors/source-microsoft-sharepoint/source_microsoft_sharepoint/source.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515

1616

1717
class SourceMicrosoftSharePoint(FileBasedSource):
18-
SCOPES = ["offline_access", "Files.Read.All"]
18+
SCOPES = ["offline_access", "Files.Read.All", "Sites.Read.All", "Sites.Selected"]
1919

2020
def __init__(self, catalog: Optional[ConfiguredAirbyteCatalog], config: Optional[Mapping[str, Any]], state: Optional[TState]):
2121
super().__init__(

airbyte-integrations/connectors/source-microsoft-sharepoint/source_microsoft_sharepoint/spec.py

+6
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,12 @@ class Config:
111111
order=4,
112112
default=".",
113113
)
114+
site_url: str = Field(
115+
title="Site URL",
116+
description="Url of SharePoint site to search for files. Leave empty to search in the main site.",
117+
order=5,
118+
default="",
119+
)
114120

115121
@classmethod
116122
def documentation_url(cls) -> str:

airbyte-integrations/connectors/source-microsoft-sharepoint/source_microsoft_sharepoint/stream_reader.py

+18-2
Original file line numberDiff line numberDiff line change
@@ -202,13 +202,29 @@ def _get_files_by_drive_name(self, drives, folder_path):
202202

203203
yield from self._list_directories_and_files(folder, folder_path_url)
204204

205+
def get_site_drive(self):
    """
    Retrieves the drives of the SharePoint site selected in the config.

    When ``config.site_url`` is empty, the drives are requested from the client's
    top-level ``drives`` collection (the main site). Otherwise the site is looked up
    by the configured URL and the drives of that site are requested.

    Returns:
        The result of ``execute_query_with_retry`` for the selected drives query.

    Raises:
        AirbyteTracedException: With ``FailureType.config_error`` if the drives cannot
            be retrieved for any reason. The original exception is chained as the cause
            so its traceback is preserved for debugging.
    """
    site_url = self.config.site_url
    try:
        if not site_url:
            # No site configured: query the drives of the main site.
            drives_query = self.one_drive_client.drives.get()
        else:
            # Resolve the configured site by its URL and query that site's drives.
            drives_query = self.one_drive_client.sites.get_by_url(site_url).drives.get()

        return execute_query_with_retry(drives_query)
    except Exception as ex:
        site = site_url if site_url else "default"
        # Chain the original exception so the root cause is not lost in the traceback.
        raise AirbyteTracedException(
            f"Failed to retrieve drives from sharepoint {site} site. Error: {str(ex)}", failure_type=FailureType.config_error
        ) from ex
220+
205221
@property
206222
@lru_cache(maxsize=None)
207223
def drives(self):
208224
"""
209225
Retrieves and caches SharePoint drives, including the user's drive based on authentication type.
210226
"""
211-
drives = execute_query_with_retry(self.one_drive_client.drives.get())
227+
drives = self.get_site_drive()
212228

213229
# skip this step for application authentication flow
214230
if self.config.credentials.auth_type != "Client" or (
@@ -299,7 +315,7 @@ def open_file(self, file: RemoteFile, mode: FileReadMode, encoding: Optional[str
299315
def _get_file_transfer_paths(self, file: RemoteFile, local_directory: str) -> List[str]:
300316
preserve_directory_structure = self.preserve_directory_structure()
301317
file_path = file.uri
302-
match = re.search(r"sharepoint\.com/Shared%20Documents(.*)", file_path)
318+
match = re.search(r"sharepoint\.com(?:/sites/[^/]+)?/Shared%20Documents(.*)", file_path)
303319
if match:
304320
file_path = match.group(1)
305321

airbyte-integrations/connectors/source-microsoft-sharepoint/unit_tests/test_stream_reader.py

+34-6
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ def setup_reader_class():
3939
config.start_date = None
4040
config.credentials = Mock()
4141
config.folder_path = "."
42+
config.site_url = ""
4243
config.credentials.auth_type = "Client"
4344
config.search_scope = "ALL"
4445
reader.config = config # Set up the necessary configuration
@@ -193,17 +194,44 @@ def test_open_file(mock_smart_open, file_extension, expected_compression):
193194

194195

195196
@pytest.mark.parametrize(
196-
"file_extension, expected_paths",
197+
"file_uri, file_extension, expected_paths",
197198
[
198-
("txt.gz", {"bytes": ANY, "file_relative_path": "file.txt.gz", "file_url": f"{TEST_LOCAL_DIRECTORY}/file.txt.gz"}),
199-
("txt.bz2", {"bytes": ANY, "file_relative_path": "file.txt.bz2", "file_url": f"{TEST_LOCAL_DIRECTORY}/file.txt.bz2"}),
200-
("txt", {"bytes": ANY, "file_relative_path": "file.txt", "file_url": f"{TEST_LOCAL_DIRECTORY}/file.txt"}),
199+
(
200+
"https://my_favorite_sharepoint.sharepoint.com/Shared%20Documents/file",
201+
"txt.gz",
202+
{"bytes": ANY, "file_relative_path": "file.txt.gz", "file_url": f"{TEST_LOCAL_DIRECTORY}/file.txt.gz"},
203+
),
204+
(
205+
"https://my_favorite_sharepoint.sharepoint.com/Shared%20Documents/file",
206+
"txt.bz2",
207+
{"bytes": ANY, "file_relative_path": "file.txt.bz2", "file_url": f"{TEST_LOCAL_DIRECTORY}/file.txt.bz2"},
208+
),
209+
(
210+
"https://my_favorite_sharepoint.sharepoint.com/Shared%20Documents/file",
211+
"txt",
212+
{"bytes": ANY, "file_relative_path": "file.txt", "file_url": f"{TEST_LOCAL_DIRECTORY}/file.txt"},
213+
),
214+
(
215+
"https://my_favorite_sharepoint.sharepoint.com/sites/NOT_DEFAULT_SITE/Shared%20Documents/file",
216+
"txt.gz",
217+
{"bytes": ANY, "file_relative_path": "file.txt.gz", "file_url": f"{TEST_LOCAL_DIRECTORY}/file.txt.gz"},
218+
),
219+
(
220+
"https://my_favorite_sharepoint.sharepoint.com/sites/NOT_DEFAULT_SITE/Shared%20Documents/file",
221+
"txt.bz2",
222+
{"bytes": ANY, "file_relative_path": "file.txt.bz2", "file_url": f"{TEST_LOCAL_DIRECTORY}/file.txt.bz2"},
223+
),
224+
(
225+
"https://my_favorite_sharepoint.sharepoint.com/sites/NOT_DEFAULT_SITE/Shared%20Documents/file",
226+
"txt",
227+
{"bytes": ANY, "file_relative_path": "file.txt", "file_url": f"{TEST_LOCAL_DIRECTORY}/file.txt"},
228+
),
201229
],
202230
)
203231
@patch("source_microsoft_sharepoint.stream_reader.SourceMicrosoftSharePointStreamReader.get_access_token")
204232
@patch("source_microsoft_sharepoint.stream_reader.requests.get")
205233
@patch("source_microsoft_sharepoint.stream_reader.requests.head")
206-
def test_get_file(mock_requests_head, mock_requests_get, mock_get_access_token, file_extension, expected_paths):
234+
def test_get_file(mock_requests_head, mock_requests_get, mock_get_access_token, file_uri, file_extension, expected_paths):
207235
"""
208236
Test the get_file method in SourceMicrosoftSharePointStreamReader.
209237
@@ -218,7 +246,7 @@ def test_get_file(mock_requests_head, mock_requests_get, mock_get_access_token,
218246
file_extension (str): The file extension to test (e.g., 'txt.gz').
219247
expected_paths (dict): The expected paths and file size in the result.
220248
"""
221-
file_uri = f"https://my_favorite_sharepoint.sharepoint.com/Shared%20Documents/file.{file_extension}"
249+
file_uri = f"{file_uri}.{file_extension}"
222250
mock_file = Mock(download_url=f"https://example.com/file.{file_extension}", uri=file_uri)
223251
mock_logger = Mock()
224252
mock_get_access_token.return_value = "dummy_access_token"

docs/integrations/sources/microsoft-sharepoint.md

+6
Original file line numberDiff line numberDiff line change
@@ -267,6 +267,11 @@ Format options will not be taken into account. Instead, files will be transferre
267267

268268
If enabled, sends subdirectory folder structure along with source file names to the destination. Otherwise, files will be synced by their names only. This option is ignored when file-based replication is not enabled.
269269

270+
### Multi-Site Support
271+
272+
By providing a URL in the Site URL field, the connector can access the files in that specific SharePoint site.
273+
The site URL should be in the format `https://<tenant_name>.sharepoint.com/sites/<site>`. If the field is left empty, the connector will access the files in the main site.
274+
270275
### Supported sync modes
271276

272277
The Microsoft SharePoint source connector supports the following [sync modes](https://docs.airbyte.com/cloud/core-concepts/#connection-sync-modes):
@@ -300,6 +305,7 @@ The connector is restricted by normal Microsoft Graph [requests limitation](http
300305

301306
| Version | Date | Pull Request | Subject |
302307
|:--------|:-----------|:---------------------------------------------------------|:--------------------------------------------------------------------------|
308+
| 0.8.0 | 2025-03-12 | [54658](https://github.com/airbytehq/airbyte/pull/54658) | Provide ability to sync other sites than Main sharepoint site |
303309
| 0.7.2 | 2025-03-08 | [55427](https://github.com/airbytehq/airbyte/pull/55427) | Update dependencies |
304310
| 0.7.1 | 2025-03-01 | [54749](https://github.com/airbytehq/airbyte/pull/54749) | Update dependencies |
305311
| 0.7.0 | 2025-02-27 | [54200](https://github.com/airbytehq/airbyte/pull/54200) | Add advanced Oauth |

0 commit comments

Comments
 (0)