Skip to content

Commit bd11bec

Browse files
authored
lfs: optimize path filtering when include contains a single path (#355)
1 parent a99a943 commit bd11bec

File tree

1 file changed

+23
-1
lines changed

1 file changed

+23
-1
lines changed

src/scmrepo/git/lfs/fetch.py

+23-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import fnmatch
22
import io
33
import os
4+
import re
45
from collections.abc import Iterable, Iterator
56
from typing import TYPE_CHECKING, Callable, Optional
67

@@ -98,14 +99,35 @@ def get_fetch_url(scm: "Git", remote: Optional[str] = None): # noqa: C901,PLR09
9899
return scm.get_remote_url(remote)
99100

100101

102+
# Captures, as the named group `prefix`, the leading wildcard-free part of a
# path: the longest run of characters containing none of `*`, `?` or `[` that
# either ends with a `/` (included in the capture) or reaches the end of the
# string. For example, `data/raw/*.bin` yields `data/raw/` and a plain path
# such as `data/file.bin` is captured whole. The pattern does not match at all
# when no such prefix exists (e.g. `*.bin`), so callers must handle `None`.
_ROOT_PATH_PREFIX_REGEX = re.compile(r"^(?P<prefix>[^*?\[]*(?:/|$))")
103+
104+
101105
def _collect_objects(
102106
scm: "Git",
103107
rev: str,
104108
include: Optional[list[str]],
105109
exclude: Optional[list[str]],
106110
) -> Iterator[Pointer]:
107111
fs = scm.get_fs(rev)
108-
for path in _filter_paths(fs.find("/"), include, exclude):
112+
# Optimize path filtering if the `include` list contains exactly one path.
113+
# First, determine the root directory wherein to initiate the file search.
114+
# If the `include` path is a Unix filename pattern, determine the static
115+
# path prefix and set it as the root directory. Second, if the path and the
116+
# root are identical or the Unix filename pattern matches *any* (i.e., `**`)
117+
# file under the root directory, unset `include` to avoid unnecessary
118+
# filtering work.
119+
if (
120+
include
121+
and len(include) == 1
122+
and (result := _ROOT_PATH_PREFIX_REGEX.match(path := include[0]))
123+
):
124+
root = result.group("prefix")
125+
if path in {root, f'{root.rstrip("/")}/**'}:
126+
include = []
127+
else:
128+
root = "/"
129+
130+
for path in _filter_paths(fs.find(root), include, exclude):
109131
check_path = path.lstrip("/")
110132
if scm.check_attr(check_path, "filter", source=rev) == "lfs":
111133
try:

0 commit comments

Comments
 (0)