|
1 | 1 | import fnmatch
|
2 | 2 | import io
|
3 | 3 | import os
|
| 4 | +import re |
4 | 5 | from collections.abc import Iterable, Iterator
|
5 | 6 | from typing import TYPE_CHECKING, Callable, Optional
|
6 | 7 |
|
@@ -98,14 +99,35 @@ def get_fetch_url(scm: "Git", remote: Optional[str] = None): # noqa: C901,PLR09
|
98 | 99 | return scm.get_remote_url(remote)
|
99 | 100 |
|
100 | 101 |
|
# Matches the static (wildcard-free) leading part of an include path: the
# longest run from the start of the string that contains none of `*`, `?` or
# `[` and that ends either at a `/` or at the end of the string. The match is
# exposed as the named group "prefix" (e.g. "data/raw/*.bin" -> "data/raw/",
# "data/raw" -> "data/raw").
# A pattern whose first path segment already contains one of those characters
# (e.g. "*.bin") does not match at all; `_collect_objects` then falls back to
# searching from "/".
| 102 | +_ROOT_PATH_PREFIX_REGEX = re.compile(r"^(?P<prefix>[^*?\[]*(?:/|$))") |
| 103 | + |
| 104 | + |
101 | 105 | def _collect_objects(
|
102 | 106 | scm: "Git",
|
103 | 107 | rev: str,
|
104 | 108 | include: Optional[list[str]],
|
105 | 109 | exclude: Optional[list[str]],
|
106 | 110 | ) -> Iterator[Pointer]:
|
107 | 111 | fs = scm.get_fs(rev)
|
108 |
| - for path in _filter_paths(fs.find("/"), include, exclude): |
| 112 | + # Optimize path filtering if the `include` list contains exactly one path. |
| 113 | + # First, determine the root directory wherein to initiate the file search. |
| 114 | + # If the `include` path is a Unix filename pattern, determine the static |
| 115 | + # path prefix and set it as the root directory. Second, if the path and the |
| 116 | + # root are identical or the Unix filename pattern matches *any* (i.e., `**`) |
| 117 | + # file under the root directory, unset `include` to avoid unnecessary |
| 118 | + # filtering work. |
| 119 | + if ( |
| 120 | + include |
| 121 | + and len(include) == 1 |
| 122 | + and (result := _ROOT_PATH_PREFIX_REGEX.match(path := include[0])) |
| 123 | + ): |
| 124 | + root = result.group("prefix") |
| 125 | + if path in {root, f'{root.rstrip("/")}/**'}: |
| 126 | + include = [] |
| 127 | + else: |
| 128 | + root = "/" |
| 129 | + |
| 130 | + for path in _filter_paths(fs.find(root), include, exclude): |
109 | 131 | check_path = path.lstrip("/")
|
110 | 132 | if scm.check_attr(check_path, "filter", source=rev) == "lfs":
|
111 | 133 | try:
|
|
0 commit comments