Skip to content

Commit 51e8eb0

Browse files
libcdb: add offline_only to search_by_symbol_offsets (#2388)
* Add `offline_only` for `search_by_symbol_offsets` * Fix bug * Update CHANGELOG * Remove redundant code * Update --------- Co-authored-by: peace-maker <[email protected]>
1 parent e92a30b commit 51e8eb0

File tree

2 files changed

+124
-25
lines changed

2 files changed

+124
-25
lines changed

CHANGELOG.md

+2
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ The table below shows which release corresponds to each branch, and what date th
8282
- [#2391][2391] Fix error message when passing invalid kwargs to `xor`
8383
- [#2376][2376] Return buffered data on first EOF in tube.readline()
8484
- [#2387][2387] Convert apport_corefile() output from bytes-like object to string
85+
- [#2388][2388] libcdb: add `offline_only` to `search_by_symbol_offsets`
8586

8687
[2360]: https://github.com/Gallopsled/pwntools/pull/2360
8788
[2356]: https://github.com/Gallopsled/pwntools/pull/2356
@@ -93,6 +94,7 @@ The table below shows which release corresponds to each branch, and what date th
9394
[2391]: https://github.com/Gallopsled/pwntools/pull/2391
9495
[2376]: https://github.com/Gallopsled/pwntools/pull/2376
9596
[2387]: https://github.com/Gallopsled/pwntools/pull/2387
97+
[2388]: https://github.com/Gallopsled/pwntools/pull/2388
9698

9799
## 4.13.0 (`beta`)
98100

pwnlib/libcdb.py

+122-25
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,43 @@ def provider_local_database(hex_encoded_id, hash_type):
143143

144144
return None
145145

146+
def query_local_database(params):
    """
    Search the local libc database for libcs matching the given symbol offsets.

    Every ``*.symbols`` file found below ``context.local_libcdb`` is parsed and
    compared against the requested symbols. A libc matches when, for every
    requested symbol, the low 12 bits of the requested address equal the low
    12 bits of the offset recorded in the symbols file.

    Arguments:
        params(dict):
            Query parameters. Only the ``"symbols"`` entry is used: a dictionary
            mapping symbol names to addresses given as ``int`` or hex ``str``.

    Returns:
        A list with one info dictionary (built by ``_pack_libs_info``) per
        matching libc, or :const:`None` if no local database is configured,
        the configured path is not a directory, or no symbols were requested.
    """
    if not context.local_libcdb or not params.get("symbols"):
        return None

    localdb = Path(context.local_libcdb)
    if not localdb.is_dir():
        return None

    # Normalize the requested addresses once instead of once per symbols file.
    # Only the last 12 bits take part in the comparison.
    wanted = {}
    for name, addr in params["symbols"].items():
        if isinstance(addr, str):
            addr = int(addr, 16)
        wanted[name] = addr & 0xfff

    res = []

    # Loop through each '.symbols' file in the local database.
    # Sort the paths so the result order does not depend on the order
    # in which `Path.rglob` yields them.
    for symbol_file in sorted(localdb.rglob("*.symbols"), key=lambda x: x.as_posix()):
        libc_syms = _parse_libc_symbol(symbol_file)

        # `all` stops at the first mismatch. A symbol that is missing from the
        # file (or recorded with offset 0) counts as a mismatch.
        if not all(
            libc_syms.get(name) and (libc_syms[name] & 0xfff) == low_bits
            for name, low_bits in wanted.items()
        ):
            continue

        libs_id = symbol_file.stem
        libc_path = symbol_file.parent / ("%s.so" % libs_id)

        # A missing '.url' file should not abort the whole search,
        # fall back to an empty URL for that entry.
        url_file = symbol_file.parent / ("%s.url" % libs_id)
        libs_url = read(url_file).decode().strip() if url_file.is_file() else ""

        res.append(_pack_libs_info(libc_path, libs_id, libs_url, libc_syms))

    return res
182+
146183
PROVIDERS = {
147184
"offline": [provider_local_system, provider_local_database],
148185
"online": [provider_libcdb, provider_libc_rip]
@@ -546,7 +583,7 @@ def _handle_multiple_matching_libcs(matching_libcs):
546583
selected_index = options("Select the libc version to use:", [libc['id'] for libc in matching_libcs])
547584
return matching_libcs[selected_index]
548585

549-
def search_by_symbol_offsets(symbols, select_index=None, unstrip=True, return_as_list=False):
586+
def search_by_symbol_offsets(symbols, select_index=None, unstrip=True, return_as_list=False, offline_only=False):
550587
"""
551588
Lookup possible matching libc versions based on leaked function addresses.
552589
@@ -568,6 +605,9 @@ def search_by_symbol_offsets(symbols, select_index=None, unstrip=True, return_as
568605
return_as_list(bool):
569606
Return a list of build ids of all matching libc versions
570607
instead of a path to a downloaded file.
608+
offline_only(bool):
609+
When pass `offline_only=True`, restricts search mode to offline sources only,
610+
disable online lookup. Defaults to `False`, and enable both offline and online providers.
571611
572612
Returns:
573613
Path to the downloaded library on disk, or :const:`None`.
@@ -592,27 +632,50 @@ def search_by_symbol_offsets(symbols, select_index=None, unstrip=True, return_as
592632

593633
params = {'symbols': symbols}
594634
log.debug('Request: %s', params)
595-
matching_libcs = query_libc_rip(params)
596-
log.debug('Result: %s', matching_libcs)
597-
if matching_libcs is None or len(matching_libcs) == 0:
598-
log.warn_once("No matching libc for symbols %r on libc.rip", symbols)
635+
636+
offline_matching = query_local_database(params)
637+
online_matching = query_libc_rip(params) if not offline_only else None
638+
639+
if offline_matching is None:
640+
offline_matching = []
641+
if online_matching is None:
642+
online_matching = []
643+
644+
# Aggregate and deduplicate matches from both sources
645+
matching_libcs = {}
646+
for libc in offline_matching + online_matching:
647+
if libc['id'] not in matching_libcs:
648+
matching_libcs[libc['id']] = libc
649+
650+
log.debug('Offline search result: %s', offline_matching)
651+
if not offline_only:
652+
log.debug('Online search result: %s', online_matching)
653+
654+
# Check if no matches are found
655+
if len(matching_libcs) == 0:
656+
log.warn_once("No matching libc for symbols %r", symbols)
599657
return None
600658

659+
matching_list = list(matching_libcs.values())
660+
601661
if return_as_list:
602-
return [libc['buildid'] for libc in matching_libcs]
662+
return [libc['buildid'] for libc in matching_list]
603663

604-
if len(matching_libcs) == 1:
605-
return search_by_build_id(matching_libcs[0]['buildid'], unstrip=unstrip)
664+
# If there's only one match, return it directly
665+
if len(matching_list) == 1:
666+
return search_by_build_id(matching_list[0]['buildid'], unstrip=unstrip, offline_only=offline_only)
606667

668+
# If a specific index is provided, validate it and return the selected libc
607669
if select_index is not None:
608-
if select_index > 0 and select_index <= len(matching_libcs):
609-
return search_by_build_id(matching_libcs[select_index - 1]['buildid'], unstrip=unstrip)
670+
if select_index > 0 and select_index <= len(matching_list):
671+
return search_by_build_id(matching_list[select_index - 1]['buildid'], unstrip=unstrip, offline_only=offline_only)
610672
else:
611-
log.error('Invalid selected libc index. %d is not in the range of 1-%d.', select_index, len(matching_libcs))
673+
log.error('Invalid selected libc index. %d is not in the range of 1-%d.', select_index, len(matching_list))
612674
return None
613675

614-
selected_libc = _handle_multiple_matching_libcs(matching_libcs)
615-
return search_by_build_id(selected_libc['buildid'], unstrip=unstrip)
676+
# Handle multiple matches interactively if no index is specified
677+
selected_libc = _handle_multiple_matching_libcs(matching_list)
678+
return search_by_build_id(selected_libc['buildid'], unstrip=unstrip, offline_only=offline_only)
616679

617680
def search_by_build_id(hex_encoded_id, unstrip=True, offline_only=False):
618681
"""
@@ -624,9 +687,8 @@ def search_by_build_id(hex_encoded_id, unstrip=True, offline_only=False):
624687
unstrip(bool):
625688
Try to fetch debug info for the libc and apply it to the downloaded file.
626689
offline_only(bool):
627-
Both offline and online providers are used by default. When pass
628-
`offline_only=True`, libcdb enable an exclusive offline search mode,
629-
which will disable online providers.
690+
When pass `offline_only=True`, restricts search mode to offline sources only,
691+
disable online lookup. Defaults to `False`, and enable both offline and online providers.
630692
631693
Returns:
632694
Path to the downloaded library on disk, or :const:`None`.
@@ -654,9 +716,8 @@ def search_by_md5(hex_encoded_id, unstrip=True, offline_only=False):
654716
unstrip(bool):
655717
Try to fetch debug info for the libc and apply it to the downloaded file.
656718
offline_only(bool):
657-
Both offline and online providers are used by default. When pass
658-
`offline_only=True`, libcdb enable an exclusive offline search mode,
659-
which will disable online providers.
719+
When pass `offline_only=True`, restricts search mode to offline sources only,
720+
disable online lookup. Defaults to `False`, and enable both offline and online providers.
660721
661722
Returns:
662723
Path to the downloaded library on disk, or :const:`None`.
@@ -684,9 +745,8 @@ def search_by_sha1(hex_encoded_id, unstrip=True, offline_only=False):
684745
unstrip(bool):
685746
Try to fetch debug info for the libc and apply it to the downloaded file.
686747
offline_only(bool):
687-
Both offline and online providers are used by default. When pass
688-
`offline_only=True`, libcdb enable an exclusive offline search mode,
689-
which will disable online providers.
748+
When pass `offline_only=True`, restricts search mode to offline sources only,
749+
disable online lookup. Defaults to `False`, and enable both offline and online providers.
690750
691751
Returns:
692752
Path to the downloaded library on disk, or :const:`None`.
@@ -714,9 +774,8 @@ def search_by_sha256(hex_encoded_id, unstrip=True, offline_only=False):
714774
unstrip(bool):
715775
Try to fetch debug info for the libc and apply it to the downloaded file.
716776
offline_only(bool):
717-
Both offline and online providers are used by default. When pass
718-
`offline_only=True`, libcdb enable an exclusive offline search mode,
719-
which will disable online providers.
777+
When pass `offline_only=True`, restricts search mode to offline sources only,
778+
disable online lookup. Defaults to `False`, and enable both offline and online providers.
720779
721780
Returns:
722781
Path to the downloaded library on disk, or :const:`None`.
@@ -734,7 +793,45 @@ def search_by_sha256(hex_encoded_id, unstrip=True, offline_only=False):
734793
"""
735794
return search_by_hash(hex_encoded_id, 'sha256', unstrip, offline_only)
736795

796+
def _parse_libc_symbol(path):
    """
    Parse symbols file to `dict`, the format is same as https://github.com/niklasb/libc-database/

    Every non-blank line is expected to hold a symbol name followed by its
    hexadecimal offset, separated by whitespace. Blank lines are ignored.

    Arguments:
        path:
            Path of the symbols file to read.

    Returns:
        Dictionary mapping each symbol name to its offset as an integer.

    Raises:
        ValueError: If a non-blank line does not consist of exactly two fields
            or the second field is not a hexadecimal number.
    """

    syms = {}

    with open(path, "r") as fd:
        for line in fd:
            # Split on any run of whitespace so tabs, repeated spaces and
            # line endings do not produce extra or empty fields.
            fields = line.split()
            if not fields:
                # Blank line, e.g. trailing newlines at the end of the file
                continue

            name, addr = fields
            syms[name] = int(addr, 16)

    return syms
809+
810+
def _pack_libs_info(path, libs_id, libs_url, syms):
    """
    Build the info dictionary describing a libc found in the local database.

    The JSON format is the same as libc.rip, and the "download_url" field is by default an empty string,
    as it's not required in offline mode.

    Arguments:
        path:
            Path of the libc file, handed to every hash function in ``HASHES``.
        libs_id(str):
            Identifier of the libc, stored under ``"id"``.
        libs_url(str):
            URL stored under ``"libs_url"``.
        syms(dict):
            Dictionary mapping symbol names to integer offsets.

    Returns:
        Dictionary with the keys ``"id"``, ``"libs_url"``, ``"download_url"``,
        one key per entry of ``HASHES`` and ``"symbols"``, which maps the
        default symbol names present in ``syms`` to their offsets as hex strings.
    """

    info = {}

    info["id"] = libs_id
    info["libs_url"] = libs_url
    info["download_url"] = ""

    for hash_type, hash_func in HASHES.items():
        # Drop the underscore from the hash name, 'build_id' becomes 'buildid'
        info[hash_type.replace("_", "")] = hash_func(path)

    default_symbol_list = [
        "__libc_start_main_ret", "dup2", "printf", "puts", "read", "system", "str_bin_sh"
    ]

    # Only report the default symbols this libc actually provides. Indexing
    # `syms` unconditionally raised a KeyError for a symbols file lacking one.
    info["symbols"] = {}
    for name in default_symbol_list:
        if name in syms:
            info["symbols"][name] = hex(syms[name])

    return info
738835

739836

740837
def get_build_id_offsets():

0 commit comments

Comments
 (0)