8
8
import time
9
9
import six
10
10
import tempfile
11
+ import struct
11
12
12
13
from pwnlib .context import context
13
14
from pwnlib .elf import ELF
14
15
from pwnlib .filesystem .path import Path
15
16
from pwnlib .log import getLogger
16
17
from pwnlib .tubes .process import process
17
- from pwnlib .util .fiddling import enhex
18
+ from pwnlib .util .fiddling import enhex , unhex
18
19
from pwnlib .util .hashes import sha1filehex , sha256filehex , md5filehex
19
20
from pwnlib .util .misc import read
20
21
from pwnlib .util .misc import which
23
24
24
25
log = getLogger (__name__ )
25
26
26
- HASHES = {
27
- 'build_id' : lambda path : enhex (ELF (path , checksec = False ).buildid or b'' ),
27
+
28
def _turbofast_extract_build_id(path):
    """
    Extract the GNU build ID of a file without fully parsing the ELF.

    Only the first 0x1000 bytes of the file are read and scanned for the
    little-endian ``type`` + ``name`` fields of an NT_GNU_BUILD_ID note.
    A candidate is accepted only when the surrounding note header is
    plausible; otherwise the build ID is taken from :class:`ELF`.

    Elf_External_Note:

    0x00 +--------+
         | namesz | <- Size of entry's owner string
    0x04 +--------+
         | descsz | <- Size of the note descriptor
    0x08 +--------+
         |  type  | <- Interpretation of the descriptor
    0x0c +--------+
         |  name  | <- Start of the name+desc data
    ...  +--------+
         |  desc  |
    ...  +--------+

    Arguments:
        path(str):
            Path of the file to inspect.

    Returns:
        The hex-encoded build ID. If the ELF has no build ID, the hex
        encoding of an empty byte string.
    """
    data = read(path, 0x1000)
    # NT_GNU_BUILD_ID (type 3, little-endian) followed by the owner b"GNU\x00"
    marker = unhex("03000000474e5500")

    idx = data.find(marker)
    while idx != -1:
        # namesz and descsz sit in the 8 bytes in front of the type field,
        # so a match closer to the start of the file cannot be a real note.
        if idx >= 8:
            namesz, descsz = struct.unpack("<LL", data[idx - 8:idx])
            desc = data[idx + 8:idx + 8 + descsz]
            # Reject coincidental byte matches (wrong owner length) and
            # descriptors that run past the part of the file we read.
            if namesz == 4 and descsz and len(desc) == descsz:
                return enhex(desc)
        idx = data.find(marker, idx + 1)

    # Slow path: let the full ELF parser locate the build ID
    return enhex(ELF(path, checksec=False).buildid or b'')
51
+
52
+
53
# Supported search types. Each value is the function that computes the
# identifier of that type for a libc file on disk; 'libs_id' has none.
TYPES = {
    'libs_id': None,
    'build_id': _turbofast_extract_build_id,
    'sha1': sha1filehex,
    'sha256': sha256filehex,
    'md5': md5filehex,
}
60
+
61
# Translation from our search type names to the field names used in
# search results (same as libc.rip). Types not listed here are used as-is.
MAP_TYPES = {
    'libs_id': 'id',
    'build_id': 'buildid',
}
66
+
32
67
# Base URLs of the debuginfod servers known to this module
DEBUGINFOD_SERVERS = ['https://debuginfod.elfutils.org/']
42
77
43
78
# https://gitlab.com/libcdb/libcdb wasn't updated after 2019,
44
79
# but still is a massive database of older libc binaries.
45
- def provider_libcdb (hex_encoded_id , hash_type ):
80
+ def provider_libcdb (hex_encoded_id , search_type ):
81
+ if search_type == 'libs_id' :
82
+ return None
83
+
46
84
# Deferred import because it's slow
47
85
import requests
48
86
from six .moves import urllib
49
87
50
88
# Build the URL using the requested hash type
51
- url_base = "https://gitlab.com/libcdb/libcdb/raw/master/hashes/%s/" % hash_type
89
+ url_base = "https://gitlab.com/libcdb/libcdb/raw/master/hashes/%s/" % search_type
52
90
url = urllib .parse .urljoin (url_base , hex_encoded_id )
53
91
54
92
data = b""
@@ -58,15 +96,15 @@ def provider_libcdb(hex_encoded_id, hash_type):
58
96
data = wget (url , timeout = 20 )
59
97
60
98
if not data :
61
- log .warn_once ("Could not fetch libc for %s %s from libcdb" , hash_type , hex_encoded_id )
99
+ log .warn_once ("Could not fetch libc for %s %s from libcdb" , search_type , hex_encoded_id )
62
100
break
63
101
64
102
# GitLab serves up symlinks with
65
103
if data .startswith (b'..' ):
66
104
url = os .path .dirname (url ) + '/'
67
105
url = urllib .parse .urljoin (url .encode ('utf-8' ), data )
68
106
except requests .RequestException as e :
69
- log .warn_once ("Failed to fetch libc for %s %s from libcdb: %s" , hash_type , hex_encoded_id , e )
107
+ log .warn_once ("Failed to fetch libc for %s %s from libcdb: %s" , search_type , hex_encoded_id , e )
70
108
return data
71
109
72
110
def query_libc_rip (params ):
@@ -86,16 +124,17 @@ def query_libc_rip(params):
86
124
return None
87
125
88
126
# https://libc.rip/
89
- def provider_libc_rip (hex_encoded_id , hash_type ):
127
+ def provider_libc_rip (search_target , search_type ):
90
128
# Build the request for the hash type
91
129
# https://github.com/niklasb/libc-database/blob/master/searchengine/api.yml
92
- if hash_type == 'build_id' :
93
- hash_type = 'buildid'
94
- params = {hash_type : hex_encoded_id }
130
+ if search_type in MAP_TYPES .keys ():
131
+ search_type = MAP_TYPES [search_type ]
132
+
133
+ params = {search_type : search_target }
95
134
96
135
libc_match = query_libc_rip (params )
97
136
if not libc_match :
98
- log .warn_once ("Could not find libc info for %s %s on libc.rip" , hash_type , hex_encoded_id )
137
+ log .warn_once ("Could not find libc info for %s %s on libc.rip" , search_type , search_target )
99
138
return None
100
139
101
140
if len (libc_match ) > 1 :
@@ -107,13 +146,13 @@ def provider_libc_rip(hex_encoded_id, hash_type):
107
146
data = wget (url , timeout = 20 )
108
147
109
148
if not data :
110
- log .warn_once ("Could not fetch libc binary for %s %s from libc.rip" , hash_type , hex_encoded_id )
149
+ log .warn_once ("Could not fetch libc binary for %s %s from libc.rip" , search_type , search_target )
111
150
return None
112
151
return data
113
152
114
153
# Check if the local system libc matches the requested hash.
115
- def provider_local_system (hex_encoded_id , hash_type ):
116
- if hash_type == 'id ' :
154
+ def provider_local_system (hex_encoded_id , search_type ):
155
+ if search_type == 'libs_id ' :
117
156
return None
118
157
shell_path = os .environ .get ('SHELL' , None ) or '/bin/sh'
119
158
if not os .path .exists (shell_path ):
@@ -123,22 +162,29 @@ def provider_local_system(hex_encoded_id, hash_type):
123
162
if not local_libc :
124
163
log .debug ('Cannot lookup libc from shell %r. Skipping local system libc matching.' , shell_path )
125
164
return None
126
- if HASHES [ hash_type ](local_libc .path ) == hex_encoded_id :
165
+ if TYPES [ search_type ](local_libc .path ) == hex_encoded_id :
127
166
return local_libc .data
128
167
return None
129
168
130
169
# Offline search https://github.com/niklasb/libc-database for hash type
131
def provider_local_database(search_target, search_type):
    """
    Look up a libc in the local database directory ``context.local_libcdb``.

    Arguments:
        search_target(str):
            Value to look for: the library name when ``search_type`` is
            ``'libs_id'``, otherwise the identifier computed by the
            matching ``TYPES`` function.
        search_type(str):
            One of the keys of ``TYPES``.

    Returns:
        The contents of the first matching ``.so`` file, or :const:`None`
        when no local database directory is configured or nothing matches.
    """
    if not context.local_libcdb:
        return None

    localdb = Path(context.local_libcdb)
    if not localdb.is_dir():
        return None

    log.debug("Searching local libc database, %s: %s", search_type, search_target)

    # A libs ID is the file name itself, so no file has to be hashed.
    # Stop at the first hit instead of collecting every match.
    if search_type == 'libs_id':
        libc_path = next(iter(localdb.rglob("%s.so" % search_target)), None)
        if libc_path is None:
            return None
        return read(libc_path)

    # Every other type requires computing the identifier of each candidate
    for libc_path in localdb.rglob("*.so"):
        if search_target == TYPES[search_type](libc_path):
            return read(libc_path)

    return None
@@ -185,11 +231,28 @@ def query_local_database(params):
185
231
"online" : [provider_libcdb , provider_libc_rip ]
186
232
}
187
233
188
- def search_by_hash (hex_encoded_id , hash_type = 'build_id' , unstrip = True , offline_only = False ):
189
- assert hash_type in HASHES , hash_type
234
+ def search_by_hash (search_target , search_type = 'build_id' , unstrip = True , offline_only = False ):
235
+ """search_by_hash(str, str, bool, bool) -> str
236
+ Arguments:
237
+ search_target(str):
238
+ Use for searching the libc. This could be a hex encoded ID (`hex_encoded_id`) or a library
239
+ name (`libs_id`). Depending on `search_type`, this can represent different types of encoded
240
+ values or names.
241
+ search_type(str):
242
+ The type of the search to be performed, it should be one of the keys in the `TYPES` dictionary.
243
+ unstrip(bool):
244
+ Try to fetch debug info for the libc and apply it to the downloaded file.
245
+ offline_only(bool):
246
+ If True, restricts the search to offline providers only (local database). If False, it will also
247
+ search online providers. Default is False.
248
+
249
+ Returns:
250
+ The path to the cached directory containing the downloaded libraries.
251
+ """
252
+ assert search_type in TYPES , search_type
190
253
191
254
# Ensure that the libcdb cache directory exists
192
- cache , cache_valid = _check_elf_cache ('libcdb' , hex_encoded_id , hash_type )
255
+ cache , cache_valid = _check_elf_cache ('libcdb' , search_target , search_type )
193
256
if cache_valid :
194
257
return cache
195
258
@@ -203,12 +266,12 @@ def search_by_hash(hex_encoded_id, hash_type='build_id', unstrip=True, offline_o
203
266
204
267
# Run through all available libc database providers to see if we have a match.
205
268
for provider in providers :
206
- data = provider (hex_encoded_id , hash_type )
269
+ data = provider (search_target , search_type )
207
270
if data and data .startswith (b'\x7F ELF' ):
208
271
break
209
272
210
273
if not data :
211
- log .warn_once ("Could not find libc for %s %s anywhere" , hash_type , hex_encoded_id )
274
+ log .warn_once ("Could not find libc for %s %s anywhere" , search_type , search_target )
212
275
213
276
# Save whatever we got to the cache
214
277
write (cache , data or b'' )
@@ -257,7 +320,7 @@ def _search_debuginfo_by_hash(base_url, hex_encoded_id):
257
320
258
321
return cache
259
322
260
- def _check_elf_cache (cache_type , hex_encoded_id , hash_type ):
323
+ def _check_elf_cache (cache_type , search_target , search_type ):
261
324
"""
262
325
Check if there already is an ELF file for this hash in the cache.
263
326
@@ -270,14 +333,14 @@ def _check_elf_cache(cache_type, hex_encoded_id, hash_type):
270
333
True
271
334
"""
272
335
# Ensure that the cache directory exists
273
- cache_dir = os .path .join (context .cache_dir , cache_type , hash_type )
336
+ cache_dir = os .path .join (context .cache_dir , cache_type , search_type )
274
337
275
338
if not os .path .isdir (cache_dir ):
276
339
os .makedirs (cache_dir )
277
340
278
341
# If we already downloaded the file, and it looks even passingly like
279
342
# a valid ELF file, return it.
280
- cache = os .path .join (cache_dir , hex_encoded_id )
343
+ cache = os .path .join (cache_dir , search_target )
281
344
282
345
if not os .path .exists (cache ):
283
346
return cache , False
@@ -289,7 +352,7 @@ def _check_elf_cache(cache_type, hex_encoded_id, hash_type):
289
352
# Retry failed lookups after some time
290
353
if time .time () > os .path .getmtime (cache ) + NEGATIVE_CACHE_EXPIRY :
291
354
return cache , False
292
- log .info_once ("Skipping invalid cached ELF %s" , hex_encoded_id )
355
+ log .info_once ("Skipping invalid cached ELF %s" , search_target )
293
356
return None , False
294
357
295
358
log .info_once ("Using cached data from %r" , cache )
@@ -583,7 +646,7 @@ def _handle_multiple_matching_libcs(matching_libcs):
583
646
selected_index = options ("Select the libc version to use:" , [libc ['id' ] for libc in matching_libcs ])
584
647
return matching_libcs [selected_index ]
585
648
586
- def search_by_symbol_offsets (symbols , select_index = None , unstrip = True , return_as_list = False , offline_only = False ):
649
+ def search_by_symbol_offsets (symbols , select_index = None , unstrip = True , return_as_list = False , offline_only = False , search_type = 'build_id' ):
587
650
"""
588
651
Lookup possible matching libc versions based on leaked function addresses.
589
652
@@ -608,6 +671,8 @@ def search_by_symbol_offsets(symbols, select_index=None, unstrip=True, return_as
608
671
offline_only(bool):
609
672
When pass `offline_only=True`, restricts search mode to offline sources only,
610
673
disable online lookup. Defaults to `False`, and enable both offline and online providers.
674
+ search_type(str):
675
+ An option to select searched hash.
611
676
612
677
Returns:
613
678
Path to the downloaded library on disk, or :const:`None`.
@@ -626,6 +691,8 @@ def search_by_symbol_offsets(symbols, select_index=None, unstrip=True, return_as
626
691
>>> for buildid in matched_libcs: # doctest +SKIP
627
692
... libc = ELF(search_by_build_id(buildid)) # doctest +SKIP
628
693
"""
694
+ assert search_type in TYPES , search_type
695
+
629
696
for symbol , address in symbols .items ():
630
697
if isinstance (address , int ):
631
698
symbols [symbol ] = hex (address )
@@ -661,21 +728,49 @@ def search_by_symbol_offsets(symbols, select_index=None, unstrip=True, return_as
661
728
if return_as_list :
662
729
return [libc ['buildid' ] for libc in matching_list ]
663
730
731
+ mapped_type = MAP_TYPES .get (search_type , search_type )
732
+
664
733
# If there's only one match, return it directly
665
734
if len (matching_list ) == 1 :
666
- return search_by_build_id (matching_list [0 ]['buildid' ] , unstrip = unstrip , offline_only = offline_only )
735
+ return search_by_hash (matching_list [0 ][mapped_type ], search_type = search_type , unstrip = unstrip , offline_only = offline_only )
667
736
668
737
# If a specific index is provided, validate it and return the selected libc
669
738
if select_index is not None :
670
739
if select_index > 0 and select_index <= len (matching_list ):
671
- return search_by_build_id (matching_list [select_index - 1 ]['buildid' ] , unstrip = unstrip , offline_only = offline_only )
740
+ return search_by_hash (matching_list [select_index - 1 ][mapped_type ], search_type = search_type , unstrip = unstrip , offline_only = offline_only )
672
741
else :
673
742
log .error ('Invalid selected libc index. %d is not in the range of 1-%d.' , select_index , len (matching_list ))
674
743
return None
675
744
676
745
# Handle multiple matches interactively if no index is specified
677
746
selected_libc = _handle_multiple_matching_libcs (matching_list )
678
- return search_by_build_id (selected_libc ['buildid' ], unstrip = unstrip , offline_only = offline_only )
747
+ return search_by_hash (selected_libc [mapped_type ], search_type = search_type , unstrip = unstrip , offline_only = offline_only )
748
+
749
def search_by_libs_id(libs_id, unstrip=True, offline_only=False):
    """
    Look up a libc by its Libs ID.

    Thin wrapper around :func:`search_by_hash` with the search type fixed
    to ``'libs_id'``.

    Arguments:
        libs_id(str):
            Libs ID (e.g. 'libc6_...') of the library
        unstrip(bool):
            Try to fetch debug info for the libc and apply it to the downloaded file.
        offline_only(bool):
            With `offline_only=True` only offline sources are searched and
            online lookup is disabled. Defaults to `False`, which enables
            both offline and online providers.

    Returns:
        Path to the downloaded library on disk, or :const:`None`.

    Examples:

        >>> None == search_by_libs_id('XX')
        True
        >>> filename = search_by_libs_id('libc6_2.31-3_amd64')
        >>> hex(ELF(filename).symbols.read)
        '0xeef40'
    """
    return search_by_hash(
        libs_id,
        search_type='libs_id',
        unstrip=unstrip,
        offline_only=offline_only,
    )
679
774
680
775
def search_by_build_id (hex_encoded_id , unstrip = True , offline_only = False ):
681
776
"""
@@ -819,9 +914,16 @@ def _pack_libs_info(path, libs_id, libs_url, syms):
819
914
info ["libs_url" ] = libs_url
820
915
info ["download_url" ] = ""
821
916
822
- for hash_type , hash_func in HASHES .items ():
823
- # replace 'build_id' to 'buildid'
824
- info [hash_type .replace ("_" , "" )] = hash_func (path )
917
+ for search_type , hash_func in TYPES .items ():
918
+ # pass libs_id
919
+ if search_type == 'libs_id' :
920
+ continue
921
+
922
+ # replace search_type
923
+ if search_type in MAP_TYPES .keys ():
924
+ search_type = MAP_TYPES [search_type ]
925
+
926
+ info [search_type ] = hash_func (path )
825
927
826
928
default_symbol_list = [
827
929
"__libc_start_main_ret" , "dup2" , "printf" , "puts" , "read" , "system" , "str_bin_sh"
@@ -886,4 +988,4 @@ def get_build_id_offsets():
886
988
}.get (context .arch , [])
887
989
888
990
889
# Public API of this module. Every entry must match a module-level name
# exactly, otherwise `from <module> import *` raises AttributeError.
__all__ = [
    'get_build_id_offsets',
    'search_by_build_id',
    'search_by_sha1',
    'search_by_sha256',
    'search_by_md5',
    'search_by_libs_id',
    'unstrip_libc',
    'search_by_symbol_offsets',
    'download_libraries',
]
0 commit comments