Skip to content

Commit 86cac2b

Browse files
committed
ext: backport libxml2/gnome:bf5fcf6e for xmlXPathContext perf
See extended discussion at #3378.

Benchmark comparing this commit against v1.17.x ("main"):

Comparison:
  large: main:      3910.6 i/s
  large: patched:   3759.6 i/s - same-ish: difference falls within error

Comparison:
  small: patched: 242901.7 i/s
  small: main:    127486.0 i/s - 1.91x slower

I think we could get greater performance gains by re-using XPathContext objects, but only at the cost of a significant amount of additional complexity, since in order to properly support recursive XPath evaluation, Nokogiri would have to push and pop "stack frames" containing:

- internal state contextSize and proximityPosition
- registered namespaces
- registered variables
- function lookup handler

That feels like a lot of code for a small win. Comparatively, pulling in this upstream patch is still a 2x speedup for zero additional complexity.
1 parent 47e89a3 commit 86cac2b

File tree

1 file changed

+244
-0
lines changed

1 file changed

+244
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,244 @@
1+
From d3e3526111097560cf7c002613e2cb1d469b59e0 Mon Sep 17 00:00:00 2001
2+
From: Nick Wellnhofer <[email protected]>
3+
Date: Sat, 21 Dec 2024 16:03:46 +0100
4+
Subject: [PATCH] xpath: Use separate static hash table for standard functions
5+
6+
This avoids registering standard functions when creating an XPath
7+
context.
8+
9+
Lookup of extension functions is a bit slower now, but ultimately, all
10+
function lookups should be moved to the compilation phase.
11+
12+
(cherry picked from commit bf5fcf6e646bb51a0f6a3655a1d64bea97274867)
13+
---
14+
xpath.c | 170 ++++++++++++++++++++++++++++++++------------------------
15+
1 file changed, 98 insertions(+), 72 deletions(-)
16+
17+
diff --git a/xpath.c b/xpath.c
18+
index 485d7747..21711653 100644
19+
--- a/xpath.c
20+
+++ b/xpath.c
21+
@@ -136,11 +136,48 @@
22+
23+
#if defined(LIBXML_XPATH_ENABLED) || defined(LIBXML_SCHEMAS_ENABLED)
24+
25+
-/************************************************************************
26+
- * *
27+
- * Floating point stuff *
28+
- * *
29+
- ************************************************************************/
30+
+static void
31+
+xmlXPathNameFunction(xmlXPathParserContextPtr ctxt, int nargs);
32+
+
33+
+static const struct {
34+
+ const char *name;
35+
+ xmlXPathFunction func;
36+
+} xmlXPathStandardFunctions[] = {
37+
+ { "boolean", xmlXPathBooleanFunction },
38+
+ { "ceiling", xmlXPathCeilingFunction },
39+
+ { "count", xmlXPathCountFunction },
40+
+ { "concat", xmlXPathConcatFunction },
41+
+ { "contains", xmlXPathContainsFunction },
42+
+ { "id", xmlXPathIdFunction },
43+
+ { "false", xmlXPathFalseFunction },
44+
+ { "floor", xmlXPathFloorFunction },
45+
+ { "last", xmlXPathLastFunction },
46+
+ { "lang", xmlXPathLangFunction },
47+
+ { "local-name", xmlXPathLocalNameFunction },
48+
+ { "not", xmlXPathNotFunction },
49+
+ { "name", xmlXPathNameFunction },
50+
+ { "namespace-uri", xmlXPathNamespaceURIFunction },
51+
+ { "normalize-space", xmlXPathNormalizeFunction },
52+
+ { "number", xmlXPathNumberFunction },
53+
+ { "position", xmlXPathPositionFunction },
54+
+ { "round", xmlXPathRoundFunction },
55+
+ { "string", xmlXPathStringFunction },
56+
+ { "string-length", xmlXPathStringLengthFunction },
57+
+ { "starts-with", xmlXPathStartsWithFunction },
58+
+ { "substring", xmlXPathSubstringFunction },
59+
+ { "substring-before", xmlXPathSubstringBeforeFunction },
60+
+ { "substring-after", xmlXPathSubstringAfterFunction },
61+
+ { "sum", xmlXPathSumFunction },
62+
+ { "true", xmlXPathTrueFunction },
63+
+ { "translate", xmlXPathTranslateFunction }
64+
+};
65+
+
66+
+#define NUM_STANDARD_FUNCTIONS \
67+
+ (sizeof(xmlXPathStandardFunctions) / sizeof(xmlXPathStandardFunctions[0]))
68+
+
69+
+#define SF_HASH_SIZE 64
70+
+
71+
+static unsigned char xmlXPathSFHash[SF_HASH_SIZE];
72+
73+
double xmlXPathNAN = 0.0;
74+
double xmlXPathPINF = 0.0;
75+
@@ -156,6 +193,18 @@ xmlXPathInit(void) {
76+
xmlInitParser();
77+
}
78+
79+
+ATTRIBUTE_NO_SANITIZE_INTEGER
80+
+static unsigned
81+
+xmlXPathSFComputeHash(const xmlChar *name) {
82+
+ unsigned hashValue = 5381;
83+
+ const xmlChar *ptr;
84+
+
85+
+ for (ptr = name; *ptr; ptr++)
86+
+ hashValue = hashValue * 33 + *ptr;
87+
+
88+
+ return(hashValue);
89+
+}
90+
+
91+
/**
92+
* xmlInitXPathInternal:
93+
*
94+
@@ -164,6 +213,8 @@ xmlXPathInit(void) {
95+
ATTRIBUTE_NO_SANITIZE("float-divide-by-zero")
96+
void
97+
xmlInitXPathInternal(void) {
98+
+ size_t i;
99+
+
100+
#if defined(NAN) && defined(INFINITY)
101+
xmlXPathNAN = NAN;
102+
xmlXPathPINF = INFINITY;
103+
@@ -175,8 +226,34 @@ xmlInitXPathInternal(void) {
104+
xmlXPathPINF = 1.0 / zero;
105+
xmlXPathNINF = -xmlXPathPINF;
106+
#endif
107+
+
108+
+ /*
109+
+ * Initialize hash table for standard functions
110+
+ */
111+
+
112+
+ for (i = 0; i < SF_HASH_SIZE; i++)
113+
+ xmlXPathSFHash[i] = UCHAR_MAX;
114+
+
115+
+ for (i = 0; i < NUM_STANDARD_FUNCTIONS; i++) {
116+
+ const char *name = xmlXPathStandardFunctions[i].name;
117+
+ int bucketIndex = xmlXPathSFComputeHash(BAD_CAST name) % SF_HASH_SIZE;
118+
+
119+
+ while (xmlXPathSFHash[bucketIndex] != UCHAR_MAX) {
120+
+ bucketIndex += 1;
121+
+ if (bucketIndex >= SF_HASH_SIZE)
122+
+ bucketIndex = 0;
123+
+ }
124+
+
125+
+ xmlXPathSFHash[bucketIndex] = i;
126+
+ }
127+
}
128+
129+
+/************************************************************************
130+
+ * *
131+
+ * Floating point stuff *
132+
+ * *
133+
+ ************************************************************************/
134+
+
135+
/**
136+
* xmlXPathIsNaN:
137+
* @val: a double value
138+
@@ -3979,18 +4056,6 @@ xmlXPathRegisterFuncLookup (xmlXPathContextPtr ctxt,
139+
*/
140+
xmlXPathFunction
141+
xmlXPathFunctionLookup(xmlXPathContextPtr ctxt, const xmlChar *name) {
142+
- if (ctxt == NULL)
143+
- return (NULL);
144+
-
145+
- if (ctxt->funcLookupFunc != NULL) {
146+
- xmlXPathFunction ret;
147+
- xmlXPathFuncLookupFunc f;
148+
-
149+
- f = ctxt->funcLookupFunc;
150+
- ret = f(ctxt->funcLookupData, name, NULL);
151+
- if (ret != NULL)
152+
- return(ret);
153+
- }
154+
return(xmlXPathFunctionLookupNS(ctxt, name, NULL));
155+
}
156+
157+
@@ -4015,6 +4080,22 @@ xmlXPathFunctionLookupNS(xmlXPathContextPtr ctxt, const xmlChar *name,
158+
if (name == NULL)
159+
return(NULL);
160+
161+
+ if (ns_uri == NULL) {
162+
+ int bucketIndex = xmlXPathSFComputeHash(name) % SF_HASH_SIZE;
163+
+
164+
+ while (xmlXPathSFHash[bucketIndex] != UCHAR_MAX) {
165+
+ int funcIndex = xmlXPathSFHash[bucketIndex];
166+
+
167+
+ if (strcmp(xmlXPathStandardFunctions[funcIndex].name,
168+
+ (char *) name) == 0)
169+
+ return(xmlXPathStandardFunctions[funcIndex].func);
170+
+
171+
+ bucketIndex += 1;
172+
+ if (bucketIndex >= SF_HASH_SIZE)
173+
+ bucketIndex = 0;
174+
+ }
175+
+ }
176+
+
177+
if (ctxt->funcLookupFunc != NULL) {
178+
xmlXPathFuncLookupFunc f;
179+
180+
@@ -13494,61 +13575,6 @@ xmlXPathEscapeUriFunction(xmlXPathParserContextPtr ctxt, int nargs) {
181+
void
182+
xmlXPathRegisterAllFunctions(xmlXPathContextPtr ctxt)
183+
{
184+
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"boolean",
185+
- xmlXPathBooleanFunction);
186+
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"ceiling",
187+
- xmlXPathCeilingFunction);
188+
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"count",
189+
- xmlXPathCountFunction);
190+
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"concat",
191+
- xmlXPathConcatFunction);
192+
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"contains",
193+
- xmlXPathContainsFunction);
194+
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"id",
195+
- xmlXPathIdFunction);
196+
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"false",
197+
- xmlXPathFalseFunction);
198+
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"floor",
199+
- xmlXPathFloorFunction);
200+
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"last",
201+
- xmlXPathLastFunction);
202+
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"lang",
203+
- xmlXPathLangFunction);
204+
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"local-name",
205+
- xmlXPathLocalNameFunction);
206+
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"not",
207+
- xmlXPathNotFunction);
208+
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"name",
209+
- xmlXPathNameFunction);
210+
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"namespace-uri",
211+
- xmlXPathNamespaceURIFunction);
212+
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"normalize-space",
213+
- xmlXPathNormalizeFunction);
214+
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"number",
215+
- xmlXPathNumberFunction);
216+
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"position",
217+
- xmlXPathPositionFunction);
218+
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"round",
219+
- xmlXPathRoundFunction);
220+
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"string",
221+
- xmlXPathStringFunction);
222+
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"string-length",
223+
- xmlXPathStringLengthFunction);
224+
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"starts-with",
225+
- xmlXPathStartsWithFunction);
226+
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"substring",
227+
- xmlXPathSubstringFunction);
228+
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"substring-before",
229+
- xmlXPathSubstringBeforeFunction);
230+
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"substring-after",
231+
- xmlXPathSubstringAfterFunction);
232+
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"sum",
233+
- xmlXPathSumFunction);
234+
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"true",
235+
- xmlXPathTrueFunction);
236+
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"translate",
237+
- xmlXPathTranslateFunction);
238+
-
239+
xmlXPathRegisterFuncNS(ctxt, (const xmlChar *)"escape-uri",
240+
(const xmlChar *)"http://www.w3.org/2002/08/xquery-functions",
241+
xmlXPathEscapeUriFunction);
242+
--
243+
2.47.1
244+

0 commit comments

Comments
 (0)