Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support compiled XPath expressions #3380

Draft
wants to merge 2 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions ext/nokogiri/nokogiri.c
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ void noko_init_xml_schema(void);
void noko_init_xml_syntax_error(void);
void noko_init_xml_text(void);
void noko_init_xml_xpath_context(void);
void noko_init_xml_xpath_expression(void);
void noko_init_xslt_stylesheet(void);
void noko_init_html_document(void);
void noko_init_html_element_description(void);
Expand Down Expand Up @@ -253,6 +254,7 @@ Init_nokogiri(void)
noko_init_html4_sax_parser();

noko_init_xml_xpath_context();
noko_init_xml_xpath_expression();
noko_init_xslt_stylesheet();
noko_init_html_element_description();
noko_init_html_entity_lookup();
Expand Down
3 changes: 3 additions & 0 deletions ext/nokogiri/nokogiri.h
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,7 @@ NOKOPUBVAR VALUE cNokogiriXmlSchema;
NOKOPUBVAR VALUE cNokogiriXmlSyntaxError;
NOKOPUBVAR VALUE cNokogiriXmlText ;
NOKOPUBVAR VALUE cNokogiriXmlXpathContext;
NOKOPUBVAR VALUE cNokogiriXmlXpathExpression;
NOKOPUBVAR VALUE cNokogiriXmlXpathSyntaxError;
NOKOPUBVAR VALUE cNokogiriXsltStylesheet ;

Expand Down Expand Up @@ -213,6 +214,8 @@ VALUE noko_xml_sax_parser_context_wrap(VALUE klass, xmlParserCtxtPtr c_context);
xmlParserCtxtPtr noko_xml_sax_parser_context_unwrap(VALUE rb_context);
void noko_xml_sax_parser_context_set_encoding(xmlParserCtxtPtr c_context, VALUE rb_encoding);

xmlXPathCompExprPtr noko_xml_xpath_expression_unwrap(VALUE rb_expression);

#define DOC_RUBY_OBJECT_TEST(x) ((nokogiriTuplePtr)(x->_private))
#define DOC_RUBY_OBJECT(x) (((nokogiriTuplePtr)(x->_private))->doc)
#define DOC_UNLINKED_NODE_HASH(x) (((nokogiriTuplePtr)(x->_private))->unlinkedNodes)
Expand Down
13 changes: 11 additions & 2 deletions ext/nokogiri/xml_xpath_context.c
Original file line number Diff line number Diff line change
Expand Up @@ -368,6 +368,7 @@ noko_xml_xpath_context_evaluate(int argc, VALUE *argv, VALUE rb_context)
VALUE rb_expression = Qnil;
VALUE rb_function_lookup_handler = Qnil;
xmlChar *c_expression_str = NULL;
xmlXPathCompExprPtr c_expression_comp = NULL;
VALUE rb_errors = rb_ary_new();
xmlXPathObjectPtr c_xpath_object;
VALUE rb_xpath_object = Qnil;
Expand All @@ -376,7 +377,11 @@ noko_xml_xpath_context_evaluate(int argc, VALUE *argv, VALUE rb_context)

rb_scan_args(argc, argv, "11", &rb_expression, &rb_function_lookup_handler);

c_expression_str = (xmlChar *)StringValueCStr(rb_expression);
if (rb_obj_is_kind_of(rb_expression, cNokogiriXmlXpathExpression)) {
c_expression_comp = noko_xml_xpath_expression_unwrap(rb_expression);
} else {
c_expression_str = (xmlChar *)StringValueCStr(rb_expression);
}

if (Qnil != rb_function_lookup_handler) {
/* FIXME: not sure if this is the correct place to shove private data. */
Expand All @@ -392,7 +397,11 @@ noko_xml_xpath_context_evaluate(int argc, VALUE *argv, VALUE rb_context)
xmlSetStructuredErrorFunc((void *)rb_errors, noko__error_array_pusher);
xmlSetGenericErrorFunc((void *)rb_errors, _noko_xml_xpath_context__generic_exception_pusher);

c_xpath_object = xmlXPathEvalExpression(c_expression_str, c_context);
if (c_expression_comp) {
c_xpath_object = xmlXPathCompiledEval(c_expression_comp, c_context);
} else {
c_xpath_object = xmlXPathEvalExpression(c_expression_str, c_context);
}

xmlSetStructuredErrorFunc(NULL, NULL);
xmlSetGenericErrorFunc(NULL, NULL);
Expand Down
70 changes: 70 additions & 0 deletions ext/nokogiri/xml_xpath_expression.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
#include <nokogiri.h>

VALUE cNokogiriXmlXpathExpression;

static void
_noko_xml_xpath_expression_dfree(void *data)
{
xmlXPathCompExprPtr c_expr = (xmlXPathCompExprPtr)data;
xmlXPathFreeCompExpr(c_expr);
}

/*
 * Size callback for the typed-data wrapper. Always reports 0 bytes; the
 * footprint of the compiled expression is not measured yet.
 */
static size_t
_noko_xml_xpath_expression_dsize(const void *data)
{
  (void)data; /* intentionally unused until size accounting exists */
  return 0; // TODO
}

/*
 * Typed-data descriptor used to wrap an xmlXPathCompExpr pointer in a Ruby
 * object: it names the wrapped struct and wires up the free/size callbacks
 * defined above.
 */
static const rb_data_type_t _noko_xml_xpath_expression_type = {
.wrap_struct_name = "xmlXPathCompExpr",
.function = {
/* NOTE(review): no dmark callback is set — presumably the compiled expression references no Ruby objects; confirm. */
.dfree = _noko_xml_xpath_expression_dfree,
.dsize = _noko_xml_xpath_expression_dsize,
},
.flags = RUBY_TYPED_FREE_IMMEDIATELY,
};

/*
 * Singleton method backing Nokogiri::XML::XPath::Expression.new(input).
 *
 * Compiles the XPath string +rb_input+ with xmlXPathCompile() and wraps the
 * resulting xmlXPathCompExpr in an instance of +klass+; the wrapper's dfree
 * callback owns and eventually frees the compiled expression.
 *
 * Raises:
 * - whatever StringValueCStr() raises when +rb_input+ cannot be used as a C string
 * - the first error collected from libxml2 when compilation fails
 */
static VALUE
noko_xml_xpath_expression_s_new(VALUE klass, VALUE rb_input)
{
  xmlXPathCompExprPtr c_expr = NULL;
  const xmlChar *c_input = NULL;
  VALUE rb_expr = Qnil;
  VALUE rb_errors = rb_ary_new();

  /*
   * Convert the argument BEFORE installing the libxml2 error handler.
   * StringValueCStr() can raise, and an exception raised while the handler is
   * installed would leave the process-global handler pointing at rb_errors
   * after this frame is gone.
   */
  c_input = (const xmlChar *)StringValueCStr(rb_input);

  /* Collect structured errors emitted during compilation into rb_errors. */
  xmlSetStructuredErrorFunc((void *)rb_errors, noko__error_array_pusher);

  c_expr = xmlXPathCompile(c_input);

  /* Always restore the default handler before any path that may raise. */
  xmlSetStructuredErrorFunc(NULL, NULL);

  if (c_expr == NULL) {
    rb_exc_raise(rb_ary_entry(rb_errors, 0));
  }

  rb_expr = TypedData_Wrap_Struct(klass, &_noko_xml_xpath_expression_type, c_expr);
  return rb_expr;
}

/*
 * Extract the compiled libxml2 expression wrapped by +rb_expression+.
 *
 * The lookup goes through TypedData_Get_Struct() using this file's type
 * descriptor. The returned pointer remains owned by the Ruby wrapper; callers
 * must not free it.
 */
xmlXPathCompExprPtr
noko_xml_xpath_expression_unwrap(VALUE rb_expression)
{
  xmlXPathCompExprPtr c_compiled;

  TypedData_Get_Struct(rb_expression, xmlXPathCompExpr, &_noko_xml_xpath_expression_type, c_compiled);

  return c_compiled;
}

/*
 * Define the Nokogiri::XML::XPath::Expression class and its +new+ singleton
 * method. Invoked from Init_nokogiri().
 */
void
noko_init_xml_xpath_expression(void)
{
/*
* Nokogiri::XML::XPath::Expression is a compiled XPath expression that can be created to
* prepare frequently-used search queries. Preparing them once and re-using them is generally
* faster than re-parsing the expression from a string each time it's used.
*/
cNokogiriXmlXpathExpression = rb_define_class_under(mNokogiriXmlXpath, "Expression", rb_cObject);
/* Register the class object held in the C global with the GC. */
rb_gc_register_mark_object(cNokogiriXmlXpathExpression);

/* No default allocator: instances are only created by the +new+ singleton method below, which wraps a compiled expression. */
rb_undef_alloc_func(cNokogiriXmlXpathExpression);

rb_define_singleton_method(cNokogiriXmlXpathExpression, "new", noko_xml_xpath_expression_s_new, 1);
}
5 changes: 5 additions & 0 deletions lib/nokogiri/css.rb
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,11 @@ def xpath_for(
Parser.new.xpath_for(selector, visitor)
end
end

# Returns +expr+ unchanged; no parsing or compilation of the selector happens here yet.
# TODO: document me
def selector(expr)
expr
end
end
end
end
Expand Down
2 changes: 1 addition & 1 deletion lib/nokogiri/xml/searchable.rb
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,7 @@ def >(selector) # rubocop:disable Naming/BinaryOperatorParameterName

def extract_params(params) # :nodoc:
handler = params.find do |param|
![Hash, String, Symbol].include?(param.class)
![Hash, String, Symbol, XPath::Expression].include?(param.class)
end
params -= [handler] if handler

Expand Down
5 changes: 5 additions & 0 deletions lib/nokogiri/xml/xpath.rb
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,11 @@ module XPath

# The XPath search prefix to search anywhere in the current element's subtree, +.//+
SUBTREE_SEARCH_PREFIX = ".//"

# Compile the XPath string +expr+ into a reusable Nokogiri::XML::XPath::Expression.
#
# Convenience wrapper around Nokogiri::XML::XPath::Expression.new.
def self.expression(expr)
  Expression.new(expr)
end
end
end
end
Expand Down
95 changes: 95 additions & 0 deletions test/xml/test_xpath.rb
Original file line number Diff line number Diff line change
Expand Up @@ -698,6 +698,101 @@ def collision(nodes)
assert_equal(3, doc.xpath("//self::*:child").length)
end
end

describe "compiled" do
let(:xml) {
<<~XML
<root xmlns="http://nokogiri.org/default" xmlns:ns1="http://nokogiri.org/ns1">
<child>default</child>
<ns1:child>ns1</ns1:child>
</root>
XML
}

let(:doc) { Nokogiri::XML::Document.parse(xml) }

describe "XPath expressions" do
# A compiled expression passed to #xpath must return the same nodes as the equivalent string query.
it "works in the trivial case" do
expr = Nokogiri::XML::XPath.expression("//xmlns:child")

result = doc.xpath(expr)
assert_equal(doc.xpath("//xmlns:child"), result)
assert_pattern do
result => [{name: "child", namespace: { href: "http://nokogiri.org/default" }}]
end
end

# Namespace bindings supplied at evaluation time are applied to a compiled expression.
it "works as expected with namespace bindings" do
expr = Nokogiri::XML::XPath.expression("//ns:child")

node = doc.at_xpath(expr, { "ns" => "http://nokogiri.org/ns1" })
assert_equal("ns1", node.text)

# NOTE(review): this negative case evaluates the raw string, not +expr+ — confirm that is intended.
assert_raises(XPath::SyntaxError) do
doc.at_xpath("//ns:child")
end
end

# A custom function handler supplied at evaluation time is used by a compiled expression.
it "works as expected with a function handler" do
expr = Nokogiri::XML::XPath.expression("//xmlns:child[nokogiri:thing(.)]")

doc.xpath(expr, @handler)
assert_equal(1, @handler.things.length)

# NOTE(review): this negative case evaluates the raw string, not +expr+ — confirm that is intended.
assert_raises(XPath::SyntaxError) do
doc.xpath("//xmlns:child[nokogiri:thing(.)]")
end
end

it "works as expected with bound variables" do
  expr = Nokogiri::XML::XPath.expression("//address[@domestic=$value]")

  # Evaluate the compiled expression (not the raw string) with a variable binding,
  # so the test exercises what its name promises.
  nodes = @xml.xpath(expr, nil, value: "Yes")
  assert_equal(4, nodes.length)

  # Without a binding for $value, evaluating the compiled expression must fail.
  assert_raises(XPath::SyntaxError) do
    @xml.xpath(expr)
  end
end

# One compiled expression is evaluated against two separately parsed documents.
it "can be evaluated in different documents" do
doc1 = Nokogiri::XML::Document.parse(xml)
doc2 = Nokogiri::XML::Document.parse(xml)

expr = Nokogiri::XML::XPath.expression("//xmlns:child")

result1 = doc1.xpath(expr)
result2 = doc2.xpath(expr)

assert_pattern do
result1 => [{name: "child", namespace: { href: "http://nokogiri.org/default" }}]
end
assert_pattern do
result2 => [{name: "child", namespace: { href: "http://nokogiri.org/default" }}]
end
end
end

describe "CSS selectors" do
# The value returned by Nokogiri::CSS.selector is accepted by #css and matches the plain-string query.
it "works" do
expr = Nokogiri::CSS.selector("child")

result = doc.css(expr)
assert_equal(doc.css("child"), result)
assert_pattern do
result => [{name: "child", namespace: { href: "http://nokogiri.org/default" }}]
end
end

it "can be evaluated in different documents"

it "work with function handlers"

it "work with variable bindings"

it "work with namespace bindings"
end
end
end
end
end
21 changes: 21 additions & 0 deletions test/xml/test_xpath_expression.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# frozen_string_literal: true

require "helper"

# Constructor behaviour of the compiled-expression class.
describe Nokogiri::XML::XPath::Expression do
# A valid expression compiles into an Expression instance.
it ".new" do
assert_kind_of(Nokogiri::XML::XPath::Expression, Nokogiri::XML::XPath::Expression.new("//foo"))
end

# An unterminated predicate is rejected at compile time, before any document is involved.
it "raises an exception when there are compile-time errors" do
assert_raises(Nokogiri::XML::XPath::SyntaxError) do
Nokogiri::XML::XPath.expression("//foo[")
end
end
end

# The module-level convenience constructor returns an Expression instance.
describe Nokogiri::XML::XPath do
it "XPath.expression" do
assert_kind_of(Nokogiri::XML::XPath::Expression, Nokogiri::XML::XPath.expression("//foo"))
end
end
Loading