Skip to content

Commit 97f2579

Browse files
authored
support upstream libxml2 master up to GNOME/libxml2@05c147c3 (#3161)
**What problem is this PR intended to solve?**

Closes #3156 and gets CI green against libxml2 master.

- CDATA.new no longer accepts `nil` for content
- update Node lifecycle to ensure the new libxml2 changes don't leak memory
- update tests to reflect improved text coalescing

See commit logs for deeper explanations on these changes.

**Have you included adequate test coverage?**

Yes.

**Does this change affect the behavior of either the C or the Java implementations?**

Both C and Java implementations no longer accept `nil` for CDATA content.
2 parents d5cecb5 + e54cd33 commit 97f2579

11 files changed

+55
-47
lines changed

CHANGELOG.md

+6
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,12 @@ Nokogiri follows [Semantic Versioning](https://semver.org/), please see the [REA
1616

1717
* [CRuby] libgumbo (the HTML5 parser) treats reaching max-depth as EOF. This addresses a class of issues when the parser is interrupted in this way. [#3121] @stevecheckoway
1818
* `Node#clone`, `NodeSet#clone`, and `*::Document#clone` all properly copy the metaclass of the original as expected. Previously, `#clone` had been aliased to `#dup` for these classes (since v1.3.0 in 2009). [#316, #3117] @flavorjones
19+
* [CRuby] Update node GC lifecycle to avoid a potential memory leak with fragments in libxml 2.13.0 caused by changes in `xmlAddChild`. [#3156] @flavorjones
20+
21+
22+
### Changed
23+
24+
* [CRuby] `Nokogiri::XML::CDATA.new` no longer accepts `nil` as the content argument, making `CDATA` behave like other character data classes (like `Comment` and `Text`). This change was necessitated by behavioral changes in the upcoming libxml 2.13.0 release. If you wish to create an empty CDATA node, pass an empty string. [#3156] @flavorjones
1925

2026

2127
## v1.16.3 / 2024-03-15

CONTRIBUTING.md

+7
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,13 @@ To run a focused test, use Minitest's `TESTOPTS`:
131131
bundle exec rake compile test TESTOPTS="-n/test_last_element_child/"
132132
```
133133

134+
Or to run tests on specific files, use `TESTGLOB`:
135+
136+
``` sh
137+
bundle exec rake compile test TESTGLOB="test/**/test_*node*rb"
138+
```
139+
140+
134141
To run the test suite in parallel, set the `NCPU` environment variable; and to compile in parallel, set the `MAKEFLAGS` environment variable (you may want to set these in something like your .bashrc):
135142

136143
``` sh

ext/java/nokogiri/XmlCdata.java

+3
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,9 @@ public class XmlCdata extends XmlText
4646
IRubyObject rbDocument = args[0];
4747
content = args[1];
4848

49+
if (content.isNil()) {
50+
throw context.runtime.newTypeError("expected second parameter to be a String, received NilClass");
51+
}
4952
if (!(rbDocument instanceof XmlNode)) {
5053
String msg = "expected first parameter to be a Nokogiri::XML::Document, received " + rbDocument.getMetaClass();
5154
throw context.runtime.newTypeError(msg);

ext/nokogiri/xml_cdata.c

+2-10
Original file line numberDiff line numberDiff line change
@@ -20,11 +20,10 @@ rb_xml_cdata_s_new(int argc, VALUE *argv, VALUE klass)
2020
VALUE rb_content;
2121
VALUE rb_rest;
2222
VALUE rb_node;
23-
xmlChar *c_content = NULL;
24-
int c_content_len = 0;
2523

2624
rb_scan_args(argc, argv, "2*", &rb_document, &rb_content, &rb_rest);
2725

26+
Check_Type(rb_content, T_STRING);
2827
if (!rb_obj_is_kind_of(rb_document, cNokogiriXmlNode)) {
2928
rb_raise(rb_eTypeError,
3029
"expected first parameter to be a Nokogiri::XML::Document, received %"PRIsVALUE,
@@ -40,15 +39,8 @@ rb_xml_cdata_s_new(int argc, VALUE *argv, VALUE klass)
4039
c_document = noko_xml_document_unwrap(rb_document);
4140
}
4241

43-
if (!NIL_P(rb_content)) {
44-
c_content = (xmlChar *)StringValuePtr(rb_content);
45-
c_content_len = RSTRING_LENINT(rb_content);
46-
}
47-
48-
c_node = xmlNewCDataBlock(c_document, c_content, c_content_len);
49-
42+
c_node = xmlNewCDataBlock(c_document, (xmlChar *)StringValueCStr(rb_content), RSTRING_LENINT(rb_content));
5043
noko_xml_document_pin_node(c_node);
51-
5244
rb_node = noko_xml_node_wrap(klass, c_node);
5345
rb_obj_call_init(rb_node, argc, argv);
5446

ext/nokogiri/xml_comment.c

+3-8
Original file line numberDiff line numberDiff line change
@@ -23,25 +23,20 @@ new (int argc, VALUE *argv, VALUE klass)
2323

2424
rb_scan_args(argc, argv, "2*", &document, &content, &rest);
2525

26+
Check_Type(content, T_STRING);
2627
if (rb_obj_is_kind_of(document, cNokogiriXmlNode)) {
2728
document = rb_funcall(document, document_id, 0);
2829
} else if (!rb_obj_is_kind_of(document, cNokogiriXmlDocument)
2930
&& !rb_obj_is_kind_of(document, cNokogiriXmlDocumentFragment)) {
3031
rb_raise(rb_eArgError, "first argument must be a XML::Document or XML::Node");
3132
}
32-
3333
xml_doc = noko_xml_document_unwrap(document);
3434

35-
node = xmlNewDocComment(
36-
xml_doc,
37-
(const xmlChar *)StringValueCStr(content)
38-
);
39-
35+
node = xmlNewDocComment(xml_doc, (const xmlChar *)StringValueCStr(content));
36+
noko_xml_document_pin_node(node);
4037
rb_node = noko_xml_node_wrap(klass, node);
4138
rb_obj_call_init(rb_node, argc, argv);
4239

43-
noko_xml_document_pin_node(node);
44-
4540
if (rb_block_given_p()) { rb_yield(rb_node); }
4641

4742
return rb_node;

ext/nokogiri/xml_document.c

+2
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@ dealloc_node_i2(xmlNodePtr key, xmlNodePtr node, xmlDocPtr doc)
1717
break;
1818
default:
1919
if (node->parent == NULL) {
20+
node->next = NULL;
21+
node->prev = NULL;
2022
xmlAddChild((xmlNodePtr)doc, node);
2123
}
2224
}

ext/nokogiri/xml_node.c

+14-15
Original file line numberDiff line numberDiff line change
@@ -1893,22 +1893,21 @@ output_node(
18931893
}
18941894
break;
18951895

1896-
case XML_ATTRIBUTE_NODE:
1897-
{
1898-
xmlAttrPtr attr = (xmlAttrPtr)node;
1899-
output_attr_name(out, attr);
1900-
if (attr->children) {
1901-
output_string(out, "=\"");
1902-
xmlChar *value = xmlNodeListGetString(attr->doc, attr->children, 1);
1903-
output_escaped_string(out, value, true);
1904-
xmlFree(value);
1905-
output_char(out, '"');
1906-
} else {
1907-
// Output name=""
1908-
output_string(out, "=\"\"");
1909-
}
1896+
case XML_ATTRIBUTE_NODE: {
1897+
xmlAttrPtr attr = (xmlAttrPtr)node;
1898+
output_attr_name(out, attr);
1899+
if (attr->children) {
1900+
output_string(out, "=\"");
1901+
xmlChar *value = xmlNodeListGetString(attr->doc, attr->children, 1);
1902+
output_escaped_string(out, value, true);
1903+
xmlFree(value);
1904+
output_char(out, '"');
1905+
} else {
1906+
// Output name=""
1907+
output_string(out, "=\"\"");
19101908
}
1911-
break;
1909+
}
1910+
break;
19121911

19131912
case XML_TEXT_NODE:
19141913
if (node->parent

ext/nokogiri/xml_text.c

+2-4
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ rb_xml_text_s_new(int argc, VALUE *argv, VALUE klass)
2020

2121
rb_scan_args(argc, argv, "2*", &rb_string, &rb_document, &rb_rest);
2222

23+
Check_Type(rb_string, T_STRING);
2324
if (!rb_obj_is_kind_of(rb_document, cNokogiriXmlNode)) {
2425
rb_raise(rb_eTypeError,
2526
"expected second parameter to be a Nokogiri::XML::Document, received %"PRIsVALUE,
@@ -35,11 +36,8 @@ rb_xml_text_s_new(int argc, VALUE *argv, VALUE klass)
3536
c_document = noko_xml_document_unwrap(rb_document);
3637
}
3738

38-
c_node = xmlNewText((xmlChar *)StringValueCStr(rb_string));
39-
c_node->doc = c_document;
40-
39+
c_node = xmlNewDocText(c_document, (xmlChar *)StringValueCStr(rb_string));
4140
noko_xml_document_pin_node(c_node);
42-
4341
rb_node = noko_xml_node_wrap(klass, c_node) ;
4442
rb_obj_call_init(rb_node, argc, argv);
4543

rakelib/test.rake

+2-3
Original file line numberDiff line numberDiff line change
@@ -109,13 +109,12 @@ end
109109

110110
def nokogiri_test_task_configuration(t)
111111
t.libs << "test"
112-
# t.verbose = true # This is noisier than we need. Commenting out 2024-03-07.
113-
# t.options = "-v" if ENV["CI"] # I haven't needed this in a long time. Commenting out 2023-12-10.
112+
t.verbose = true if ENV["TESTGLOB"]
114113
end
115114

116115
def nokogiri_test_case_configuration(t)
117116
nokogiri_test_task_configuration(t)
118-
t.test_files = FileList["test/**/test_*.rb"]
117+
t.test_files = FileList[ENV["TESTGLOB"] || "test/**/test_*.rb"]
119118
end
120119

121120
def nokogiri_test_bench_configuration(t)

test/xml/test_cdata.rb

+4-5
Original file line numberDiff line numberDiff line change
@@ -28,11 +28,10 @@
2828
assert_same(doc, node.document)
2929
end
3030

31-
it "has nil content when passed nil" do
32-
node = Nokogiri::XML::CDATA.new(Nokogiri::XML::Document.new, nil)
33-
34-
assert_instance_of(Nokogiri::XML::CDATA, node)
35-
assert_nil(node.content)
31+
it "when passed nil raises TypeError" do
32+
assert_raises(TypeError) do
33+
Nokogiri::XML::CDATA.new(Nokogiri::XML::Document.new, nil)
34+
end
3635
end
3736

3837
it "does not accept anything but a string" do

test/xml/test_node_reparenting.rb

+10-2
Original file line numberDiff line numberDiff line change
@@ -623,7 +623,11 @@ def coerce(data)
623623
assert_equal "after", after.content
624624
refute_nil after.parent, "unrelated node should not be affected"
625625

626-
assert_equal "before", before.content
626+
if Nokogiri.uses_libxml?(">= 2.13.0")
627+
assert_equal "beforex", before.content # coalescing fixed in gnome/libxml2@4ccd3eb8
628+
else
629+
assert_equal "before", before.content
630+
end
627631
refute_nil before.parent, "no need to reparent"
628632
end
629633
end
@@ -662,7 +666,11 @@ def coerce(data)
662666
assert_equal "before", before.content
663667
refute_nil before.parent, "unrelated node should not be affected"
664668

665-
assert_equal "after", after.content
669+
if Nokogiri.uses_libxml?(">= 2.13.0")
670+
assert_equal "xafter", after.content # coalescing fixed in gnome/libxml2@4ccd3eb8
671+
else
672+
assert_equal "after", after.content
673+
end
666674
refute_nil after.parent
667675
end
668676
end

0 commit comments

Comments
 (0)