From 3c850814f328b62fc68e9d6cec57e74f0255b48b Mon Sep 17 00:00:00 2001
From: Stephen Checkoway <s@pahtak.org>
Date: Fri, 7 Jun 2024 14:20:45 -0400
Subject: [PATCH 1/3] Build libgumbo via autotools and rebuild on source
 changes

- Implement a minimal autotools build for libgumbo.
- Make `rake gumbo:test` use the same libgumbo.a used to build the
  nokogiri extension.
- Make changes to `gumbo-parser/src` trigger a rebuild of libgumbo.
---
 ext/nokogiri/extconf.rb                 | 60 ++++++-------------------
 gumbo-parser/.gitignore                 | 15 ++++---
 gumbo-parser/Makefile.am                |  5 +++
 gumbo-parser/{Makefile => Makefile.old} |  0
 gumbo-parser/build-aux/.gitignore       |  4 ++
 gumbo-parser/configure.ac               | 11 +++++
 gumbo-parser/src/Makefile               | 34 --------------
 gumbo-parser/src/Makefile.am            | 20 +++++++++
 gumbo-parser/test/Makefile.am           | 21 +++++++++
 nokogiri.gemspec                        |  8 +++-
 rakelib/extensions.rake                 |  2 +-
 rakelib/gumbo.rake                      | 38 ++++++++++------
 12 files changed, 116 insertions(+), 102 deletions(-)
 create mode 100644 gumbo-parser/Makefile.am
 rename gumbo-parser/{Makefile => Makefile.old} (100%)
 create mode 100644 gumbo-parser/build-aux/.gitignore
 create mode 100644 gumbo-parser/configure.ac
 delete mode 100644 gumbo-parser/src/Makefile
 create mode 100644 gumbo-parser/src/Makefile.am
 create mode 100644 gumbo-parser/test/Makefile.am

diff --git a/ext/nokogiri/extconf.rb b/ext/nokogiri/extconf.rb
index 8ea793d2a43..4d0d627fb43 100644
--- a/ext/nokogiri/extconf.rb
+++ b/ext/nokogiri/extconf.rb
@@ -1056,54 +1056,8 @@ def configure
   find_header("nokogiri_gumbo.h") || abort("nokogiri_gumbo.h not found")
 else
   libgumbo_recipe = process_recipe("libgumbo", "1.0.0-nokogiri", static_p, cross_build_p, false) do |recipe|
-    recipe.configure_options = []
-
-    class << recipe
-      def downloaded?
-        true
-      end
-
-      def extract
-        target = File.join(tmp_path, "gumbo-parser")
-        output("Copying gumbo-parser files into #{target}...")
-        FileUtils.mkdir_p(target)
-        FileUtils.cp(Dir.glob(File.join(PACKAGE_ROOT_DIR, "gumbo-parser/src/*")), target)
-      end
-
-      def configured?
-        true
-      end
-
-      def install
-        lib_dir = File.join(port_path, "lib")
-        inc_dir = File.join(port_path, "include")
-        FileUtils.mkdir_p([lib_dir, inc_dir])
-        FileUtils.cp(File.join(work_path, "libgumbo.a"), lib_dir)
-        FileUtils.cp(Dir.glob(File.join(work_path, "*.h")), inc_dir)
-      end
-
-      def compile
-        cflags = concat_flags(ENV["CFLAGS"], "-fPIC", "-O2", "-g")
-
-        env = { "CC" => gcc_cmd, "CFLAGS" => cflags }
-        if config_cross_build?
-          if host.include?("darwin")
-            env["AR"] = "#{host}-libtool"
-            env["ARFLAGS"] = "-o"
-          else
-            env["AR"] = "#{host}-ar"
-          end
-          env["RANLIB"] = "#{host}-ranlib"
-        end
-
-        execute("compile", make_cmd, { env: env })
-      end
-    end
+    recipe.source_directory = File.join(PACKAGE_ROOT_DIR, "gumbo-parser")
   end
-  append_cppflags("-I#{File.join(libgumbo_recipe.path, "include")}")
-  $libs = $libs + " " + File.join(libgumbo_recipe.path, "lib", "libgumbo.a")
-  $LIBPATH = $LIBPATH | [File.join(libgumbo_recipe.path, "lib")]
-  ensure_func("gumbo_parse_with_options", "nokogiri_gumbo.h")
 end
 
 have_func("xmlHasFeature") || abort("xmlHasFeature() is missing.") # introduced in libxml 2.6.21
@@ -1142,11 +1096,23 @@ def compile
   File.open("Makefile", "at") do |mk|
     mk.print(<<~EOF)
 
+      .PHONY: clean-ports
       all: clean-ports
       clean-ports: $(DLLIB)
       \t-$(Q)$(RUBY) $(srcdir)/extconf.rb --clean --#{static_p ? "enable" : "disable"}-static
     EOF
   end
+  File.open("Makefile", "at") do |mk|
+    mk.print(<<~EOF)
+
+      .PHONY: rebuild-libgumbo
+
+      $(TARGET_SO): rebuild-libgumbo
+      rebuild-libgumbo:
+      \t-$(Q)$(MAKE) -C tmp/#{libgumbo_recipe.host}/ports/libgumbo/1.0.0-nokogiri/libgumbo-1.0.0-nokogiri install
+    EOF
+  end
+
 end
 
 # rubocop:enable Style/GlobalVars
diff --git a/gumbo-parser/.gitignore b/gumbo-parser/.gitignore
index 37a46fc2b3d..afe2b1da42c 100644
--- a/gumbo-parser/.gitignore
+++ b/gumbo-parser/.gitignore
@@ -1,5 +1,10 @@
-build
-googletest
-src/*.o
-fuzzer/build
-src/libgumbo.a
\ No newline at end of file
+/build
+/googletest
+/src/*.o
+/fuzzer/build
+/src/libgumbo.a
+/aclocal.m4
+/autom4te.cache/
+/configure
+/configure.in
+Makefile.in
diff --git a/gumbo-parser/Makefile.am b/gumbo-parser/Makefile.am
new file mode 100644
index 00000000000..591ff85c08a
--- /dev/null
+++ b/gumbo-parser/Makefile.am
@@ -0,0 +1,5 @@
+if HAS_TESTS
+  SUBDIRS = src test
+else
+  SUBDIRS = src
+endif
diff --git a/gumbo-parser/Makefile b/gumbo-parser/Makefile.old
similarity index 100%
rename from gumbo-parser/Makefile
rename to gumbo-parser/Makefile.old
diff --git a/gumbo-parser/build-aux/.gitignore b/gumbo-parser/build-aux/.gitignore
new file mode 100644
index 00000000000..72fd7022e87
--- /dev/null
+++ b/gumbo-parser/build-aux/.gitignore
@@ -0,0 +1,4 @@
+# Ignore everything in this directory except for this file.
+# Credit: https://stackoverflow.com/a/932982
+*
+!/.gitignore
diff --git a/gumbo-parser/configure.ac b/gumbo-parser/configure.ac
new file mode 100644
index 00000000000..ba8608597df
--- /dev/null
+++ b/gumbo-parser/configure.ac
@@ -0,0 +1,11 @@
+AC_INIT([Gumbo], [1.0])
+AC_CONFIG_SRCDIR([src/nokogiri_gumbo.h])
+AC_CONFIG_AUX_DIR([build-aux])
+AM_INIT_AUTOMAKE([subdir-objects foreign serial-tests no-dist no-installinfo no-installman -Wall])
+AC_PROG_RANLIB
+AC_PROG_CC
+AC_PROG_CXX
+AM_PROG_AR
+AM_CONDITIONAL([HAS_TESTS], [test -d "${srcdir}/test"])
+AC_CONFIG_FILES([Makefile src/Makefile test/Makefile])
+AC_OUTPUT
diff --git a/gumbo-parser/src/Makefile b/gumbo-parser/src/Makefile
deleted file mode 100644
index 6bd4a18fbed..00000000000
--- a/gumbo-parser/src/Makefile
+++ /dev/null
@@ -1,34 +0,0 @@
-# this Makefile is used by ext/nokogiri/extconf.rb
-# to enable a mini_portile2 recipe to build the gumbo parser
-.PHONY: clean
-
-CFLAGS += -std=c99 -Wall
-
-# allow the ENV var to override this
-RANLIB ?= ranlib
-
-gumbo_objs := \
-	ascii.o \
-	attribute.o \
-	char_ref.o \
-	error.o \
-	foreign_attrs.o \
-	parser.o \
-	string_buffer.o \
-	string_piece.o \
-	svg_attrs.o \
-	svg_tags.o \
-	tag.o \
-	tag_lookup.o \
-	token_buffer.o \
-	tokenizer.o \
-	utf8.o \
-	util.o \
-	vector.o
-
-libgumbo.a: $(gumbo_objs)
-	$(AR) $(ARFLAGS) $@ $(gumbo_objs)
-	- ($(RANLIB) $@ || true) >/dev/null 2>&1
-
-clean:
-	rm -f $(gumbo_objs) libgumbo.a
diff --git a/gumbo-parser/src/Makefile.am b/gumbo-parser/src/Makefile.am
new file mode 100644
index 00000000000..f41055b415f
--- /dev/null
+++ b/gumbo-parser/src/Makefile.am
@@ -0,0 +1,20 @@
+lib_LIBRARIES = libgumbo.a
+libgumbo_a_SOURCES = \
+	ascii.c \
+	attribute.c \
+	char_ref.c \
+	error.c \
+	foreign_attrs.c \
+	parser.c \
+	string_buffer.c \
+	string_piece.c \
+	svg_attrs.c \
+	svg_tags.c \
+	tag.c \
+	tag_lookup.c \
+	token_buffer.c \
+	tokenizer.c \
+	utf8.c \
+	util.c \
+	vector.c
+include_HEADERS = nokogiri_gumbo.h
diff --git a/gumbo-parser/test/Makefile.am b/gumbo-parser/test/Makefile.am
new file mode 100644
index 00000000000..e5ad1de2b70
--- /dev/null
+++ b/gumbo-parser/test/Makefile.am
@@ -0,0 +1,21 @@
+check_LIBRARIES = libgtest_main.a
+libgtest_main_a_SOURCES = ../googletest/src/gtest-all.cc ../googletest/src/gtest_main.cc
+libgtest_main_a_CPPFLAGS = -I$(top_srcdir)/googletest/include -I$(top_srcdir)/googletest
+libgtest_main_a_CXXFLAGS = -pthread
+
+check_PROGRAMS = gumbotest
+gumbotest_SOURCES = attribute.cc \
+    parser.cc \
+    string_buffer.cc \
+    string_piece.cc \
+    test_utils.cc \
+    token_buffer.cc \
+    tokenizer.cc \
+    utf8.cc \
+    vector.cc
+
+gumbotest_LDADD = libgtest_main.a ../src/libgumbo.a
+gumbotest_LDFLAGS = -pthread
+gumbotest_CPPFLAGS = -I$(top_srcdir)/googletest/include -I$(top_srcdir)/src
+
+TESTS = gumbotest
diff --git a/nokogiri.gemspec b/nokogiri.gemspec
index 44322172004..da30f102230 100644
--- a/nokogiri.gemspec
+++ b/nokogiri.gemspec
@@ -182,9 +182,13 @@ Gem::Specification.new do |spec|
     "ext/nokogiri/xml_xpath_context.c",
     "ext/nokogiri/xslt_stylesheet.c",
     "gumbo-parser/CHANGES.md",
-    "gumbo-parser/Makefile",
+    "gumbo-parser/Makefile.am",
+    "gumbo-parser/Makefile.in",
     "gumbo-parser/THANKS",
-    "gumbo-parser/src/Makefile",
+    "gumbo-parser/configure",
+    "gumbo-parser/configure.ac",
+    "gumbo-parser/src/Makefile.am",
+    "gumbo-parser/src/Makefile.in",
     "gumbo-parser/src/README.md",
     "gumbo-parser/src/ascii.c",
     "gumbo-parser/src/ascii.h",
diff --git a/rakelib/extensions.rake b/rakelib/extensions.rake
index 66867f407f2..0cc974f82ec 100644
--- a/rakelib/extensions.rake
+++ b/rakelib/extensions.rake
@@ -444,7 +444,7 @@ else
   end
 
   Rake::ExtensionTask.new("nokogiri", NOKOGIRI_SPEC.dup) do |ext|
-    ext.source_pattern = "*.{c,cc,cpp,h}"
+    ext.source_pattern = "{.,../../gumbo-parser/src}/*.{c,cc,cpp,h}"
     ext.gem_spec.files.reject! { |path| File.fnmatch?("**/*.{java,jar}", path, File::FNM_EXTGLOB) }
 
     ext.lib_dir = File.join(*["lib", "nokogiri", ENV["FAT_DIR"]].compact)
diff --git a/rakelib/gumbo.rake b/rakelib/gumbo.rake
index f574c2697c8..9dfccd62c4e 100644
--- a/rakelib/gumbo.rake
+++ b/rakelib/gumbo.rake
@@ -1,12 +1,19 @@
 # frozen_string_literal: true
 
 namespace "gumbo" do
-  gtest_pkg = "gumbo-parser/googletest"
-  gtest_lib = File.join(gtest_pkg, "make/gtest_main.a")
+  # We want to run the gumbo test suite using exactly the same compiled gumbo-parser
+  # that Nokogiri uses.
+  #
+  # To that end, we first need to get the Rake ExtensionTask to run extconf.rb which will
+  # run the gumbo-parser's configure script. We don't want to compile the extension
+  # at this point, so we make `gumbo:test` depend on the Nokogiri Makefile.
 
-  file gtest_lib => gtest_pkg do
-    sh("make -C gumbo-parser/googletest/make gtest_main.a")
-  end
+  gtest_pkg = "gumbo-parser/googletest"
+  host = RbConfig::CONFIG["host_alias"].empty? ? RbConfig::CONFIG["host"] : RbConfig::CONFIG["host_alias"]
+  host = host.gsub("i386", "i686")
+  nokogiri_makefile = File.join("tmp/#{RUBY_PLATFORM}/nokogiri/#{RUBY_VERSION}/Makefile")
+  gumbotest_builddir = "tmp/#{RUBY_PLATFORM}/nokogiri/#{RUBY_VERSION}/tmp/#{host}/ports/libgumbo/1.0.0-nokogiri/libgumbo-1.0.0-nokogiri"
+  gumbotest_configure = File.absolute_path("gumbo-parser/configure")
 
   file gtest_pkg do
     sh(<<~EOF)
@@ -15,20 +22,25 @@ namespace "gumbo" do
     EOF
   end
 
-  desc "Run the gumbo parser test suite"
-  task "test" => gtest_lib do
-    sh("make -j2 -C gumbo-parser")
+  file gumbotest_configure => gtest_pkg do
+    sh("autoreconf", "-fiv", chdir: "gumbo-parser")
   end
 
-  desc "Clean up after the gumbo parser test suite"
-  task "clean" do
-    sh("make -j2 -C gumbo-parser clean")
+  desc "Run the gumbo parser test suite"
+  task "test" => nokogiri_makefile do
+    sh("make", "-j2", "-C", gumbotest_builddir, "check")
   end
 
+  # Make sure the libgumbo configure script is created before trying to compile the extension.
+  file nokogiri_makefile => gumbotest_configure
+
   CLOBBER.add(gtest_pkg)
+  CLOBBER.add(gumbotest_configure)
+  CLOBBER.add("gumbo-parser/Makefile.in")
+  CLOBBER.add("gumbo-parser/configure")
+  CLOBBER.add("gumbo-parser/src/Makefile.in")
+  CLOBBER.add("gumbo-parser/test/Makefile.in")
 end
 
 desc "Run the gumbo parser test suite"
 task "gumbo" => "gumbo:test"
-
-task "clean" => "gumbo:clean" # rubocop:disable Rake/Desc

From 7a6613cb376bbf772548431e8ff814bf240ae1d1 Mon Sep 17 00:00:00 2001
From: Mike Dalessio <mike.dalessio@gmail.com>
Date: Fri, 7 Jun 2024 15:56:54 -0400
Subject: [PATCH 2/3] Small changes to the libgumbo build branch

- remove the `rebuild-libgumbo` target which is probably not needed
  once #3220 is merged
- avoid downloading googletest when just running `rake compile`
- update the Manifest check to ignore new gumbo-parser/ files
- simplify the 'host' variable in gumbo.rake, since we're only using
  it in development (and not cross-compiling)
- put back the libgumbo $libs, $LIBPATH, and include flags modifications
- make sure libgumbo is always built as a static library
---
 ext/nokogiri/extconf.rb     | 17 +++++------------
 rakelib/check-manifest.rake |  5 +++++
 rakelib/gumbo.rake          | 28 +++++++++++++---------------
 3 files changed, 23 insertions(+), 27 deletions(-)

diff --git a/ext/nokogiri/extconf.rb b/ext/nokogiri/extconf.rb
index 4d0d627fb43..5fd9d7c0e0d 100644
--- a/ext/nokogiri/extconf.rb
+++ b/ext/nokogiri/extconf.rb
@@ -1055,9 +1055,13 @@ def configure
   $VPATH << "$(srcdir)/../../gumbo-parser/src"
   find_header("nokogiri_gumbo.h") || abort("nokogiri_gumbo.h not found")
 else
-  libgumbo_recipe = process_recipe("libgumbo", "1.0.0-nokogiri", static_p, cross_build_p, false) do |recipe|
+  libgumbo_recipe = process_recipe("libgumbo", "1.0.0-nokogiri", true, cross_build_p, false) do |recipe|
     recipe.source_directory = File.join(PACKAGE_ROOT_DIR, "gumbo-parser")
   end
+  append_cppflags("-I#{File.join(libgumbo_recipe.path, "include")}")
+  $libs = $libs + " " + File.join(libgumbo_recipe.path, "lib", "libgumbo.a")
+  $LIBPATH = $LIBPATH | [File.join(libgumbo_recipe.path, "lib")]
+  ensure_func("gumbo_parse_with_options", "nokogiri_gumbo.h")
 end
 
 have_func("xmlHasFeature") || abort("xmlHasFeature() is missing.") # introduced in libxml 2.6.21
@@ -1102,17 +1106,6 @@ def configure
       \t-$(Q)$(RUBY) $(srcdir)/extconf.rb --clean --#{static_p ? "enable" : "disable"}-static
     EOF
   end
-  File.open("Makefile", "at") do |mk|
-    mk.print(<<~EOF)
-
-      .PHONY: rebuild-libgumbo
-
-      $(TARGET_SO): rebuild-libgumbo
-      rebuild-libgumbo:
-      \t-$(Q)$(MAKE) -C tmp/#{libgumbo_recipe.host}/ports/libgumbo/1.0.0-nokogiri/libgumbo-1.0.0-nokogiri install
-    EOF
-  end
-
 end
 
 # rubocop:enable Style/GlobalVars
diff --git a/rakelib/check-manifest.rake b/rakelib/check-manifest.rake
index 50ca2d53eb5..53e391b9923 100644
--- a/rakelib/check-manifest.rake
+++ b/rakelib/check-manifest.rake
@@ -56,7 +56,12 @@ task :check_manifest, [:verbose] do |_, args|
     [0-9]*
     appveyor.yml
     **/compile_commands.json
+    gumbo-parser/Makefile.old
+    gumbo-parser/aclocal.m4
+    gumbo-parser/autom4te.cache/*
+    gumbo-parser/build-aux/*
     gumbo-parser/fuzzer/*
+    gumbo-parser/googletest/*
     gumbo-parser/test/*
     gumbo-parser/gperf-filter.sed
     lib/nokogiri/**/nokogiri.{jar,so}
diff --git a/rakelib/gumbo.rake b/rakelib/gumbo.rake
index 9dfccd62c4e..fadfadf8910 100644
--- a/rakelib/gumbo.rake
+++ b/rakelib/gumbo.rake
@@ -1,20 +1,18 @@
 # frozen_string_literal: true
 
+# We want to run the gumbo test suite using exactly the same compiled gumbo-parser
+# that Nokogiri uses.
+#
+# To that end, we first need to get the Rake ExtensionTask to run extconf.rb which will
+# run the gumbo-parser's configure script. We don't want to compile the extension
+# at this point, so we make `gumbo:test` depend on the Nokogiri Makefile.
 namespace "gumbo" do
-  # We want to run the gumbo test suite using exactly the same compiled gumbo-parser
-  # that Nokogiri uses.
-  #
-  # To that end, we first need to get the Rake ExtensionTask to run extconf.rb which will
-  # run the gumbo-parser's configure script. We don't want to compile the extension
-  # at this point, so we make `gumbo:test` depend on the Nokogiri Makefile.
-
-  gtest_pkg = "gumbo-parser/googletest"
-  host = RbConfig::CONFIG["host_alias"].empty? ? RbConfig::CONFIG["host"] : RbConfig::CONFIG["host_alias"]
-  host = host.gsub("i386", "i686")
+  host = RbConfig::CONFIG["host"]
   nokogiri_makefile = File.join("tmp/#{RUBY_PLATFORM}/nokogiri/#{RUBY_VERSION}/Makefile")
   gumbotest_builddir = "tmp/#{RUBY_PLATFORM}/nokogiri/#{RUBY_VERSION}/tmp/#{host}/ports/libgumbo/1.0.0-nokogiri/libgumbo-1.0.0-nokogiri"
   gumbotest_configure = File.absolute_path("gumbo-parser/configure")
 
+  gtest_pkg = "gumbo-parser/googletest"
   file gtest_pkg do
     sh(<<~EOF)
       curl -L https://github.com/google/googletest/archive/release-1.8.0.tar.gz | \
@@ -22,15 +20,15 @@ namespace "gumbo" do
     EOF
   end
 
-  file gumbotest_configure => gtest_pkg do
-    sh("autoreconf", "-fiv", chdir: "gumbo-parser")
-  end
-
   desc "Run the gumbo parser test suite"
-  task "test" => nokogiri_makefile do
+  task "test" => [nokogiri_makefile, gtest_pkg] do
     sh("make", "-j2", "-C", gumbotest_builddir, "check")
   end
 
+  file gumbotest_configure do
+    sh("autoreconf", "-fiv", chdir: "gumbo-parser")
+  end
+
   # Make sure the libgumbo configure script is created before trying to compile the extension.
   file nokogiri_makefile => gumbotest_configure
 

From 6e6b368fc0d6d91291ae684cca3b2ec469e77659 Mon Sep 17 00:00:00 2001
From: Mike Dalessio <mike.dalessio@gmail.com>
Date: Fri, 7 Jun 2024 22:21:43 -0400
Subject: [PATCH 3/3] dev: make sure clobber removes all autoconf-generated
 files

Also make sure autoconf is installed in the basic Ubuntu container in CI.
---
 .github/workflows/ci.yml | 1 +
 rakelib/gumbo.rake       | 4 +++-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 0f17a12cdbb..5c255c0dd13 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -46,6 +46,7 @@ jobs:
       - uses: actions/checkout@v1 # v1 because of https://github.com/actions/checkout/issues/334
         with:
           submodules: true
+      - run: apt install -y autoconf
       - run: bundle install --local || bundle install
       - run: bundle exec rake compile -- --${{matrix.sys}}-system-libraries
       - run: bundle exec rake test
diff --git a/rakelib/gumbo.rake b/rakelib/gumbo.rake
index fadfadf8910..4ced6621278 100644
--- a/rakelib/gumbo.rake
+++ b/rakelib/gumbo.rake
@@ -35,9 +35,11 @@ namespace "gumbo" do
   CLOBBER.add(gtest_pkg)
   CLOBBER.add(gumbotest_configure)
   CLOBBER.add("gumbo-parser/Makefile.in")
-  CLOBBER.add("gumbo-parser/configure")
   CLOBBER.add("gumbo-parser/src/Makefile.in")
   CLOBBER.add("gumbo-parser/test/Makefile.in")
+  CLOBBER.add("gumbo-parser/build-aux/*")
+  CLOBBER.add("gumbo-parser/autom4te.cache")
+  CLOBBER.add("gumbo-parser/aclocal.m4")
 end
 
 desc "Run the gumbo parser test suite"