From cf30f727f568d2e7c2d3c992c5fa5740049fd73b Mon Sep 17 00:00:00 2001 From: Reece Dunham Date: Sat, 18 Apr 2026 16:35:34 -0400 Subject: [PATCH] Fix use after free Between v1.4.1 and v1.5.1.1, 6793d14972f69edf2433d06c1cca9c183e967bd1 added a GC mark function to the `SyckNode` Data wrapper created inside `rb_syck_load_handler`. The change fixed one crash (GC collecting node children mid-parse) but opened a new one: the parser frees the node the moment the handler returns, so any later GC that still reaches the wrapper, via a conservative stack root, calls the mark function on a dangling pointer. --- ext/syck/rubyext.c | 14 +++++++++++++- test/test_gc.rb | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+), 1 deletion(-) create mode 100644 test/test_gc.rb diff --git a/ext/syck/rubyext.c b/ext/syck/rubyext.c index 73e5f98..45de5aa 100644 --- a/ext/syck/rubyext.c +++ b/ext/syck/rubyext.c @@ -645,6 +645,7 @@ SYMID rb_syck_load_handler(SyckParser *p, SyckNode *n) { VALUE obj = Qnil; + VALUE node_wrapper; struct parser_xtra *bonus = (struct parser_xtra *)p->bonus; VALUE resolver = bonus->resolver; if ( NIL_P( resolver ) ) @@ -655,7 +656,17 @@ rb_syck_load_handler(SyckParser *p, SyckNode *n) /* * Create node, */ - obj = rb_funcall( resolver, s_node_import, 1, Data_Wrap_Struct( cNode, syck_node_mark, NULL, n ) ); + node_wrapper = Data_Wrap_Struct( cNode, syck_node_mark, NULL, n ); + obj = rb_funcall( resolver, s_node_import, 1, node_wrapper ); + + /* + * syck_hdlr_add_node frees `n` as soon as we return (handler.c:25, when + * the node has no anchor). Ruby may still keep `node_wrapper` reachable + * via a conservative GC root on the machine stack, so the next GC would + * call syck_node_mark() with a dangling SyckNode*. Zero the wrapper's + * data pointer to make the mark a no-op. See ruby/syck#50. + */ + DATA_PTR( node_wrapper ) = NULL; /* * ID already set, let's alter the symbol table to accept the new object @@ -1413,6 +1424,7 @@ static void syck_node_mark(SyckNode *n) { int i; + if ( n == NULL ) return; rb_gc_mark_maybe( n->id ); switch ( n->kind ) { diff --git a/test/test_gc.rb b/test/test_gc.rb new file mode 100644 index 0000000..48452d2 --- /dev/null +++ b/test/test_gc.rb @@ -0,0 +1,33 @@ +require_relative "helper" + +# Regression test for ruby/syck#50 - heap-use-after-free in syck_node_mark +# when GC runs between syck_hdlr_add_node freeing a SyckNode and the Ruby +# Data wrapper that pointed at it being collected. +# +# GC.stress forces a full GC on every allocation, so if the fix regresses +# this test will segfault (or fire under ASAN/valgrind). +class TestGC < Test::Unit::TestCase + def test_load_under_gc_stress + yaml = ({}.tap { |h| 50.times { |i| h["k#{i}"] = "v" * 64 } }).to_yaml + + GC.stress = true + begin + 3.times { Syck.load(yaml) } + ensure + GC.stress = false + end + end + + def test_load_many_documents_under_gc_stress + docs = Array.new(5) do |i| + ({}.tap { |h| 30.times { |k| h["k_#{i}_#{k}"] = "v_#{i}_#{k}" } }).to_yaml + end + + GC.stress = true + begin + docs.each { |d| Syck.load(d) } + ensure + GC.stress = false + end + end +end