@@ -74,7 +74,9 @@ void _filterx_type_init_methods(FilterXType *type);
       __VA_ARGS__ \
     }
 
-#define FILTERX_OBJECT_MAGIC_BIAS G_MAXINT32
+#define FILTERX_OBJECT_REFCOUNT_FROZEN (G_MAXINT32)
+#define FILTERX_OBJECT_REFCOUNT_STACK (G_MAXINT32-1)
+#define FILTERX_OBJECT_REFCOUNT_OFLOW_MARK (FILTERX_OBJECT_REFCOUNT_STACK-1024)
 
 
 FILTERX_DECLARE_TYPE(object);
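
For reference, a minimal sketch (not part of the patch) of the ordering these sentinels rely on: the frozen and stack markers occupy the top two values of the positive gint range, and the overflow mark leaves a 1024-value guard band below them, so ordinary refcounts never collide with the special values.

/* Illustrative compile-time checks of the assumed sentinel layout */
G_STATIC_ASSERT(FILTERX_OBJECT_REFCOUNT_OFLOW_MARK < FILTERX_OBJECT_REFCOUNT_STACK);
G_STATIC_ASSERT(FILTERX_OBJECT_REFCOUNT_STACK < FILTERX_OBJECT_REFCOUNT_FROZEN);
G_STATIC_ASSERT(FILTERX_OBJECT_REFCOUNT_STACK - FILTERX_OBJECT_REFCOUNT_OFLOW_MARK == 1024);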
@@ -110,7 +112,7 @@ void filterx_object_free_method(FilterXObject *self);
 static inline gboolean
 filterx_object_is_frozen(FilterXObject *self)
 {
-  return g_atomic_counter_get(&self->ref_cnt) == FILTERX_OBJECT_MAGIC_BIAS;
+  return g_atomic_counter_get(&self->ref_cnt) == FILTERX_OBJECT_REFCOUNT_FROZEN;
 }
 
 static inline FilterXObject *
@@ -119,12 +121,53 @@ filterx_object_ref(FilterXObject *self)
   if (!self)
     return NULL;
 
-  if (filterx_object_is_frozen(self))
-    return self;
+  gint r = g_atomic_counter_get(&self->ref_cnt);
+  if (r < FILTERX_OBJECT_REFCOUNT_OFLOW_MARK && r > 0)
+    {
+      /* NOTE: getting into this path is racy, as two threads might be
+       * checking the overflow mark in parallel and then decide we need to
+       * run this (normal) path.  In this case, the race could cause ref_cnt
+       * to reach FILTERX_OBJECT_REFCOUNT_OFLOW_MARK without triggering the
+       * overflow assert below.
+       *
+       * To mitigate this, FILTERX_OBJECT_REFCOUNT_OFLOW_MARK is set to 1024
+       * less than the first value that we handle specially.  This means
+       * that even if the race is lost, we would need 1024 competing CPUs
+       * concurrently losing the race and incrementing ref_cnt here.  And
+       * even in this case the only issue is that we don't detect an actual
+       * overflow at runtime that should never occur in the first place.
+       *
+       * This is _really_ unlikely, and we will detect ref_cnt overflows in
+       * non-doom scenarios first, so we can address the actual issue (which
+       * might be a reference counting bug somewhere).
+       *
+       * If fewer than 1024 CPUs lose the race, the refcount would end
+       * up in the range between FILTERX_OBJECT_REFCOUNT_OFLOW_MARK and
+       * FILTERX_OBJECT_REFCOUNT_STACK, causing the assertion at the end of
+       * this function to trigger an abort.
+       *
+       * The non-racy solution would be to use a
+       * g_atomic_int_exchange_and_add() call and check the returned old
+       * value against FILTERX_OBJECT_REFCOUNT_OFLOW_MARK again, but that's
+       * an extra conditional in a hot path.
+       */
+
+      g_atomic_counter_inc(&self->ref_cnt);
+      return self;
+    }
 
-  g_atomic_counter_inc(&self->ref_cnt);
+  if (r == FILTERX_OBJECT_REFCOUNT_FROZEN)
+    return self;
 
-  return self;
+  if (r == FILTERX_OBJECT_REFCOUNT_STACK)
+    {
+      /* we can't use filterx_object_clone() directly, as that's an inline
+       * function declared further below.  Also, filterx_object_clone() does
+       * not clone immutable objects.  We only support allocating immutable
+       * objects on the stack. */
+      return self->type->clone(self);
+    }
+  g_assert_not_reached();
 }
 
 static inline void
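
For comparison, here is a hedged sketch (not part of the patch) of the non-racy alternative the NOTE mentions: do the increment with a fetch-and-add and re-check the value that was actually observed, which removes the need for the 1024-entry guard band at the price of one more conditional on the hot path. It assumes GAtomicCounter exposes a plain gint member named counter (as the FILTERX_OBJECT_STACK_INIT initializer at the end of this patch suggests) and that g_atomic_int_add() returns the pre-add value (GLib >= 2.30).

/* Hypothetical strict variant; illustrative only, name and layout assumed. */
static inline FilterXObject *
filterx_object_ref_checked(FilterXObject *self)
{
  if (!self)
    return NULL;

  gint r = g_atomic_counter_get(&self->ref_cnt);
  if (r == FILTERX_OBJECT_REFCOUNT_FROZEN)
    return self;
  if (r == FILTERX_OBJECT_REFCOUNT_STACK)
    return self->type->clone(self);

  /* fetch-and-add lets us inspect the value we actually incremented from,
   * so an overflow is detected deterministically instead of relying on the
   * guard band; the g_assert() is the extra hot-path conditional */
  gint old_value = g_atomic_int_add(&self->ref_cnt.counter, 1);
  g_assert(old_value > 0 && old_value + 1 < FILTERX_OBJECT_REFCOUNT_STACK);
  return self;
}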
@@ -133,10 +176,30 @@ filterx_object_unref(FilterXObject *self)
   if (!self)
     return;
 
-  if (filterx_object_is_frozen(self))
+  gint r = g_atomic_counter_get(&self->ref_cnt);
+  if (r == FILTERX_OBJECT_REFCOUNT_FROZEN)
     return;
+  if (r == FILTERX_OBJECT_REFCOUNT_STACK)
+    {
+      /* NOTE: Normally, stack-based allocations are only used by a single
+       * thread.  Furthermore, code where we use this object will only have
+       * a borrowed reference, i.e. it can't cross thread boundaries.  With
+       * that said, even though the condition of this if() statement is
+       * racy, it's not actually racing, as we only have a single relevant
+       * thread.
+       *
+       * In the rare case where we do want to pass a stack-based allocation
+       * to another thread, we would need to pass a new reference to it, and
+       * filterx_object_ref() clones a new object in this case, which again
+       * means that we won't have an actual race here.
+       */
+
+      g_atomic_counter_set(&self->ref_cnt, 0);
+      return;
+    }
+  if (r <= 0)
+    g_assert_not_reached();
 
-  g_assert(g_atomic_counter_get(&self->ref_cnt) > 0);
   if (g_atomic_counter_dec_and_test(&self->ref_cnt))
     {
       self->type->free_fn(self);
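
To make the threading argument above concrete, a small hedged sketch of the hand-off pattern the NOTE relies on; WorkerQueue and worker_queue_push() are purely hypothetical placeholders for whatever cross-thread channel a caller might use, and FILTERX_OBJECT_STACK_INIT is the macro added at the end of this patch.

/* Hypothetical hand-off helper: the other thread receives the heap clone
 * produced by filterx_object_ref() (assuming the type implements clone()),
 * so the stack instance itself never becomes visible outside this thread. */
static void
hand_off_to_worker(WorkerQueue *queue)
{
  FilterXObject local = FILTERX_OBJECT_STACK_INIT(object);

  worker_queue_push(queue, filterx_object_ref(&local));

  /* still single-threaded here, so this unref cannot race: it merely resets
   * the stack sentinel to 0 and never calls free_fn() */
  filterx_object_unref(&local);
}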
@@ -354,4 +417,14 @@ filterx_object_set_modified_in_place(FilterXObject *self, gboolean modified)
   self->modified_in_place = modified;
 }
 
+#define FILTERX_OBJECT_STACK_INIT(_type) \
+  { \
+    .ref_cnt = { .counter = FILTERX_OBJECT_REFCOUNT_STACK }, \
+    .fx_ref_cnt = { .counter = 0 }, \
+    .modified_in_place = FALSE, \
+    .readonly = TRUE, \
+    .weak_referenced = FALSE, \
+    .type = &FILTERX_TYPE_NAME(_type) \
+  }
+
 #endif
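
Finally, a hedged usage sketch of the new initializer (illustrative only): it uses the base object type declared earlier in this header, whereas real call sites would typically wrap FILTERX_OBJECT_STACK_INIT in a type-specific macro so the concrete type's payload fields get initialized as well.

/* Minimal sketch: a stack-resident FilterXObject carrying the sentinel refcount */
static void
example_stack_object(void)
{
  FilterXObject obj = FILTERX_OBJECT_STACK_INIT(object);

  g_assert(!filterx_object_is_frozen(&obj));   /* STACK is distinct from FROZEN */
  g_assert(obj.readonly);                      /* stack objects start out read-only */

  filterx_object_unref(&obj);                  /* resets the sentinel to 0, never calls free_fn() */
}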