From 1dbecfb6a63ac9c7e81bbc5717aca0851bff7fad Mon Sep 17 00:00:00 2001 From: Eirik Tsarpalis Date: Mon, 11 Nov 2024 21:54:33 +0000 Subject: [PATCH] Expose options for making schema generation conformant with the subset accepted by OpenAI. (#5619) * Expose options for making schema generation conformant with the subset accepted by OpenAI. * Uses the same set of defaults in all layers. --- .../Utilities/AIJsonSchemaCreateOptions.cs | 25 +++- .../Utilities/AIJsonUtilities.Schema.cs | 44 ++++++- .../Functions/AIFunctionFactory.cs | 13 +- .../AIFunctionFactoryCreateOptions.cs | 10 ++ .../Utilities/AIJsonUtilitiesTests.cs | 117 ++++++++++++++++-- .../Functions/AIFunctionFactoryTest.cs | 13 ++ 6 files changed, 198 insertions(+), 24 deletions(-) diff --git a/src/Libraries/Microsoft.Extensions.AI.Abstractions/Utilities/AIJsonSchemaCreateOptions.cs b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Utilities/AIJsonSchemaCreateOptions.cs index 150673560df..2ce42c3e618 100644 --- a/src/Libraries/Microsoft.Extensions.AI.Abstractions/Utilities/AIJsonSchemaCreateOptions.cs +++ b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Utilities/AIJsonSchemaCreateOptions.cs @@ -16,15 +16,36 @@ public sealed class AIJsonSchemaCreateOptions /// /// Gets a value indicating whether to include the type keyword in inferred schemas for .NET enums. /// - public bool IncludeTypeInEnumSchemas { get; init; } + public bool IncludeTypeInEnumSchemas { get; init; } = true; /// /// Gets a value indicating whether to generate schemas with the additionalProperties set to false for .NET objects. /// - public bool DisallowAdditionalProperties { get; init; } + public bool DisallowAdditionalProperties { get; init; } = true; /// /// Gets a value indicating whether to include the $schema keyword in inferred schemas. /// public bool IncludeSchemaKeyword { get; init; } + + /// + /// Gets a value indicating whether to mark all properties as required in the schema. + /// + public bool RequireAllProperties { get; init; } = true; + + /// + /// Gets a value indicating whether to filter keywords that are disallowed by certain AI vendors. + /// + /// + /// Filters a number of non-essential schema keywords that are not yet supported by some AI vendors. + /// These include: + /// + /// The "minLength", "maxLength", "pattern", and "format" keywords. + /// The "minimum", "maximum", and "multipleOf" keywords. + /// The "patternProperties", "unevaluatedProperties", "propertyNames", "minProperties", and "maxProperties" keywords. + /// The "unevaluatedItems", "contains", "minContains", "maxContains", "minItems", "maxItems", and "uniqueItems" keywords. + /// + /// See also https://platform.openai.com/docs/guides/structured-outputs#some-type-specific-keywords-are-not-yet-supported. + /// + public bool FilterDisallowedKeywords { get; init; } = true; } diff --git a/src/Libraries/Microsoft.Extensions.AI.Abstractions/Utilities/AIJsonUtilities.Schema.cs b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Utilities/AIJsonUtilities.Schema.cs index fa893450d0f..195cf062eb3 100644 --- a/src/Libraries/Microsoft.Extensions.AI.Abstractions/Utilities/AIJsonUtilities.Schema.cs +++ b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Utilities/AIJsonUtilities.Schema.cs @@ -3,6 +3,7 @@ using System; using System.Collections.Concurrent; +using System.Collections.Generic; using System.ComponentModel; using System.Diagnostics; #if !NET9_0_OR_GREATER @@ -30,7 +31,9 @@ object? DefaultValue, bool IncludeSchemaUri, bool DisallowAdditionalProperties, - bool IncludeTypeInEnumSchemas); + bool IncludeTypeInEnumSchemas, + bool RequireAllProperties, + bool FilterDisallowedKeywords); namespace Microsoft.Extensions.AI; @@ -52,6 +55,10 @@ public static partial class AIJsonUtilities /// Gets a JSON schema only accepting null values. private static readonly JsonElement _nullJsonSchema = ParseJsonElement("""{"type":"null"}"""u8); + // List of keywords used by JsonSchemaExporter but explicitly disallowed by some AI vendors. + // cf. https://platform.openai.com/docs/guides/structured-outputs#some-type-specific-keywords-are-not-yet-supported + private static readonly string[] _schemaKeywordsDisallowedByAIVendors = ["minLength", "maxLength", "pattern", "format"]; + /// /// Determines a JSON schema for the provided parameter metadata. /// @@ -122,7 +129,9 @@ public static JsonElement CreateParameterJsonSchema( defaultValue, IncludeSchemaUri: false, inferenceOptions.DisallowAdditionalProperties, - inferenceOptions.IncludeTypeInEnumSchemas); + inferenceOptions.IncludeTypeInEnumSchemas, + inferenceOptions.RequireAllProperties, + inferenceOptions.FilterDisallowedKeywords); return GetJsonSchemaCached(serializerOptions, key); } @@ -154,7 +163,9 @@ public static JsonElement CreateJsonSchema( defaultValue, inferenceOptions.IncludeSchemaKeyword, inferenceOptions.DisallowAdditionalProperties, - inferenceOptions.IncludeTypeInEnumSchemas); + inferenceOptions.IncludeTypeInEnumSchemas, + inferenceOptions.RequireAllProperties, + inferenceOptions.FilterDisallowedKeywords); return GetJsonSchemaCached(serializerOptions, key); } @@ -242,6 +253,7 @@ JsonNode TransformSchemaNode(JsonSchemaExporterContext ctx, JsonNode schema) const string PatternPropertyName = "pattern"; const string EnumPropertyName = "enum"; const string PropertiesPropertyName = "properties"; + const string RequiredPropertyName = "required"; const string AdditionalPropertiesPropertyName = "additionalProperties"; const string DefaultPropertyName = "default"; const string RefPropertyName = "$ref"; @@ -275,11 +287,35 @@ JsonNode TransformSchemaNode(JsonSchemaExporterContext ctx, JsonNode schema) } // Disallow additional properties in object schemas - if (key.DisallowAdditionalProperties && objSchema.ContainsKey(PropertiesPropertyName) && !objSchema.ContainsKey(AdditionalPropertiesPropertyName)) + if (key.DisallowAdditionalProperties && + objSchema.ContainsKey(PropertiesPropertyName) && + !objSchema.ContainsKey(AdditionalPropertiesPropertyName)) { objSchema.Add(AdditionalPropertiesPropertyName, (JsonNode)false); } + // Mark all properties as required + if (key.RequireAllProperties && + objSchema.TryGetPropertyValue(PropertiesPropertyName, out JsonNode? properties) && + properties is JsonObject propertiesObj) + { + _ = objSchema.TryGetPropertyValue(RequiredPropertyName, out JsonNode? required); + if (required is not JsonArray { } requiredArray || requiredArray.Count != propertiesObj.Count) + { + requiredArray = [.. propertiesObj.Select(prop => prop.Key)]; + objSchema[RequiredPropertyName] = requiredArray; + } + } + + // Filter potentially disallowed keywords. + if (key.FilterDisallowedKeywords) + { + foreach (string keyword in _schemaKeywordsDisallowedByAIVendors) + { + _ = objSchema.Remove(keyword); + } + } + // Some consumers of the JSON schema, including Ollama as of v0.3.13, don't understand // schemas with "type": [...], and only understand "type" being a single value. // STJ represents .NET integer types as ["string", "integer"], which will then lead to an error. diff --git a/src/Libraries/Microsoft.Extensions.AI/Functions/AIFunctionFactory.cs b/src/Libraries/Microsoft.Extensions.AI/Functions/AIFunctionFactory.cs index a19608a2977..09d55388f75 100644 --- a/src/Libraries/Microsoft.Extensions.AI/Functions/AIFunctionFactory.cs +++ b/src/Libraries/Microsoft.Extensions.AI/Functions/AIFunctionFactory.cs @@ -189,7 +189,7 @@ static bool IsAsyncMethod(MethodInfo method) bool sawAIContextParameter = false; for (int i = 0; i < parameters.Length; i++) { - if (GetParameterMarshaller(options.SerializerOptions, parameters[i], ref sawAIContextParameter, out _parameterMarshallers[i]) is AIFunctionParameterMetadata parameterView) + if (GetParameterMarshaller(options, parameters[i], ref sawAIContextParameter, out _parameterMarshallers[i]) is AIFunctionParameterMetadata parameterView) { parameterMetadata?.Add(parameterView); } @@ -209,7 +209,7 @@ static bool IsAsyncMethod(MethodInfo method) { ParameterType = returnType, Description = method.ReturnParameter.GetCustomAttribute(inherit: true)?.Description, - Schema = AIJsonUtilities.CreateJsonSchema(returnType, serializerOptions: options.SerializerOptions), + Schema = AIJsonUtilities.CreateJsonSchema(returnType, serializerOptions: options.SerializerOptions, inferenceOptions: options.SchemaCreateOptions), }, AdditionalProperties = options.AdditionalProperties ?? EmptyReadOnlyDictionary.Instance, JsonSerializerOptions = options.SerializerOptions, @@ -272,7 +272,7 @@ static bool IsAsyncMethod(MethodInfo method) /// Gets a delegate for handling the marshaling of a parameter. /// private static AIFunctionParameterMetadata? GetParameterMarshaller( - JsonSerializerOptions options, + AIFunctionFactoryCreateOptions options, ParameterInfo parameter, ref bool sawAIFunctionContext, out Func, AIFunctionContext?, object?> marshaller) @@ -302,7 +302,7 @@ static bool IsAsyncMethod(MethodInfo method) // Resolve the contract used to marshal the value from JSON -- can throw if not supported or not found. Type parameterType = parameter.ParameterType; - JsonTypeInfo typeInfo = options.GetTypeInfo(parameterType); + JsonTypeInfo typeInfo = options.SerializerOptions.GetTypeInfo(parameterType); // Create a marshaller that simply looks up the parameter by name in the arguments dictionary. marshaller = (IReadOnlyDictionary arguments, AIFunctionContext? _) => @@ -325,7 +325,7 @@ static bool IsAsyncMethod(MethodInfo method) #pragma warning disable CA1031 // Do not catch general exception types try { - string json = JsonSerializer.Serialize(value, options.GetTypeInfo(value.GetType())); + string json = JsonSerializer.Serialize(value, options.SerializerOptions.GetTypeInfo(value.GetType())); return JsonSerializer.Deserialize(json, typeInfo); } catch @@ -361,7 +361,8 @@ static bool IsAsyncMethod(MethodInfo method) description, parameter.HasDefaultValue, parameter.DefaultValue, - options) + options.SerializerOptions, + options.SchemaCreateOptions) }; } diff --git a/src/Libraries/Microsoft.Extensions.AI/Functions/AIFunctionFactoryCreateOptions.cs b/src/Libraries/Microsoft.Extensions.AI/Functions/AIFunctionFactoryCreateOptions.cs index 6483b83c0ad..1f33c6d4155 100644 --- a/src/Libraries/Microsoft.Extensions.AI/Functions/AIFunctionFactoryCreateOptions.cs +++ b/src/Libraries/Microsoft.Extensions.AI/Functions/AIFunctionFactoryCreateOptions.cs @@ -16,6 +16,7 @@ namespace Microsoft.Extensions.AI; public sealed class AIFunctionFactoryCreateOptions { private JsonSerializerOptions _options = AIJsonUtilities.DefaultOptions; + private AIJsonSchemaCreateOptions _schemaCreateOptions = AIJsonSchemaCreateOptions.Default; /// /// Initializes a new instance of the class. @@ -31,6 +32,15 @@ public JsonSerializerOptions SerializerOptions set => _options = Throw.IfNull(value); } + /// + /// Gets or sets the governing the generation of JSON schemas for the function. + /// + public AIJsonSchemaCreateOptions SchemaCreateOptions + { + get => _schemaCreateOptions; + set => _schemaCreateOptions = Throw.IfNull(value); + } + /// Gets or sets the name to use for the function. /// /// The name to use for the function. The default value is a name derived from the method represented by the passed or . diff --git a/test/Libraries/Microsoft.Extensions.AI.Abstractions.Tests/Utilities/AIJsonUtilitiesTests.cs b/test/Libraries/Microsoft.Extensions.AI.Abstractions.Tests/Utilities/AIJsonUtilitiesTests.cs index 52f9cad246d..4107618d85b 100644 --- a/test/Libraries/Microsoft.Extensions.AI.Abstractions.Tests/Utilities/AIJsonUtilitiesTests.cs +++ b/test/Libraries/Microsoft.Extensions.AI.Abstractions.Tests/Utilities/AIJsonUtilitiesTests.cs @@ -1,10 +1,13 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. +using System; using System.ComponentModel; +using System.Linq; using System.Text.Json; using System.Text.Json.Nodes; using System.Text.Json.Serialization; +using System.Text.Json.Serialization.Metadata; using Microsoft.Extensions.AI.JsonSchemaExporter; using Xunit; @@ -38,9 +41,11 @@ public static void DefaultOptions_HasExpectedConfiguration() public static void AIJsonSchemaCreateOptions_DefaultInstance_ReturnsExpectedValues(bool useSingleton) { AIJsonSchemaCreateOptions options = useSingleton ? AIJsonSchemaCreateOptions.Default : new AIJsonSchemaCreateOptions(); - Assert.False(options.IncludeTypeInEnumSchemas); - Assert.False(options.DisallowAdditionalProperties); + Assert.True(options.IncludeTypeInEnumSchemas); + Assert.True(options.DisallowAdditionalProperties); Assert.False(options.IncludeSchemaKeyword); + Assert.True(options.RequireAllProperties); + Assert.True(options.FilterDisallowedKeywords); } [Fact] @@ -56,6 +61,7 @@ public static void CreateJsonSchema_DefaultParameters_GeneratesExpectedJsonSchem "type": "integer" }, "EnumValue": { + "type": "string", "enum": ["A", "B"] }, "Value": { @@ -63,11 +69,13 @@ public static void CreateJsonSchema_DefaultParameters_GeneratesExpectedJsonSchem "default": null } }, - "required": ["Key", "EnumValue"] + "required": ["Key", "EnumValue", "Value"], + "additionalProperties": false } """).RootElement; JsonElement actual = AIJsonUtilities.CreateJsonSchema(typeof(MyPoco), serializerOptions: JsonSerializerOptions.Default); + Assert.True(JsonElement.DeepEquals(expected, actual)); } @@ -85,7 +93,6 @@ public static void CreateJsonSchema_OverriddenParameters_GeneratesExpectedJsonSc "type": "integer" }, "EnumValue": { - "type": "string", "enum": ["A", "B"] }, "Value": { @@ -94,28 +101,109 @@ public static void CreateJsonSchema_OverriddenParameters_GeneratesExpectedJsonSc } }, "required": ["Key", "EnumValue"], - "additionalProperties": false, "default": "42" } """).RootElement; AIJsonSchemaCreateOptions inferenceOptions = new AIJsonSchemaCreateOptions { - IncludeTypeInEnumSchemas = true, - DisallowAdditionalProperties = true, - IncludeSchemaKeyword = true + IncludeTypeInEnumSchemas = false, + DisallowAdditionalProperties = false, + IncludeSchemaKeyword = true, + RequireAllProperties = false, }; - JsonElement actual = AIJsonUtilities.CreateJsonSchema(typeof(MyPoco), + JsonElement actual = AIJsonUtilities.CreateJsonSchema( + typeof(MyPoco), description: "alternative description", hasDefaultValue: true, defaultValue: 42, - JsonSerializerOptions.Default, - inferenceOptions); + serializerOptions: JsonSerializerOptions.Default, + inferenceOptions: inferenceOptions); Assert.True(JsonElement.DeepEquals(expected, actual)); } + [Fact] + public static void CreateJsonSchema_FiltersDisallowedKeywords() + { + JsonElement expected = JsonDocument.Parse(""" + { + "type": "object", + "properties": { + "Date": { + "type": "string" + }, + "TimeSpan": { + "$comment": "Represents a System.TimeSpan value.", + "type": "string" + }, + "Char" : { + "type": "string" + } + }, + "required": ["Date","TimeSpan","Char"], + "additionalProperties": false + } + """).RootElement; + + JsonElement actual = AIJsonUtilities.CreateJsonSchema(typeof(PocoWithTypesWithOpenAIUnsupportedKeywords), serializerOptions: JsonSerializerOptions.Default); + + Assert.True(JsonElement.DeepEquals(expected, actual)); + } + + [Fact] + public static void CreateJsonSchema_FilterDisallowedKeywords_Disabled() + { + JsonElement expected = JsonDocument.Parse(""" + { + "type": "object", + "properties": { + "Date": { + "type": "string", + "format": "date-time" + }, + "TimeSpan": { + "$comment": "Represents a System.TimeSpan value.", + "type": "string", + "pattern": "^-?(\\d+\\.)?\\d{2}:\\d{2}:\\d{2}(\\.\\d{1,7})?$" + }, + "Char" : { + "type": "string", + "minLength": 1, + "maxLength": 1 + } + }, + "required": ["Date","TimeSpan","Char"], + "additionalProperties": false + } + """).RootElement; + + AIJsonSchemaCreateOptions inferenceOptions = new() + { + FilterDisallowedKeywords = false + }; + + JsonElement actual = AIJsonUtilities.CreateJsonSchema( + typeof(PocoWithTypesWithOpenAIUnsupportedKeywords), + serializerOptions: JsonSerializerOptions.Default, + inferenceOptions: inferenceOptions); + + Assert.True(JsonElement.DeepEquals(expected, actual)); + } + + public class PocoWithTypesWithOpenAIUnsupportedKeywords + { + // Uses the unsupported "format" keyword + public DateTimeOffset Date { get; init; } + + // Uses the unsupported "pattern" keyword + public TimeSpan TimeSpan { get; init; } + + // Uses the unsupported "minLength" and "maxLength" keywords + public char Char { get; init; } + } + [Fact] public static void ResolveParameterJsonSchema_ReturnsExpectedValue() { @@ -178,7 +266,12 @@ public static void CreateJsonSchema_ValidateWithTestData(ITestData testData) ? new(opts) { TypeInfoResolver = TestTypes.TestTypesContext.Default } : TestTypes.TestTypesContext.Default.Options; - JsonElement schema = AIJsonUtilities.CreateJsonSchema(testData.Type, serializerOptions: options); + JsonTypeInfo typeInfo = options.GetTypeInfo(testData.Type); + AIJsonSchemaCreateOptions? createOptions = typeInfo.Properties.Any(prop => prop.IsExtensionData) + ? new() { DisallowAdditionalProperties = false } // Do not append additionalProperties: false to the schema if the type has extension data. + : null; + + JsonElement schema = AIJsonUtilities.CreateJsonSchema(testData.Type, serializerOptions: options, inferenceOptions: createOptions); JsonNode? schemaAsNode = JsonSerializer.SerializeToNode(schema, options); Assert.NotNull(schemaAsNode); diff --git a/test/Libraries/Microsoft.Extensions.AI.Tests/Functions/AIFunctionFactoryTest.cs b/test/Libraries/Microsoft.Extensions.AI.Tests/Functions/AIFunctionFactoryTest.cs index 7d8b10814d4..0bec845babc 100644 --- a/test/Libraries/Microsoft.Extensions.AI.Tests/Functions/AIFunctionFactoryTest.cs +++ b/test/Libraries/Microsoft.Extensions.AI.Tests/Functions/AIFunctionFactoryTest.cs @@ -182,4 +182,17 @@ public void AIFunctionFactoryCreateOptions_ValuesPropagateToAIFunction() Assert.Equal(returnParameterMetadata, func.Metadata.ReturnParameter); Assert.Equal(metadata, func.Metadata.AdditionalProperties); } + + [Fact] + public void AIFunctionFactoryCreateOptions_SchemaOptions_HasExpectedDefaults() + { + var options = new AIFunctionFactoryCreateOptions(); + var schemaOptions = options.SchemaCreateOptions; + + Assert.NotNull(schemaOptions); + Assert.True(schemaOptions.IncludeTypeInEnumSchemas); + Assert.True(schemaOptions.FilterDisallowedKeywords); + Assert.True(schemaOptions.RequireAllProperties); + Assert.True(schemaOptions.DisallowAdditionalProperties); + } }