Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Various bug fixes. #163

Merged
merged 22 commits into from
Mar 5, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 26 additions & 26 deletions GeneratedRegexPatterns/HighConfidenceSecurityModels.json

Large diffs are not rendered by default.

9 changes: 1 addition & 8 deletions GeneratedRegexPatterns/LowConfidenceSecurityModels.json
Original file line number Diff line number Diff line change
@@ -1,18 +1,11 @@
[
{
"Pattern": "(?i)authorization:(\\s|%20)bearer(\\s|%20)(?<refine>[0-9a-z][abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890_~.\\-+\\/=]*)([^abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890_~.\\-+/=]|$)",
"Pattern": "(?i)authorization:(\\s|%20)bearer(\\s|%20)(?P<refine>[0-9a-z][abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890_~.\\-+\\/=]*)([^abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890_~.\\-+/=]|$)",
"Id": "SEC101/061",
"Name": "OAuth2BearerToken",
"Signatures": null,
"DetectionMetadata": "LowConfidence"
},
{
"Pattern": "(?i)[a-z0-9.=\\-:[_@\\/*\\]+?]{32}$",
"Id": "SEC000/003",
"Name": "Unclassified32CharacterString",
"Signatures": null,
"DetectionMetadata": "HighEntropy, Unclassified, LowConfidence"
},
{
"Pattern": "(^|[^abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890+/_\\-])[abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890+/]{43}=",
"Id": "SEC000/000",
Expand Down
6 changes: 3 additions & 3 deletions GeneratedRegexPatterns/MediumConfidenceSecurityModels.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[
{
"Pattern": "(?i)\\.servicebus\\.windows.+[^0-9a-z\\/+](?<refine>[0-9a-z\\/+]{43}=)(?:[^=]|$)",
"Pattern": "(?i)\\.servicebus\\.windows.+[^0-9a-z\\/+](?P<refine>[0-9a-z\\/+]{43}=)(?:[^=]|$)",
"Id": "SEC101/105",
"Name": "AzureMessageLegacyCredentials",
"Signatures": [
Expand All @@ -20,7 +20,7 @@
"DetectionMetadata": "HighEntropy, MediumConfidence"
},
{
"Pattern": "($|\\b)(ftps?|https?):\\/\\/(?<refine>[^:@\\/]+:[^:@?\\/]+)@",
"Pattern": "($|\\b)(ftps?|https?):\\/\\/(?P<refine>[^:@\\/]+:[^:@?\\/]+)@",
"Id": "SEC101/127",
"Name": "UrlCredentials",
"Signatures": [
Expand All @@ -30,7 +30,7 @@
"DetectionMetadata": "MediumConfidence"
},
{
"Pattern": "(?i)(?:^|[?;&])(?:dsas_secret|sig)=(?<refine>[0-9a-z\\/+%]{43,129}(?:=|%3d))",
"Pattern": "(?i)(?:^|[?;&])(?:dsas_secret|sig)=(?P<refine>[0-9a-z\\/+%]{43,129}(?:=|%3d))",
"Id": "SEC101/060",
"Name": "LooseSasSecret",
"Signatures": [
Expand Down
62 changes: 31 additions & 31 deletions GeneratedRegexPatterns/PreciselyClassifiedSecurityKeys.json

Large diffs are not rendered by default.

13 changes: 3 additions & 10 deletions GeneratedRegexPatterns/UnclassifiedPotentialSecurityKeys.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
"DetectionMetadata": "HighEntropy, MediumConfidence"
},
{
"Pattern": "($|\\b)(ftps?|https?):\\/\\/(?<refine>[^:@\\/]+:[^:@?\\/]+)@",
"Pattern": "($|\\b)(ftps?|https?):\\/\\/(?P<refine>[^:@\\/]+:[^:@?\\/]+)@",
"Id": "SEC101/127",
"Name": "UrlCredentials",
"Signatures": [
Expand All @@ -21,7 +21,7 @@
"DetectionMetadata": "MediumConfidence"
},
{
"Pattern": "(?i)(?:^|[?;&])(?:dsas_secret|sig)=(?<refine>[0-9a-z\\/+%]{43,129}(?:=|%3d))",
"Pattern": "(?i)(?:^|[?;&])(?:dsas_secret|sig)=(?P<refine>[0-9a-z\\/+%]{43,129}(?:=|%3d))",
"Id": "SEC101/060",
"Name": "LooseSasSecret",
"Signatures": [
Expand All @@ -31,19 +31,12 @@
"DetectionMetadata": "HighEntropy, MediumConfidence"
},
{
"Pattern": "(?i)authorization:(\\s|%20)bearer(\\s|%20)(?<refine>[0-9a-z][abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890_~.\\-+\\/=]*)([^abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890_~.\\-+/=]|$)",
"Pattern": "(?i)authorization:(\\s|%20)bearer(\\s|%20)(?P<refine>[0-9a-z][abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890_~.\\-+\\/=]*)([^abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890_~.\\-+/=]|$)",
"Id": "SEC101/061",
"Name": "OAuth2BearerToken",
"Signatures": null,
"DetectionMetadata": "LowConfidence"
},
{
"Pattern": "(?i)[a-z0-9.=\\-:[_@\\/*\\]+?]{32}$",
"Id": "SEC000/003",
"Name": "Unclassified32CharacterString",
"Signatures": null,
"DetectionMetadata": "HighEntropy, Unclassified, LowConfidence"
},
{
"Pattern": "(^|[^abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890+/_\\-])[abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890+/]{43}=",
"Id": "SEC000/000",
Expand Down
8 changes: 6 additions & 2 deletions docs/ReleaseHistory.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,9 @@
- FPS => False positive reduction in static analysis.
- FNS => False negative reduction in static analysis.

# UNRELEASED

# 1.16.0 - 03/05/2025
- BRK: Eliminate `SEC000/101.Unclassified32CharacterString` as noisy and not useful.
- BRK: Rename `SEC101/102.AdoPat` friendly name to `AdoLegacyPat`.
- BRK: `IdentifiableScan` no longer supports stream input. The following API are removed. Use `IdentifiableScan.DetectSecrets(string)`.
- `IdentifiableScan.DetectSecrets(Stream)`
- `IdentifiableScan.Start`
Expand All @@ -22,6 +23,9 @@
- `IdentifiableScan.CheckPossibleMatchRange`
- PRF: `IdentifiableScan` did not use high-performance scanning techniques for `SEC101/178.AzureIotHubIdentifiableKey` and `SEC101/200.CommonAnnotatedSecurityKey`. A bug triggered fallback to slower scanning due to incorrect signatures being used.
- PRF: `IdentifiableScan` now implements high-performance scanning techniques in managed code. The performance has been found to be significantly better than the prior implementation via rust interop. This also reduces the size of the NuGet package size by a factor of 34 from 6.8 MB to 200 KB and adds support for non x86/x64 CPUs and non-Windows OSes.
- BUG: Correct `SEC000/002.Unclassified16ByteHexadecimalString` id and rule name on calling `GetMatchIdAndName` (where `SEC000/001.Unclassified64ByteBase64String` was returned incorrectly before).
- BUG: Resolve `System.FormatException: The input is not a valid Base-46 string` errors calling `SEC101/102.AdoPat.GetMatchIdAndName` by swallowing correct exception kind `ArgumentException` in `IsChecksumValid` helper.
- BUG: `?P<name>` is now used throughout for named captures as this is required currently for RE2 compatibility.

# 1.15.0 - 03/03/2025
- BRK: Regular expression syntax has been standardized in JSON to conform to how the overwhelming majority of patterns were already defined.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

<ItemGroup>
<ProjectReference Include="..\Microsoft.Security.Utilities.Core\Microsoft.Security.Utilities.Core.csproj" />
<ProjectReference Include="..\Tests.Microsoft.Security.Utilities.Core\Tests.Microsoft.Security.Utilities.Core.csproj" />
</ItemGroup>

</Project>
40 changes: 0 additions & 40 deletions src/Microsoft.Security.Utilities.Benchmarks/RE2RegexEngine.cs

This file was deleted.

9 changes: 7 additions & 2 deletions src/Microsoft.Security.Utilities.Core/CachedDotNetRegex.cs
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,17 @@ public static Regex GetOrCreateRegex(string pattern, RegexOptions options)
{
var key = (pattern, options);
#if NET7_0_OR_GREATER
return RegexCache.GetOrAdd(key, key => new Regex(key.Pattern, key.Options | RegexOptions.Compiled | RegexOptions.NonBacktracking));
return RegexCache.GetOrAdd(key, key => new Regex(NormalizeGroupsPattern(key.Pattern), key.Options | RegexOptions.Compiled | RegexOptions.NonBacktracking));
#else
return RegexCache.GetOrAdd(key, key => new Regex(key.Pattern, key.Options | RegexOptions.Compiled));
return RegexCache.GetOrAdd(key, key => new Regex(NormalizeGroupsPattern(key.Pattern), key.Options | RegexOptions.Compiled));
#endif
}

internal static string NormalizeGroupsPattern(string pattern)
{
return pattern.Replace("?P<", "?<");
}

public bool IsMatch(string input, string pattern, RegexOptions options = RegexDefaults.DefaultOptionsCaseSensitive, TimeSpan timeout = default, string captureGroup = null)
{
// Note: Instance Regex.IsMatch has no timeout overload.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ public Unclassified16ByteHexadecimalString()
DetectionMetadata = DetectionMetadata.HighEntropy | DetectionMetadata.Unclassified | DetectionMetadata.LowConfidence;
}

public override Tuple<string, string>? GetMatchIdAndName(string match) => new Tuple<string, string>("SEC000/001", "Unclassified64ByteBase64String");
public override Tuple<string, string>? GetMatchIdAndName(string match) => new Tuple<string, string>("SEC000/002", nameof(Unclassified16ByteHexadecimalString));
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

002

boo. cut and paste error. I added invariant tests that ensure no rules collide with each other in core metadata expression and also that GetMatchIdAndName results always match the declared rule id and name.

This is what we get for being 'cute' and allowing a single rule to be flexible in what ids it covers. That's useful but it tortures the design more than a little and leaves us vulnerable to bugs of this kind.


public override IEnumerable<string> GenerateTruePositiveExamples()
{
Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ public LooseSasSecret()
Id = "SEC101/060";
Name = nameof(LooseSasSecret);
DetectionMetadata = DetectionMetadata.HighEntropy | DetectionMetadata.MediumConfidence;
Pattern = @$"(?i)(?:^|[?;&])(?:dsas_secret|sig)=(?<refine>[0-9a-z\/+%]{{43,129}}(?:=|%3d))";
Pattern = @$"(?i)(?:^|[?;&])(?:dsas_secret|sig)=(?P<refine>[0-9a-z\/+%]{{43,129}}(?:=|%3d))";
Signatures = new HashSet<string>(new[] { "sig=", "ret=" });
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ public OAuth2BearerToken()
DetectionMetadata = DetectionMetadata.LowConfidence;

// https://datatracker.ietf.org/doc/html/rfc6750#section-2.1
Pattern = @$"(?i)authorization:(\s|%20)bearer(\s|%20)(?<refine>[0-9a-z][{WellKnownRegexPatterns.UrlUnreserved}+\/=]*)([^{WellKnownRegexPatterns.UrlUnreserved}+/=]|$)";
Pattern = @$"(?i)authorization:(\s|%20)bearer(\s|%20)(?P<refine>[0-9a-z][{WellKnownRegexPatterns.UrlUnreserved}+\/=]*)([^{WellKnownRegexPatterns.UrlUnreserved}+/=]|$)";
}

public override IEnumerable<string> GenerateTruePositiveExamples()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ public abstract class Azure32ByteIdentifiableKey : IdentifiableKey
{
public override string Pattern
{
get => @$"{WellKnownRegexPatterns.PrefixAllBase64}(?<refine>[{WellKnownRegexPatterns.Base64}]{{33}}{RegexNormalizedSignature}[A-P][{WellKnownRegexPatterns.Base64}]{{5}}=){WellKnownRegexPatterns.SuffixAllBase64}";
get => @$"{WellKnownRegexPatterns.PrefixAllBase64}(?P<refine>[{WellKnownRegexPatterns.Base64}]{{33}}{RegexNormalizedSignature}[A-P][{WellKnownRegexPatterns.Base64}]{{5}}=){WellKnownRegexPatterns.SuffixAllBase64}";
protected set => base.Pattern = value;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ internal sealed class Azure32ByteIdentifiableKeys : RegexPattern
public Azure32ByteIdentifiableKeys()
{
Pattern = $@"{WellKnownRegexPatterns.PrefixAllBase64}" +
$@"(?<refine>[{WellKnownRegexPatterns.Base64}]{{33}}(AIoT|\+(ASb|AEh|ARm))[A-P][{WellKnownRegexPatterns.Base64}]{{5}}=)" +
$@"(?P<refine>[{WellKnownRegexPatterns.Base64}]{{33}}(AIoT|\+(ASb|AEh|ARm))[A-P][{WellKnownRegexPatterns.Base64}]{{5}}=)" +
$@"{WellKnownRegexPatterns.SuffixAllBase64}";

RotationPeriod = TimeSpan.FromDays(365 * 2);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ public abstract class Azure64ByteIdentifiableKey : IdentifiableKey

public override string Pattern
{
get => $@"{WellKnownRegexPatterns.PrefixAllBase64}(?<refine>[{WellKnownRegexPatterns.Base64}]{{76}}{RegexNormalizedSignature}[{WellKnownRegexPatterns.Base64}]{{5}}[AQgw]==){WellKnownRegexPatterns.SuffixAllBase64}";
get => $@"{WellKnownRegexPatterns.PrefixAllBase64}(?P<refine>[{WellKnownRegexPatterns.Base64}]{{76}}{RegexNormalizedSignature}[{WellKnownRegexPatterns.Base64}]{{5}}[AQgw]==){WellKnownRegexPatterns.SuffixAllBase64}";
protected set => base.Pattern = value;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ internal sealed class Azure64ByteIdentifiableKeys : RegexPattern
public Azure64ByteIdentifiableKeys()
{
Pattern = $@"{WellKnownRegexPatterns.PrefixAllBase64}" +
$@"(?<refine>[{WellKnownRegexPatterns.Base64}]{{76}}(APIM|ACDb|\+(ABa|AMC|ASt))[{WellKnownRegexPatterns.Base64}]{{5}}[AQgw]==)" +
$@"(?P<refine>[{WellKnownRegexPatterns.Base64}]{{76}}(APIM|ACDb|\+(ABa|AMC|ASt))[{WellKnownRegexPatterns.Base64}]{{5}}[AQgw]==)" +
$@"{WellKnownRegexPatterns.SuffixAllBase64}";

RotationPeriod = TimeSpan.FromDays(365 * 2);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ public NuGetApiKey()

// This is the ApiKeyV4 format implemented here:
// https://github.com/NuGet/NuGetGallery/blob/main/src/NuGetGallery.Services/Authentication/ApiKeyV4.cs
Pattern = "(?i)(^|[^a-z0-9])(?<refine>oy2[a-z2-7]{43})([^a-z0-9]|$)";
Pattern = "(?i)(^|[^a-z0-9])(?P<refine>oy2[a-z2-7]{43})([^a-z0-9]|$)";

Signatures = new HashSet<string>(new[] { "oy2", "OY2" });
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ public NpmAuthorKey()
Id = "SEC101/050";
Name = nameof(NpmAuthorKey);
DetectionMetadata = DetectionMetadata.FixedSignature | DetectionMetadata.HighEntropy | DetectionMetadata.HighConfidence;
Pattern = @$"{WellKnownRegexPatterns.PrefixBase62}(?<refine>npm_[{WellKnownRegexPatterns.Base62}]{{36}}){WellKnownRegexPatterns.SuffixBase62}";
Pattern = @$"{WellKnownRegexPatterns.PrefixBase62}(?P<refine>npm_[{WellKnownRegexPatterns.Base62}]{{36}}){WellKnownRegexPatterns.SuffixBase62}";
Signatures = new HashSet<string>(new[] { "npm_" });
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,16 @@

namespace Microsoft.Security.Utilities
{
public class AdoPat : RegexPattern
public class AdoLegacyPat : RegexPattern
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

AdoLegacyPat

This rule no longer applies to the latest ADO PAT format and thefore I'm renaming it.

{
private static readonly byte[] EmptyByteArray = new byte[0];

public AdoPat()
public AdoLegacyPat()
{
Id = "SEC101/102";
Name = nameof(AdoPat);
Name = nameof(AdoLegacyPat);
DetectionMetadata = DetectionMetadata.HighEntropy | DetectionMetadata.EmbeddedChecksum;
Pattern = "(?:[^2-7a-z]|^)(?<refine>[2-7a-z]{52})(?:[^2-7a-z]|$)";
Pattern = "(?:[^2-7a-z]|^)(?P<refine>[2-7a-z]{52})(?:[^2-7a-z]|$)";
}

public override Tuple<string, string> GetMatchIdAndName(string match)
Expand Down Expand Up @@ -42,7 +42,7 @@ private static bool IsChecksumValid(string input, uint magicNumber)
{
inputBytes = ConvertFromBase32(input);
}
catch (FormatException)
catch (ArgumentException)
{
return false;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ public AzureCosmosDBLegacyCredentials()
Id = "SEC101/104";
Name = nameof(AzureCosmosDBLegacyCredentials);
DetectionMetadata = DetectionMetadata.HighEntropy | DetectionMetadata.ObsoleteFormat;
Pattern = "(?i)\\.documents\\.azure\\.com.+(?:^|[^0-9a-z\\/+])(?<refine>[0-9a-z\\/+]{86}==)(?:[^=]|$)";
Pattern = "(?i)\\.documents\\.azure\\.com.+(?:^|[^0-9a-z\\/+])(?P<refine>[0-9a-z\\/+]{86}==)(?:[^=]|$)";
}

public override Tuple<string, string> GetMatchIdAndName(string match)
Expand Down
Loading
Loading