Skip to content

Commit 11b2716

Browse files
committed
Add SanitizeDocument overload that takes a Stream
Fixes #158
1 parent dfd5e9d commit 11b2716

File tree

3 files changed

+64
-14
lines changed

3 files changed

+64
-14
lines changed

src/HtmlSanitizer/HtmlSanitizer.cs

+22
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
using System;
1010
using System.Collections.Generic;
1111
using System.Globalization;
12+
using System.IO;
1213
using System.Linq;
1314
using System.Text.RegularExpressions;
1415

@@ -490,6 +491,27 @@ public string SanitizeDocument(string html, string baseUrl = "", IMarkupFormatte
490491
}
491492
}
492493

494+
/// <summary>
495+
/// Sanitizes the HTML document read from the specified stream. Even if only a fragment is given, a whole document will be returned.
496+
/// </summary>
497+
/// <param name="html">A stream containing the HTML document to sanitize. It is passed directly to the parser.</param>
498+
/// <param name="baseUrl">The base URL relative URLs are resolved against. No resolution if empty.</param>
499+
/// <param name="outputFormatter">The formatter used to render the DOM. Using the <see cref="OutputFormatter"/> if null.</param>
500+
/// <returns>The sanitized HTML document, rendered as a string.</returns>
501+
public string SanitizeDocument(Stream html, string baseUrl = "", IMarkupFormatter outputFormatter = null)
502+
{
503+
var parser = HtmlParserFactory();
504+
505+
using (var dom = parser.Parse(html)) // the parsed document is disposed when this block exits
506+
{
507+
DoSanitize(dom, dom.DocumentElement, baseUrl);
508+
509+
var output = dom.ToHtml(outputFormatter ?? OutputFormatter);
510+
511+
return output;
512+
}
513+
}
514+
493515
/// <summary>
494516
/// Creates an instance of <see cref="HtmlParser"/>.
495517
/// </summary>

test/HtmlSanitizer.Tests/HtmlSanitizer.Tests.csproj

+18-14
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
<Project Sdk="Microsoft.NET.Sdk">
22

33
<PropertyGroup>
4-
<TargetFrameworks>netcoreapp2.1;netcoreapp2.0;net452</TargetFrameworks>
4+
<TargetFrameworks>netcoreapp2.1;netcoreapp2.0;net46</TargetFrameworks>
55
<AssemblyName>HtmlSanitizer.Tests</AssemblyName>
66
<PackageId>HtmlSanitizer.Tests</PackageId>
77
<GenerateRuntimeConfigurationFiles>true</GenerateRuntimeConfigurationFiles>
@@ -23,19 +23,23 @@
2323
</ItemGroup>
2424

2525
<ItemGroup>
26-
<PackageReference Include="coverlet.msbuild" Version="2.1.1" />
27-
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="15.8.0" />
28-
<PackageReference Include="xunit.runner.visualstudio" Version="2.4.0" />
29-
<PackageReference Include="xunit.runner.console" Version="2.4.0" />
30-
<PackageReference Include="xunit" Version="2.4.0" />
31-
</ItemGroup>
32-
33-
<ItemGroup Condition=" '$(TargetFramework)' == 'netcoreapp2.0' ">
34-
</ItemGroup>
35-
36-
<ItemGroup Condition=" '$(TargetFramework)' == 'net452' ">
37-
<Reference Include="System" />
38-
<Reference Include="Microsoft.CSharp" />
26+
<PackageReference Include="coverlet.msbuild" Version="2.5.1">
27+
<PrivateAssets>all</PrivateAssets>
28+
<IncludeAssets>runtime; build; native; contentfiles; analyzers</IncludeAssets>
29+
</PackageReference>
30+
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="15.9.0" />
31+
<PackageReference Include="xunit.runner.visualstudio" Version="2.4.1">
32+
<PrivateAssets>all</PrivateAssets>
33+
<IncludeAssets>runtime; build; native; contentfiles; analyzers</IncludeAssets>
34+
</PackageReference>
35+
<PackageReference Include="xunit.runner.console" Version="2.4.1">
36+
<PrivateAssets>all</PrivateAssets>
37+
<IncludeAssets>runtime; build; native; contentfiles; analyzers</IncludeAssets>
38+
</PackageReference>
39+
<PackageReference Include="xunit" Version="2.4.1" />
40+
<PackageReference Include="System.Text.Encoding.CodePages">
41+
<Version>4.5.1</Version>
42+
</PackageReference>
3943
</ItemGroup>
4044

4145
<ItemGroup>

test/HtmlSanitizer.Tests/Tests.cs

+24
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010
using AngleSharp.Dom.Css;
1111
using System.Threading;
1212
using System.Reflection;
13+
using System.IO;
14+
using System.Text;
1315

1416
// Tests based on tests from http://roadkill.codeplex.com/
1517

@@ -36,6 +38,7 @@ public class HtmlSanitizerTests: IClassFixture<HtmlSanitizerFixture>
3638

3739
public HtmlSanitizerTests(HtmlSanitizerFixture fixture)
3840
{
41+
Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);
3942
Sanitizer = fixture.Sanitizer;
4043
}
4144

@@ -3097,6 +3100,27 @@ public void FilterUrlTest()
30973100

30983101
Assert.Equal(@"<img src=""https://www.example.com/test.png"">", actual);
30993102
}
3103+
3104+
3105+
[Fact]
3106+
public void EncodingTest()
3107+
{
3108+
// Regression test for https://github.com/mganss/HtmlSanitizer/issues/158: a document encoded as ISO-8859-1 that declares its charset in a meta tag is passed as a stream and must come back unchanged.
3109+
3110+
var sanitizer = new HtmlSanitizer();
3111+
sanitizer.AllowedTags.Add("meta");
3112+
sanitizer.AllowedAttributes.Add("http-equiv");
3113+
sanitizer.AllowedAttributes.Add("content");
3114+
3115+
var html = @"<html><head><meta http-equiv=""Content-Type"" content=""text/html; charset=iso-8859-1""></head><body>kopieën</body></html>";
3116+
3117+
using (var stream = new MemoryStream(Encoding.GetEncoding("iso-8859-1").GetBytes(html)))
3118+
{
3119+
var actual = sanitizer.SanitizeDocument(stream);
3120+
3121+
Assert.Equal(html, actual);
3122+
}
3123+
}
31003124
}
31013125
}
31023126

0 commit comments

Comments
 (0)