Skip to content

Commit

Permalink
Optimize node storage for SymbolTreeInfo
Browse files Browse the repository at this point in the history
  • Loading branch information
sharwell committed Oct 28, 2020
1 parent 1745e06 commit 4d8ba56
Show file tree
Hide file tree
Showing 5 changed files with 97 additions and 99 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,7 @@ internal partial class SymbolTreeInfo
/// <see cref="BuilderNode"/>s are produced when initially creating our indices.
/// They store Names of symbols and the index of their parent symbol. When we
/// produce the final <see cref="SymbolTreeInfo"/> though we will then convert
/// these to <see cref="Node"/>s. Those nodes will not point to individual
/// strings, but will instead point at <see cref="_concatenatedNames"/>.
/// these to <see cref="Node"/>s.
/// </summary>
[DebuggerDisplay("{GetDebuggerDisplay(),nq}")]
private struct BuilderNode
Expand Down Expand Up @@ -50,9 +49,9 @@ private string GetDebuggerDisplay()
private struct Node
{
/// <summary>
/// Span in <see cref="_concatenatedNames"/> of the Name of this Node.
/// The Name of this Node.
/// </summary>
public readonly TextSpan NameSpan;
public readonly string Name;

/// <summary>
/// Index in <see cref="_nodes"/> of the parent Node of this Node.
Expand All @@ -61,22 +60,22 @@ private struct Node
/// </summary>
public readonly int ParentIndex;

public Node(TextSpan wordSpan, int parentIndex)
public Node(string name, int parentIndex)
{
NameSpan = wordSpan;
Name = name;
ParentIndex = parentIndex;
}

public bool IsRoot => ParentIndex == RootNodeParentIndex;

public void AssertEquivalentTo(Node node)
{
Debug.Assert(node.NameSpan == this.NameSpan);
Debug.Assert(node.Name == this.Name);
Debug.Assert(node.ParentIndex == this.ParentIndex);
}

private string GetDebuggerDisplay()
=> NameSpan + ", " + ParentIndex;
=> Name + ", " + ParentIndex;
}

private readonly struct ParameterTypeInfo
Expand Down
109 changes: 43 additions & 66 deletions src/Workspaces/Core/Portable/FindSymbols/SymbolTree/SymbolTreeInfo.cs
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,6 @@ internal partial class SymbolTreeInfo : IChecksummedObject
{
public Checksum Checksum { get; }

/// <summary>
/// To prevent lots of allocations, we concatenate all the names in all our
/// Nodes into one long string. Each Node then just points at the span in
/// this string with the portion they care about.
/// </summary>
private readonly string _concatenatedNames;

/// <summary>
/// The list of nodes that represent symbols. The primary key into the sorting of this
/// list is the name. They are sorted case-insensitively with the <see cref="s_totalComparer" />.
Expand Down Expand Up @@ -102,27 +95,24 @@ public MultiDictionary<string, ExtensionMethodInfo>.ValueSet GetExtensionMethodI

private SymbolTreeInfo(
Checksum checksum,
string concatenatedNames,
ImmutableArray<Node> sortedNodes,
Task<SpellChecker> spellCheckerTask,
OrderPreservingMultiDictionary<string, string> inheritanceMap,
MultiDictionary<string, ExtensionMethodInfo> receiverTypeNameToExtensionMethodMap)
: this(checksum, concatenatedNames, sortedNodes, spellCheckerTask,
CreateIndexBasedInheritanceMap(concatenatedNames, sortedNodes, inheritanceMap),
: this(checksum, sortedNodes, spellCheckerTask,
CreateIndexBasedInheritanceMap(sortedNodes, inheritanceMap),
receiverTypeNameToExtensionMethodMap)
{
}

private SymbolTreeInfo(
Checksum checksum,
string concatenatedNames,
ImmutableArray<Node> sortedNodes,
Task<SpellChecker> spellCheckerTask,
OrderPreservingMultiDictionary<int, int> inheritanceMap,
MultiDictionary<string, ExtensionMethodInfo>? receiverTypeNameToExtensionMethodMap)
{
Checksum = checksum;
_concatenatedNames = concatenatedNames;
_nodes = sortedNodes;
_spellCheckerTask = spellCheckerTask;
_inheritanceMap = inheritanceMap;
Expand All @@ -132,18 +122,18 @@ private SymbolTreeInfo(
public static SymbolTreeInfo CreateEmpty(Checksum checksum)
{
var unsortedNodes = ImmutableArray.Create(BuilderNode.RootNode);
SortNodes(unsortedNodes, out var concatenatedNames, out var sortedNodes);
SortNodes(unsortedNodes, out var sortedNodes);

return new SymbolTreeInfo(checksum, concatenatedNames, sortedNodes,
CreateSpellCheckerAsync(checksum, concatenatedNames, sortedNodes),
return new SymbolTreeInfo(checksum, sortedNodes,
CreateSpellCheckerAsync(checksum, sortedNodes),
new OrderPreservingMultiDictionary<string, string>(),
new MultiDictionary<string, ExtensionMethodInfo>());
}

public SymbolTreeInfo WithChecksum(Checksum checksum)
{
return new SymbolTreeInfo(
checksum, _concatenatedNames, _nodes, _spellCheckerTask, _inheritanceMap, _receiverTypeNameToExtensionMethodMap);
checksum, _nodes, _spellCheckerTask, _inheritanceMap, _receiverTypeNameToExtensionMethodMap);
}

public Task<ImmutableArray<ISymbol>> FindAsync(
Expand Down Expand Up @@ -223,9 +213,12 @@ private async Task<ImmutableArray<ISymbol>> FindAsync(
CancellationToken cancellationToken)
{
var comparer = GetComparer(ignoreCase);
ArrayBuilder<ISymbol>? results = null;
IAssemblySymbol? assemblySymbol = null;

// PERF: Only allocate on first usage to avoid taking an object from the pool on this hot path that never
// gets used.
ArrayBuilder<ISymbol>? results = null;

foreach (var node in FindNodeIndices(name, comparer))
{
cancellationToken.ThrowIfCancellationRequested();
Expand All @@ -245,42 +238,42 @@ private static StringSliceComparer GetComparer(bool ignoreCase)
}

private IEnumerable<int> FindNodeIndices(string name, StringSliceComparer comparer)
=> FindNodeIndices(_concatenatedNames, _nodes, name, comparer);
=> FindNodeIndices(_nodes, name, comparer);

/// <summary>
/// Gets all the node indices with matching names per the <paramref name="comparer" />.
/// </summary>
private static IEnumerable<int> FindNodeIndices(
string concatenatedNames, ImmutableArray<Node> nodes,
ImmutableArray<Node> nodes,
string name, StringSliceComparer comparer)
{
// find any node that matches case-insensitively
var startingPosition = BinarySearch(concatenatedNames, nodes, name);
var startingPosition = BinarySearch(nodes, name);
var nameSlice = new StringSlice(name);

if (startingPosition != -1)
{
// yield if this matches by the actual given comparer
if (comparer.Equals(nameSlice, GetNameSlice(concatenatedNames, nodes, startingPosition)))
if (comparer.Equals(nameSlice, GetNameSlice(nodes, startingPosition)))
{
yield return startingPosition;
}

var position = startingPosition;
while (position > 0 && s_caseInsensitiveComparer.Equals(GetNameSlice(concatenatedNames, nodes, position - 1), nameSlice))
while (position > 0 && s_caseInsensitiveComparer.Equals(GetNameSlice(nodes, position - 1), nameSlice))
{
position--;
if (comparer.Equals(GetNameSlice(concatenatedNames, nodes, position), nameSlice))
if (comparer.Equals(GetNameSlice(nodes, position), nameSlice))
{
yield return position;
}
}

position = startingPosition;
while (position + 1 < nodes.Length && s_caseInsensitiveComparer.Equals(GetNameSlice(concatenatedNames, nodes, position + 1), nameSlice))
while (position + 1 < nodes.Length && s_caseInsensitiveComparer.Equals(GetNameSlice(nodes, position + 1), nameSlice))
{
position++;
if (comparer.Equals(GetNameSlice(concatenatedNames, nodes, position), nameSlice))
if (comparer.Equals(GetNameSlice(nodes, position), nameSlice))
{
yield return position;
}
Expand All @@ -289,18 +282,18 @@ private static IEnumerable<int> FindNodeIndices(
}

private static StringSlice GetNameSlice(
string concatenatedNames, ImmutableArray<Node> nodes, int nodeIndex)
ImmutableArray<Node> nodes, int nodeIndex)
{
return new StringSlice(concatenatedNames, nodes[nodeIndex].NameSpan);
return new StringSlice(nodes[nodeIndex].Name);
}

private int BinarySearch(string name)
=> BinarySearch(_concatenatedNames, _nodes, name);
=> BinarySearch(_nodes, name);

/// <summary>
/// Searches for a name in the ordered list that matches per the <see cref="s_caseInsensitiveComparer" />.
/// </summary>
private static int BinarySearch(string concatenatedNames, ImmutableArray<Node> nodes, string name)
private static int BinarySearch(ImmutableArray<Node> nodes, string name)
{
var nameSlice = new StringSlice(name);
var max = nodes.Length - 1;
Expand All @@ -311,7 +304,7 @@ private static int BinarySearch(string concatenatedNames, ImmutableArray<Node> n
var mid = min + ((max - min) >> 1);

var comparison = s_caseInsensitiveComparer.Compare(
GetNameSlice(concatenatedNames, nodes, mid), nameSlice);
GetNameSlice(nodes, mid), nameSlice);
if (comparison < 0)
{
min = mid + 1;
Expand Down Expand Up @@ -350,26 +343,25 @@ private static int BinarySearch(string concatenatedNames, ImmutableArray<Node> n

private static Task<SpellChecker> GetSpellCheckerAsync(
Solution solution, Checksum checksum, string filePath,
string concatenatedNames, ImmutableArray<Node> sortedNodes)
ImmutableArray<Node> sortedNodes)
{
// Create a new task to attempt to load or create the spell checker for this
// SymbolTreeInfo. This way the SymbolTreeInfo will be ready immediately
// for non-fuzzy searches, and soon afterwards it will be able to perform
// fuzzy searches as well.
return Task.Run(() => LoadOrCreateSpellCheckerAsync(
solution, checksum, filePath, concatenatedNames, sortedNodes));
solution, checksum, filePath, sortedNodes));
}

private static Task<SpellChecker> CreateSpellCheckerAsync(
Checksum checksum, string concatenatedNames, ImmutableArray<Node> sortedNodes)
Checksum checksum, ImmutableArray<Node> sortedNodes)
{
return Task.FromResult(new SpellChecker(
checksum, sortedNodes.Select(n => new StringSlice(concatenatedNames, n.NameSpan))));
checksum, sortedNodes.Select(n => new StringSlice(n.Name))));
}

private static void SortNodes(
ImmutableArray<BuilderNode> unsortedNodes,
out string concatenatedNames,
out ImmutableArray<Node> sortedNodes)
{
// Generate index numbers from 0 to Count-1
Expand Down Expand Up @@ -397,7 +389,6 @@ private static void SortNodes(
var result = ArrayBuilder<Node>.GetInstance(unsortedNodes.Length);
result.Count = unsortedNodes.Length;

var concatenatedNamesBuilder = new StringBuilder();
string? lastName = null;

// Copy nodes into the result array in the appropriate order and fixing
Expand All @@ -407,24 +398,20 @@ private static void SortNodes(
var n = unsortedNodes[i];
var currentName = n.Name;

// Don't bother adding the exact same name the concatenated sequence
// over and over again. This can trivially happen because we'll run
// into the same names with different parents all through metadata
// and source symbols.
if (currentName != lastName)
// De-duplicate identical strings
if (currentName == lastName)
{
concatenatedNamesBuilder.Append(currentName);
currentName = lastName;
}

result[ranking[i]] = new Node(
new TextSpan(concatenatedNamesBuilder.Length - currentName.Length, currentName.Length),
currentName,
n.IsRoot ? n.ParentIndex : ranking[n.ParentIndex]);

lastName = currentName;
}

sortedNodes = result.ToImmutableAndFree();
concatenatedNames = concatenatedNamesBuilder.ToString();
}

private static int CompareNodes(
Expand Down Expand Up @@ -471,7 +458,7 @@ private void Bind(

if (_nodes[node.ParentIndex].IsRoot)
{
var members = rootContainer.GetMembers(GetName(node));
var members = rootContainer.GetMembers(node.Name);
if (!members.IsEmpty)
{
results ??= ArrayBuilder<ISymbol>.GetInstance();
Expand All @@ -480,6 +467,8 @@ private void Bind(
}
else
{
// PERF: Only allocate on first usage to avoid taking an object from the pool on this hot path that
// never gets used.
ArrayBuilder<ISymbol>? containerSymbols = null;
try
{
Expand All @@ -492,7 +481,7 @@ private void Bind(

if (containerSymbol is INamespaceOrTypeSymbol nsOrType)
{
var members = nsOrType.GetMembers(GetName(node));
var members = nsOrType.GetMembers(node.Name);
if (!members.IsEmpty)
{
results ??= ArrayBuilder<ISymbol>.GetInstance();
Expand All @@ -509,25 +498,11 @@ private void Bind(
}
}

private string GetName(Node node)
{
// TODO(cyrusn): We could consider caching the strings we create in the
// Nodes themselves. i.e. we could have a field in the node where the
// string could be stored once created. The reason i'm not doing that now
// is because, in general, we shouldn't actually be allocating that many
// strings here. This data structure is not in a hot path, and does not
// have a usage pattern where may strings are accessed in it. Rather,
// some features generally use it to just see if they can find a symbol
// corresponding to a single name. As such, caching doesn't seem valuable.
return _concatenatedNames.Substring(node.NameSpan.Start, node.NameSpan.Length);
}

#endregion

internal void AssertEquivalentTo(SymbolTreeInfo other)
{
Debug.Assert(Checksum.Equals(other.Checksum));
Debug.Assert(_concatenatedNames == other._concatenatedNames);
Debug.Assert(_nodes.Length == other._nodes.Length);

for (int i = 0, n = _nodes.Length; i < n; i++)
Expand Down Expand Up @@ -557,18 +532,18 @@ private static SymbolTreeInfo CreateSymbolTreeInfo(
OrderPreservingMultiDictionary<string, string> inheritanceMap,
MultiDictionary<string, ExtensionMethodInfo> simpleMethods)
{
SortNodes(unsortedNodes, out var concatenatedNames, out var sortedNodes);
SortNodes(unsortedNodes, out var sortedNodes);
var createSpellCheckerTask = GetSpellCheckerAsync(
solution, checksum, filePath, concatenatedNames, sortedNodes);
solution, checksum, filePath, sortedNodes);

return new SymbolTreeInfo(
checksum, concatenatedNames,
checksum,
sortedNodes, createSpellCheckerTask, inheritanceMap,
simpleMethods);
}

private static OrderPreservingMultiDictionary<int, int> CreateIndexBasedInheritanceMap(
string concatenatedNames, ImmutableArray<Node> nodes,
ImmutableArray<Node> nodes,
OrderPreservingMultiDictionary<string, string> inheritanceMap)
{
// All names in metadata will be case sensitive.
Expand All @@ -578,12 +553,12 @@ private static OrderPreservingMultiDictionary<int, int> CreateIndexBasedInherita
foreach (var kvp in inheritanceMap)
{
var baseName = kvp.Key;
var baseNameIndex = BinarySearch(concatenatedNames, nodes, baseName);
var baseNameIndex = BinarySearch(nodes, baseName);
Debug.Assert(baseNameIndex >= 0);

foreach (var derivedName in kvp.Value)
{
foreach (var derivedNameIndex in FindNodeIndices(concatenatedNames, nodes, derivedName, comparer))
foreach (var derivedNameIndex in FindNodeIndices(nodes, derivedName, comparer))
{
result.Add(baseNameIndex, derivedNameIndex);
}
Expand All @@ -599,6 +574,8 @@ public ImmutableArray<INamedTypeSymbol> GetDerivedMetadataTypes(
var baseTypeNameIndex = BinarySearch(baseTypeName);
var derivedTypeIndices = _inheritanceMap[baseTypeNameIndex];

// PERF: Only allocate on first usage to avoid taking objects from the pool on this hot path that never get
// used.
ArrayBuilder<INamedTypeSymbol>? builder = null;
ArrayBuilder<ISymbol>? tempBuilder = null;
try
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,7 @@ private static Task<SymbolTreeInfo> TryLoadOrCreateMetadataSymbolTreeInfoAsync(
loadOnly,
createAsync: () => CreateMetadataSymbolTreeInfoAsync(solution, checksum, reference),
keySuffix: "_Metadata_" + filePath,
tryReadObject: reader => TryReadSymbolTreeInfo(reader, checksum, (names, nodes) => GetSpellCheckerAsync(solution, checksum, filePath, names, nodes)),
tryReadObject: reader => TryReadSymbolTreeInfo(reader, checksum, nodes => GetSpellCheckerAsync(solution, checksum, filePath, nodes)),
cancellationToken: cancellationToken);
Contract.ThrowIfFalse(result != null || loadOnly == true, "Result can only be null if 'loadOnly: true' was passed.");
return result;
Expand Down
Loading

0 comments on commit 4d8ba56

Please sign in to comment.