Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[clangd] [Modules] Support Reusable Modules Builder #106683

Merged
merged 7 commits into from
Nov 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
317 changes: 213 additions & 104 deletions clang-tools-extra/clangd/ModulesBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
#include "clang/Frontend/FrontendActions.h"
#include "clang/Serialization/ASTReader.h"
#include "clang/Serialization/InMemoryModuleCache.h"
#include "llvm/ADT/ScopeExit.h"
#include <queue>

namespace clang {
namespace clangd {
Expand Down Expand Up @@ -124,10 +126,58 @@ struct ModuleFile {
llvm::sys::fs::remove(ModuleFilePath);
}

StringRef getModuleName() const { return ModuleName; }

StringRef getModuleFilePath() const { return ModuleFilePath; }

private:
std::string ModuleName;
std::string ModuleFilePath;
};

// ReusablePrerequisiteModules - stands for PrerequisiteModules for which all
// the required modules are built successfully. All the module files
// are owned by the modules builder.
class ReusablePrerequisiteModules : public PrerequisiteModules {
public:
ReusablePrerequisiteModules() = default;

ReusablePrerequisiteModules(const ReusablePrerequisiteModules &Other) =
default;
ReusablePrerequisiteModules &
operator=(const ReusablePrerequisiteModules &) = default;
ReusablePrerequisiteModules(ReusablePrerequisiteModules &&) = delete;
ReusablePrerequisiteModules
operator=(ReusablePrerequisiteModules &&) = delete;

~ReusablePrerequisiteModules() override = default;

void adjustHeaderSearchOptions(HeaderSearchOptions &Options) const override {
// Appending all built module files.
for (const auto &RequiredModule : RequiredModules)
Options.PrebuiltModuleFiles.insert_or_assign(
RequiredModule->getModuleName().str(),
RequiredModule->getModuleFilePath().str());
}

bool canReuse(const CompilerInvocation &CI,
llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem>) const override;

bool isModuleUnitBuilt(llvm::StringRef ModuleName) const {
return BuiltModuleNames.contains(ModuleName);
}

void addModuleFile(std::shared_ptr<const ModuleFile> ModuleFile) {
BuiltModuleNames.insert(ModuleFile->getModuleName());
RequiredModules.emplace_back(std::move(ModuleFile));
}

private:
llvm::SmallVector<std::shared_ptr<const ModuleFile>, 8> RequiredModules;
// A helper class to speedup the query if a module is built.
llvm::StringSet<> BuiltModuleNames;
};

bool IsModuleFileUpToDate(PathRef ModuleFilePath,
const PrerequisiteModules &RequisiteModules,
llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS) {
Expand Down Expand Up @@ -192,89 +242,20 @@ bool IsModuleFilesUpToDate(
});
}

// StandalonePrerequisiteModules - stands for PrerequisiteModules for which all
// the required modules are built successfully. All the module files
// are owned by the StandalonePrerequisiteModules class.
//
// Any of the built module files won't be shared with other instances of the
// class. So that we can avoid worrying thread safety.
//
// We don't need to worry about duplicated module names here since the standard
// guarantees the module names should be unique to a program.
class StandalonePrerequisiteModules : public PrerequisiteModules {
public:
StandalonePrerequisiteModules() = default;

StandalonePrerequisiteModules(const StandalonePrerequisiteModules &) = delete;
StandalonePrerequisiteModules
operator=(const StandalonePrerequisiteModules &) = delete;
StandalonePrerequisiteModules(StandalonePrerequisiteModules &&) = delete;
StandalonePrerequisiteModules
operator=(StandalonePrerequisiteModules &&) = delete;

~StandalonePrerequisiteModules() override = default;

void adjustHeaderSearchOptions(HeaderSearchOptions &Options) const override {
// Appending all built module files.
for (auto &RequiredModule : RequiredModules)
Options.PrebuiltModuleFiles.insert_or_assign(
RequiredModule.ModuleName, RequiredModule.ModuleFilePath);
}

bool canReuse(const CompilerInvocation &CI,
llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem>) const override;

bool isModuleUnitBuilt(llvm::StringRef ModuleName) const {
return BuiltModuleNames.contains(ModuleName);
}

void addModuleFile(llvm::StringRef ModuleName,
llvm::StringRef ModuleFilePath) {
RequiredModules.emplace_back(ModuleName, ModuleFilePath);
BuiltModuleNames.insert(ModuleName);
}

private:
llvm::SmallVector<ModuleFile, 8> RequiredModules;
// A helper class to speedup the query if a module is built.
llvm::StringSet<> BuiltModuleNames;
};

// Build a module file for module with `ModuleName`. The information of built
// module file are stored in \param BuiltModuleFiles.
llvm::Error buildModuleFile(llvm::StringRef ModuleName,
const GlobalCompilationDatabase &CDB,
const ThreadsafeFS &TFS, ProjectModules &MDB,
PathRef ModuleFilesPrefix,
StandalonePrerequisiteModules &BuiltModuleFiles) {
if (BuiltModuleFiles.isModuleUnitBuilt(ModuleName))
return llvm::Error::success();

PathRef ModuleUnitFileName = MDB.getSourceForModuleName(ModuleName);
// It is possible that we're meeting third party modules (modules whose
// source are not in the project. e.g, the std module may be a third-party
// module for most projects) or something wrong with the implementation of
// ProjectModules.
// FIXME: How should we treat third party modules here? If we want to ignore
// third party modules, we should return true instead of false here.
// Currently we simply bail out.
if (ModuleUnitFileName.empty())
return llvm::createStringError("Failed to get the primary source");

/// Build a module file for module with `ModuleName`. The information of built
/// module file are stored in \param BuiltModuleFiles.
llvm::Expected<ModuleFile>
buildModuleFile(llvm::StringRef ModuleName, PathRef ModuleUnitFileName,
const GlobalCompilationDatabase &CDB, const ThreadsafeFS &TFS,
const ReusablePrerequisiteModules &BuiltModuleFiles) {
// Try cheap operation earlier to boil-out cheaply if there are problems.
auto Cmd = CDB.getCompileCommand(ModuleUnitFileName);
if (!Cmd)
return llvm::createStringError(
llvm::formatv("No compile command for {0}", ModuleUnitFileName));

for (auto &RequiredModuleName : MDB.getRequiredModules(ModuleUnitFileName)) {
// Return early if there are errors building the module file.
if (llvm::Error Err = buildModuleFile(RequiredModuleName, CDB, TFS, MDB,
ModuleFilesPrefix, BuiltModuleFiles))
return llvm::createStringError(
llvm::formatv("Failed to build dependency {0}: {1}",
RequiredModuleName, llvm::toString(std::move(Err))));
}
llvm::SmallString<256> ModuleFilesPrefix =
getUniqueModuleFilesPath(ModuleUnitFileName);

Cmd->Output = getModuleFilePath(ModuleName, ModuleFilesPrefix);

Expand Down Expand Up @@ -316,58 +297,186 @@ llvm::Error buildModuleFile(llvm::StringRef ModuleName,
if (Clang->getDiagnostics().hasErrorOccurred())
return llvm::createStringError("Compilation failed");

BuiltModuleFiles.addModuleFile(ModuleName, Inputs.CompileCommand.Output);
return llvm::Error::success();
return ModuleFile{ModuleName, Inputs.CompileCommand.Output};
}

bool ReusablePrerequisiteModules::canReuse(
const CompilerInvocation &CI,
llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS) const {
if (RequiredModules.empty())
return true;

llvm::SmallVector<llvm::StringRef> BMIPaths;
for (auto &MF : RequiredModules)
BMIPaths.push_back(MF->getModuleFilePath());
return IsModuleFilesUpToDate(BMIPaths, *this, VFS);
}

class ModuleFileCache {
public:
ModuleFileCache(const GlobalCompilationDatabase &CDB) : CDB(CDB) {}
const GlobalCompilationDatabase &getCDB() const { return CDB; }

std::shared_ptr<const ModuleFile> getModule(StringRef ModuleName);

void add(StringRef ModuleName, std::shared_ptr<const ModuleFile> ModuleFile) {
std::lock_guard<std::mutex> Lock(ModuleFilesMutex);

ModuleFiles[ModuleName] = ModuleFile;
}

void remove(StringRef ModuleName);

private:
const GlobalCompilationDatabase &CDB;

llvm::StringMap<std::weak_ptr<const ModuleFile>> ModuleFiles;
// Mutex to guard accesses to ModuleFiles.
std::mutex ModuleFilesMutex;
};

std::shared_ptr<const ModuleFile>
ModuleFileCache::getModule(StringRef ModuleName) {
std::lock_guard<std::mutex> Lock(ModuleFilesMutex);

auto Iter = ModuleFiles.find(ModuleName);
if (Iter == ModuleFiles.end())
return nullptr;

if (auto Res = Iter->second.lock())
return Res;

ModuleFiles.erase(Iter);
return nullptr;
}

void ModuleFileCache::remove(StringRef ModuleName) {
std::lock_guard<std::mutex> Lock(ModuleFilesMutex);

ModuleFiles.erase(ModuleName);
}

/// Collect the directly and indirectly required module names for \param
/// ModuleName in topological order. The \param ModuleName is guaranteed to
/// be the last element in \param ModuleNames.
llvm::SmallVector<StringRef> getAllRequiredModules(ProjectModules &MDB,
StringRef ModuleName) {
llvm::SmallVector<llvm::StringRef> ModuleNames;
llvm::StringSet<> ModuleNamesSet;

auto VisitDeps = [&](StringRef ModuleName, auto Visitor) -> void {
ModuleNamesSet.insert(ModuleName);

for (StringRef RequiredModuleName :
MDB.getRequiredModules(MDB.getSourceForModuleName(ModuleName)))
if (ModuleNamesSet.insert(RequiredModuleName).second)
Visitor(RequiredModuleName, Visitor);

ModuleNames.push_back(ModuleName);
};
VisitDeps(ModuleName, VisitDeps);

return ModuleNames;
}

} // namespace

class ModulesBuilder::ModulesBuilderImpl {
public:
ModulesBuilderImpl(const GlobalCompilationDatabase &CDB) : Cache(CDB) {}

const GlobalCompilationDatabase &getCDB() const { return Cache.getCDB(); }

llvm::Error
getOrBuildModuleFile(StringRef ModuleName, const ThreadsafeFS &TFS,
ProjectModules &MDB,
ReusablePrerequisiteModules &BuiltModuleFiles);

private:
ModuleFileCache Cache;
};

llvm::Error ModulesBuilder::ModulesBuilderImpl::getOrBuildModuleFile(
StringRef ModuleName, const ThreadsafeFS &TFS, ProjectModules &MDB,
ReusablePrerequisiteModules &BuiltModuleFiles) {
if (BuiltModuleFiles.isModuleUnitBuilt(ModuleName))
return llvm::Error::success();

PathRef ModuleUnitFileName = MDB.getSourceForModuleName(ModuleName);
/// It is possible that we're meeting third party modules (modules whose
/// source are not in the project. e.g, the std module may be a third-party
/// module for most project) or something wrong with the implementation of
/// ProjectModules.
/// FIXME: How should we treat third party modules here? If we want to ignore
/// third party modules, we should return true instead of false here.
/// Currently we simply bail out.
if (ModuleUnitFileName.empty())
return llvm::createStringError(
llvm::formatv("Don't get the module unit for module {0}", ModuleName));

// Get Required modules in topological order.
auto ReqModuleNames = getAllRequiredModules(MDB, ModuleName);
for (llvm::StringRef ReqModuleName : ReqModuleNames) {
if (BuiltModuleFiles.isModuleUnitBuilt(ModuleName))
continue;

if (auto Cached = Cache.getModule(ReqModuleName)) {
if (IsModuleFileUpToDate(Cached->getModuleFilePath(), BuiltModuleFiles,
TFS.view(std::nullopt))) {
log("Reusing module {0} from {1}", ModuleName,
Cached->getModuleFilePath());
BuiltModuleFiles.addModuleFile(std::move(Cached));
continue;
}
Cache.remove(ReqModuleName);
}

llvm::Expected<ModuleFile> MF = buildModuleFile(
ModuleName, ModuleUnitFileName, getCDB(), TFS, BuiltModuleFiles);
if (llvm::Error Err = MF.takeError())
return Err;

log("Built module {0} to {1}", ModuleName, MF->getModuleFilePath());
auto BuiltModuleFile = std::make_shared<const ModuleFile>(std::move(*MF));
Cache.add(ModuleName, BuiltModuleFile);
BuiltModuleFiles.addModuleFile(std::move(BuiltModuleFile));
}

return llvm::Error::success();
}

std::unique_ptr<PrerequisiteModules>
ModulesBuilder::buildPrerequisiteModulesFor(PathRef File,
const ThreadsafeFS &TFS) const {
std::unique_ptr<ProjectModules> MDB = CDB.getProjectModules(File);
const ThreadsafeFS &TFS) {
std::unique_ptr<ProjectModules> MDB = Impl->getCDB().getProjectModules(File);
if (!MDB) {
elog("Failed to get Project Modules information for {0}", File);
return std::make_unique<FailedPrerequisiteModules>();
}

std::vector<std::string> RequiredModuleNames = MDB->getRequiredModules(File);
if (RequiredModuleNames.empty())
return std::make_unique<StandalonePrerequisiteModules>();

llvm::SmallString<256> ModuleFilesPrefix = getUniqueModuleFilesPath(File);

log("Trying to build required modules for {0} in {1}", File,
ModuleFilesPrefix);

auto RequiredModules = std::make_unique<StandalonePrerequisiteModules>();
return std::make_unique<ReusablePrerequisiteModules>();

auto RequiredModules = std::make_unique<ReusablePrerequisiteModules>();
for (llvm::StringRef RequiredModuleName : RequiredModuleNames) {
// Return early if there is any error.
if (llvm::Error Err =
buildModuleFile(RequiredModuleName, CDB, TFS, *MDB.get(),
ModuleFilesPrefix, *RequiredModules.get())) {
if (llvm::Error Err = Impl->getOrBuildModuleFile(
RequiredModuleName, TFS, *MDB.get(), *RequiredModules.get())) {
elog("Failed to build module {0}; due to {1}", RequiredModuleName,
toString(std::move(Err)));
return std::make_unique<FailedPrerequisiteModules>();
}
}

log("Built required modules for {0} in {1}", File, ModuleFilesPrefix);

return std::move(RequiredModules);
}

bool StandalonePrerequisiteModules::canReuse(
const CompilerInvocation &CI,
llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS) const {
if (RequiredModules.empty())
return true;

SmallVector<StringRef> BMIPaths;
for (auto &MF : RequiredModules)
BMIPaths.push_back(MF.ModuleFilePath);
return IsModuleFilesUpToDate(BMIPaths, *this, VFS);
ModulesBuilder::ModulesBuilder(const GlobalCompilationDatabase &CDB) {
Impl = std::make_unique<ModulesBuilderImpl>(CDB);
}

ModulesBuilder::~ModulesBuilder() {}

} // namespace clangd
} // namespace clang
Loading
Loading