Skip to content

[Index] Add option to compress unit and record files #10977

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: next
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions clang/include/clang/Driver/Options.td
Original file line number Diff line number Diff line change
Expand Up @@ -736,6 +736,11 @@ def index_ignore_pcms : Flag<["-"], "index-ignore-pcms">,
Visibility<[ClangOption, CC1Option]>,
HelpText<"Ignore symbols from imported pcm modules">,
MarshallingInfoFlag<FrontendOpts<"IndexIgnorePcms">>;
// Flag (visible to both the clang driver and cc1) that requests compression of
// the unit and record files written to the index store. Its value is
// marshalled into FrontendOptions::IndexStoreCompress.
// NOTE(review): the def name mentions only "record" compression, while the
// flag spelling and help text cover both unit and record files.
def index_store_record_compression
: Flag<["-"], "index-store-compress">,
Visibility<[ClangOption, CC1Option]>,
HelpText<"Whether to compress unit and record files in the index store">,
MarshallingInfoFlag<FrontendOpts<"IndexStoreCompress">>;

// Make sure all other -ccc- options are rejected.
def ccc_ : Joined<["-"], "ccc-">, Group<internal_Group>, Flags<[Unsupported]>;
Expand Down
3 changes: 3 additions & 0 deletions clang/include/clang/Frontend/FrontendOptions.h
Original file line number Diff line number Diff line change
Expand Up @@ -477,6 +477,9 @@ class FrontendOptions {
std::string IndexStorePath;
std::string IndexUnitOutputPath;

/// Whether to compress the unit and record files in the index store.
bool IndexStoreCompress = false;

/// The input kind, either specified via -x argument or deduced from the input
/// file name.
InputKind DashX;
Expand Down
4 changes: 3 additions & 1 deletion clang/include/clang/Index/IndexRecordWriter.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,10 +53,12 @@ typedef llvm::function_ref<Symbol(OpaqueDecl, SmallVectorImpl<char> &Scratch)>
/// beginRecord, and if the file does not already exist, then proceed to add
/// all symbol occurrences (addOccurrence) and finally finish with endRecord.
class IndexRecordWriter {
/// Whether to compress the index record using zlib.
bool Compress;
SmallString<64> RecordsPath; ///< The records directory path.
void *Record = nullptr; ///< The state of the current record.
public:
IndexRecordWriter(StringRef IndexPath);
IndexRecordWriter(StringRef IndexPath, bool Compress);

enum class Result {
Success,
Expand Down
17 changes: 7 additions & 10 deletions clang/include/clang/Index/IndexUnitWriter.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ class IndexUnitWriter {
SmallString<64> UnitsPath;
std::string ProviderIdentifier;
std::string ProviderVersion;
/// Whether to compress the index unit using zlib.
bool Compress;
std::string OutputFile;
std::string ModuleName;
OptionalFileEntryRef MainFile;
Expand Down Expand Up @@ -92,17 +94,12 @@ class IndexUnitWriter {
/// \param IsSystem true for system module units, false otherwise.
/// \param Remapper Remapper to use to standardize file paths to make them
/// hermetic/reproducible. This applies to all paths emitted in the unit file.
IndexUnitWriter(FileManager &FileMgr,
StringRef StorePath,
IndexUnitWriter(FileManager &FileMgr, StringRef StorePath,
StringRef ProviderIdentifier, StringRef ProviderVersion,
StringRef OutputFile,
StringRef ModuleName,
OptionalFileEntryRef MainFile,
bool IsSystem,
bool IsModuleUnit,
bool IsDebugCompilation,
StringRef TargetTriple,
StringRef SysrootPath,
bool Compress, StringRef OutputFile, StringRef ModuleName,
OptionalFileEntryRef MainFile, bool IsSystem,
bool IsModuleUnit, bool IsDebugCompilation,
StringRef TargetTriple, StringRef SysrootPath,
const PathRemapper &Remapper,
writer::ModuleInfoWriterCallback GetInfoForModule);
~IndexUnitWriter();
Expand Down
4 changes: 2 additions & 2 deletions clang/lib/Index/ClangIndexRecordWriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,9 +61,9 @@ StringRef ClangIndexRecordWriter::getUSRNonCached(const IdentifierInfo *Name,
return StringRef(Ptr, USR.size());
}

/// Constructs the writer: forwards the data directory path from \p Opts and
/// the \p Compress flag to the Impl member, keeps a reference to \p Ctx, and
/// stores \p Opts in RecordOpts. An ASTNameGenerator is created when
/// RecordSymbolCodeGenName is set in the options.
ClangIndexRecordWriter::ClangIndexRecordWriter(ASTContext &Ctx,
ClangIndexRecordWriter::ClangIndexRecordWriter(ASTContext &Ctx, bool Compress,
RecordingOptions Opts)
: Impl(Opts.DataDirPath), Ctx(Ctx), RecordOpts(std::move(Opts)) {
: Impl(Opts.DataDirPath, Compress), Ctx(Ctx), RecordOpts(std::move(Opts)) {
// NOTE(review): Opts is read here after being passed to std::move() in the
// initializer list above. Reading RecordOpts.RecordSymbolCodeGenName instead
// would avoid touching a moved-from object -- confirm and consider changing.
if (Opts.RecordSymbolCodeGenName)
ASTNameGen.reset(new ASTNameGenerator(Ctx));
}
Expand Down
2 changes: 1 addition & 1 deletion clang/lib/Index/ClangIndexRecordWriter.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ class ClangIndexRecordWriter {
llvm::DenseMap<const void *, StringRef> USRByDecl;

public:
ClangIndexRecordWriter(ASTContext &Ctx, RecordingOptions Opts);
ClangIndexRecordWriter(ASTContext &Ctx, bool Compress, RecordingOptions Opts);
~ClangIndexRecordWriter();

ASTContext &getASTContext() { return Ctx; }
Expand Down
40 changes: 38 additions & 2 deletions clang/lib/Index/IndexRecordReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,12 @@
//===----------------------------------------------------------------------===//

#include "clang/Index/IndexRecordReader.h"
#include "IndexDataStoreUtils.h"
#include "BitstreamVisitor.h"
#include "IndexDataStoreUtils.h"
#include "clang/Index/IndexDataStoreSymbolUtils.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Bitstream/BitstreamReader.h"
#include "llvm/Support/Compression.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
Expand Down Expand Up @@ -368,7 +369,42 @@ IndexRecordReader::createWithBuffer(std::unique_ptr<llvm::MemoryBuffer> Buffer,
std::unique_ptr<IndexRecordReader> Reader;
Reader.reset(new IndexRecordReader());
auto &Impl = Reader->Impl;
Impl.Buffer = std::move(Buffer);
if (Buffer->getBuffer().starts_with("CIDXR")) {
if (!llvm::compression::zlib::isAvailable()) {
Error = "zlib not available to decompress compressed index record";
return nullptr;
}

ArrayRef compressedBuffer =
llvm::arrayRefFromStringRef(Buffer->getBuffer());

// Slice off the `CIDXR` marker we checked above.
compressedBuffer = compressedBuffer.slice(5);

// Read the uncompressed size of the record.
if (compressedBuffer.size() < 4) {
Error = "Unexpectedly found end of record file";
return nullptr;
}
size_t uncompressedSize =
llvm::support::endian::read32le(compressedBuffer.data());
compressedBuffer = compressedBuffer.slice(4);

// Decompress the record
llvm::SmallVector<uint8_t, 0> decompressed;
llvm::Error decompressError = llvm::compression::zlib::decompress(
compressedBuffer, decompressed, uncompressedSize);
if (decompressError) {
llvm::raw_string_ostream ErrorOS(Error);
ErrorOS << "Failed to decompress index record: " << decompressError;
return nullptr;
}
Impl.Buffer = llvm::MemoryBuffer::getMemBufferCopy(
llvm::toStringRef(decompressed),
Buffer->getBufferIdentifier() + " decompressed");
} else {
Impl.Buffer = std::move(Buffer);
}
llvm::BitstreamCursor Stream(*Impl.Buffer);

if (Stream.AtEndOfStream()) {
Expand Down
44 changes: 41 additions & 3 deletions clang/lib/Index/IndexRecordWriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/Bitstream/BitstreamWriter.h"
#include "llvm/Support/Compression.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Path.h"
Expand Down Expand Up @@ -234,8 +235,8 @@ static void writeDecls(BitstreamWriter &Stream, ArrayRef<DeclInfo> Decls,
Stream.ExitBlock();
}

/// Creates a record writer for the index store located at \p IndexPath.
///
/// \param IndexPath Path used to initialize RecordsPath; the records
/// sub-directory is appended to it via store::appendRecordSubDir.
/// \param Compress Stored in the Compress member; endRecord checks it to
/// decide whether the record is written in compressed form.
IndexRecordWriter::IndexRecordWriter(StringRef IndexPath)
: RecordsPath(IndexPath) {
IndexRecordWriter::IndexRecordWriter(StringRef IndexPath, bool Compress)
: Compress(Compress), RecordsPath(IndexPath) {
// Turn the store path into the path of its records sub-directory.
store::appendRecordSubDir(RecordsPath);
}

Expand Down Expand Up @@ -319,7 +320,44 @@ IndexRecordWriter::endRecord(std::string &Error,
}

raw_fd_ostream OS(TempFD, /*shouldClose=*/true);
OS.write(State.Buffer.data(), State.Buffer.size());
if (Compress) {
if (!llvm::compression::zlib::isAvailable()) {
Error = "Zlib not available to compress record file";
return Result::Failure;
}

// Higher compression levels add marginal improvements to the compressed
// size while having a measurable impact on compile time. An analysis on a
// mixed clang / Swift project showed the following results:
// - BestSpeed: Compresses the index store by 66% while increasing the
// index-while-building overhead by 15% (from 1.07% to 1.23%)
// - Default: Compression of 68.1%, increases index-while-building overhead
// by 23%
// - BestSize: Compression of 68.2%, increases index-while-building
// overhead by 37%
// Based on those numbers, BestSpeed seems like the best choice. If clients
// need to compress the index store further, they should run a compression
// algorithm across all files in the index store.
auto compressionLevel = compression::zlib::BestSpeedCompression;
ArrayRef<uint8_t> bufferRef = llvm::arrayRefFromStringRef(State.Buffer);
llvm::SmallVector<uint8_t, 0> compressed;
llvm::compression::zlib::compress(bufferRef, compressed, compressionLevel);

// Write the `CIDXR` (compressed index record) marker to indicate that this
// is a compressed record file.
OS << "CIDXR";

// Write the size of the uncompressed record so that we can allocate a
// buffer of the corresponding size when decompressing it.
char Buf[4];
llvm::support::endian::write32le(Buf, bufferRef.size());
OS.write(Buf, sizeof(Buf));

// Write the actual compressed data
OS << llvm::toStringRef(compressed);
} else {
OS << State.Buffer;
}
OS.close();

if (OS.has_error()) {
Expand Down
37 changes: 36 additions & 1 deletion clang/lib/Index/IndexUnitReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -282,7 +282,42 @@ bool IndexUnitReaderImpl::init(std::unique_ptr<MemoryBuffer> Buf,
sys::TimePoint<> ModTime,
std::string &Error) {
this->ModTime = ModTime;
this->MemBuf = std::move(Buf);

if (Buf->getBuffer().starts_with("CIDXU")) {
if (!llvm::compression::zlib::isAvailable()) {
Error = "zlib not available to decompress compressed index unit";
return true;
}

ArrayRef compressedBuffer = llvm::arrayRefFromStringRef(Buf->getBuffer());

// Slice off the `CIDXU` marker we checked above.
compressedBuffer = compressedBuffer.slice(5);

// Read the uncompressed size of the unit.
if (compressedBuffer.size() < 4) {
Error = "Unexpectedly found end of record unit";
return true;
}
size_t uncompressedSize =
llvm::support::endian::read32le(compressedBuffer.data());
compressedBuffer = compressedBuffer.slice(4);

// Decompress the unit
llvm::SmallVector<uint8_t, 0> decompressed;
llvm::Error decompressError = llvm::compression::zlib::decompress(
compressedBuffer, decompressed, uncompressedSize);
if (decompressError) {
llvm::raw_string_ostream ErrorOS(Error);
ErrorOS << "Failed to decompress index unit: " << decompressError;
return true;
}
this->MemBuf = llvm::MemoryBuffer::getMemBufferCopy(
llvm::toStringRef(decompressed),
Buf->getBufferIdentifier() + " decompressed");
} else {
this->MemBuf = std::move(Buf);
}
llvm::BitstreamCursor Stream(*MemBuf);

if (Stream.AtEndOfStream()) {
Expand Down
53 changes: 37 additions & 16 deletions clang/lib/Index/IndexUnitWriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#include "llvm/ADT/StringMap.h"
#include "llvm/Bitstream/BitstreamWriter.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Compression.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Path.h"
Expand Down Expand Up @@ -116,21 +117,14 @@ class IndexUnitWriter::PathStorage {
}
};

IndexUnitWriter::IndexUnitWriter(FileManager &FileMgr,
StringRef StorePath,
StringRef ProviderIdentifier,
StringRef ProviderVersion,
StringRef OutputFile,
StringRef ModuleName,
OptionalFileEntryRef MainFile,
bool IsSystem,
bool IsModuleUnit,
bool IsDebugCompilation,
StringRef TargetTriple,
StringRef SysrootPath,
const PathRemapper &Remapper,
writer::ModuleInfoWriterCallback GetInfoForModule)
: FileMgr(FileMgr), Remapper(Remapper) {
IndexUnitWriter::IndexUnitWriter(
FileManager &FileMgr, StringRef StorePath, StringRef ProviderIdentifier,
StringRef ProviderVersion, bool Compress, StringRef OutputFile,
StringRef ModuleName, OptionalFileEntryRef MainFile, bool IsSystem,
bool IsModuleUnit, bool IsDebugCompilation, StringRef TargetTriple,
StringRef SysrootPath, const PathRemapper &Remapper,
writer::ModuleInfoWriterCallback GetInfoForModule)
: FileMgr(FileMgr), Compress(Compress), Remapper(Remapper) {
this->UnitsPath = StorePath;
store::appendUnitSubDir(this->UnitsPath);
this->ProviderIdentifier = std::string(ProviderIdentifier);
Expand Down Expand Up @@ -393,7 +387,34 @@ bool IndexUnitWriter::write(std::string &Error) {
}

raw_fd_ostream OS(TempFD, /*shouldClose=*/true);
OS.write(Buffer.data(), Buffer.size());
if (Compress) {
if (!llvm::compression::zlib::isAvailable()) {
Error = "Zlib not available to compress record file";
return true;
}

// See the comment in `IndexRecordWriter::endRecord` for the rationale behind
// using `BestSpeed`.
auto compressionLevel = compression::zlib::BestSpeedCompression;
ArrayRef<uint8_t> bufferRef = llvm::arrayRefFromStringRef(Buffer);
llvm::SmallVector<uint8_t, 0> compressed;
llvm::compression::zlib::compress(bufferRef, compressed, compressionLevel);

// Write the `CIDXU` (compressed index unit) marker to indicate that this
// is a compressed unit file.
OS << "CIDXU";

// Write the size of the uncompressed unit so that we can allocate a
// buffer of the corresponding size when decompressing it.
char Buf[4];
llvm::support::endian::write32le(Buf, bufferRef.size());
OS.write(Buf, sizeof(Buf));

// Write the actual compressed data
OS << llvm::toStringRef(compressed);
} else {
OS << Buffer;
}
OS.close();

if (OS.has_error()) {
Expand Down
10 changes: 6 additions & 4 deletions clang/lib/Index/IndexingAction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -838,9 +838,10 @@ static void writeUnitData(const CompilerInstance &CI,
Remapper.addMapping(It->first, It->second);

IndexUnitWriter UnitWriter(
CI.getFileManager(), DataPath, "clang", getClangVersion(), OutputFile,
ModuleName, RootFile, IsSystemUnit, IsModuleUnit, IsDebugCompilation,
CI.getTargetOpts().Triple, SysrootPath, Remapper, getModuleInfo);
CI.getFileManager(), DataPath, "clang", getClangVersion(),
CI.getFrontendOpts().IndexStoreCompress, OutputFile, ModuleName, RootFile,
IsSystemUnit, IsModuleUnit, IsDebugCompilation, CI.getTargetOpts().Triple,
SysrootPath, Remapper, getModuleInfo);

DepProvider.visitFileDependencies(
CI, [&](FileEntryRef FE, bool isSystemFile) {
Expand All @@ -863,7 +864,8 @@ static void writeUnitData(const CompilerInstance &CI,
}
});

ClangIndexRecordWriter RecordWriter(CI.getASTContext(), RecordOpts);
ClangIndexRecordWriter RecordWriter(
CI.getASTContext(), CI.getFrontendOpts().IndexStoreCompress, RecordOpts);
for (auto I = Recorder.record_begin(), E = Recorder.record_end(); I != E;
++I) {
FileID FID = I->first;
Expand Down
8 changes: 8 additions & 0 deletions clang/test/Index/Store/compress-index-store.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
// RUN: rm -rf %t.idx
// RUN: %clang_cc1 %s -index-store-path %t.idx -index-store-compress
// RUN: c-index-test core -print-unit %t.idx | FileCheck --check-prefix=UNIT %s
// RUN: c-index-test core -print-record %t.idx | FileCheck --check-prefix=RECORD %s

// UNIT: main-path: {{.*}}/compress-index-store.c
// RECORD: [[@LINE+1]]:6 | function/C | c:@F@foo | Decl | rel: 0
void foo(int *p);
3 changes: 2 additions & 1 deletion clang/tools/c-index-test/JSONAggregation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -215,7 +215,8 @@ std::unique_ptr<RecordInfo> Aggregator::processRecord(StringRef recordFile) {
std::string error;
auto recordReader = IndexRecordReader(Store, recordFile, error);
if (!recordReader) {
errs() << "failed reading record file: " << recordFile << '\n';
errs() << "failed reading record file: " << recordFile << ": " << error
<< '\n';
::exit(1);
}
auto record = std::make_unique<RecordInfo>();
Expand Down