Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
207 changes: 201 additions & 6 deletions infra/indexer/index/sqlite.cc
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,13 @@
namespace oss_fuzz {
namespace indexer {
namespace {

// Note: We could in principle enforce UNIQUE constraints on `reference` foreign
// key pairs, as well as those of `virtual_method_link` and
// `entity_translation_unit` (as an extreme, non-ID fields of e.g. `location`
// could also be made into a UNIQUE tuple). But those are unique by construction
// now and we hope to avoid the overhead of checking those constraints.

const char kCreateDb[] =
"PRAGMA foreign_keys = ON;\n"
"PRAGMA user_version = " SCHEMA_VERSION
Expand Down Expand Up @@ -87,6 +94,25 @@ const char kCreateDb[] =
"CREATE INDEX virtual_method_link_parent ON virtual_method_link("
" parent_entity_id);\n";

// Schema for the optional incremental indexing tables. `translation_unit`
// holds one row per translation unit; `entity_translation_unit` and
// `reference_translation_unit` are link tables whose rows point, through
// foreign keys, at `entity(id)` / `reference(id)` and `translation_unit(id)`.
// The `entity` and `reference` tables are not created here and must already
// exist. `SaveAsSqlite` executes this only when the index carries incremental
// indexing metadata.
const char kCreateIncrementalIndexingSupportTables[] =
"CREATE TABLE translation_unit(\n"
" id INTEGER PRIMARY KEY,\n"
" path TEXT);\n"
"\n"
"CREATE TABLE entity_translation_unit(\n"
" id INTEGER PRIMARY KEY,\n"
" entity_id INT NOT NULL,\n"
" tu_id INT NOT NULL,\n"
" FOREIGN KEY (entity_id) REFERENCES entity(id),\n"
" FOREIGN KEY (tu_id) REFERENCES translation_unit(id));\n"
"\n"
"CREATE TABLE reference_translation_unit(\n"
" id INTEGER PRIMARY KEY,\n"
" reference_id INT NOT NULL,\n"
" tu_id INT NOT NULL,\n"
" FOREIGN KEY (reference_id) REFERENCES reference(id),\n"
" FOREIGN KEY (tu_id) REFERENCES translation_unit(id));\n";

const char kInsertLocation[] =
"INSERT INTO location\n"
" (id, dirname, basename, start_line, end_line)\n"
Expand All @@ -109,6 +135,21 @@ const char kInsertLink[] =
" (id, parent_entity_id, child_entity_id)\n"
" VALUES (?1, ?2, ?3);";

// Parameterized insert of one `translation_unit` row.
// Bindings: ?1 = id, ?2 = path.
const char kInsertTranslationUnit[] =
"INSERT INTO translation_unit\n"
" (id, path)\n"
" VALUES (?1, ?2);";

// Parameterized insert of one `entity_translation_unit` row.
// Bindings: ?1 = id, ?2 = entity_id, ?3 = tu_id.
const char kInsertEntityTranslationUnit[] =
"INSERT INTO entity_translation_unit\n"
" (id, entity_id, tu_id)\n"
" VALUES (?1, ?2, ?3);";

// Parameterized insert of one `reference_translation_unit` row.
// Bindings: ?1 = id, ?2 = reference_id, ?3 = tu_id.
const char kInsertReferenceTranslationUnit[] =
"INSERT INTO reference_translation_unit\n"
" (id, reference_id, tu_id)\n"
" VALUES (?1, ?2, ?3);";

const char kFinalizeDb[] =
"VACUUM;\n"
"REINDEX;\n"
Expand Down Expand Up @@ -163,8 +204,8 @@ bool InsertLocations(sqlite3* db, absl::Span<const Location> locations) {

bool InsertEntities(sqlite3* db, absl::Span<const Entity> entities) {
// `substitute_entity_id` foreign key can refer to a yet-unadded entity.
if (sqlite3_exec(db, "PRAGMA foreign_keys = OFF;", nullptr,
nullptr, nullptr) != SQLITE_OK) {
if (sqlite3_exec(db, "PRAGMA foreign_keys = OFF;", nullptr, nullptr,
nullptr) != SQLITE_OK) {
LOG(ERROR) << "sqlite disabling foreign keys failed: `"
<< sqlite3_errmsg(db) << "`";
return false;
Expand Down Expand Up @@ -261,8 +302,8 @@ bool InsertEntities(sqlite3* db, absl::Span<const Entity> entities) {
std::move(cleanup).Cancel();
sqlite3_finalize(insert_entity);

if (sqlite3_exec(db, "PRAGMA foreign_keys = ON;", nullptr, nullptr, nullptr)
!= SQLITE_OK) {
if (sqlite3_exec(db, "PRAGMA foreign_keys = ON;", nullptr, nullptr,
nullptr) != SQLITE_OK) {
LOG(ERROR) << "sqlite re-enabling foreign keys failed: `"
<< sqlite3_errmsg(db) << "`";
return false;
Expand Down Expand Up @@ -320,7 +361,7 @@ bool InsertVirtualMethodLinks(sqlite3* db,
return false;
}

for (ReferenceId i = 0; i < links.size(); ++i) {
for (VirtualMethodLinkId i = 0; i < links.size(); ++i) {
const VirtualMethodLink& link = links[i];
if (sqlite3_bind_int64(insert_link, 1, i) != SQLITE_OK ||
sqlite3_bind_int64(insert_link, 2, link.parent()) != SQLITE_OK ||
Expand All @@ -332,7 +373,7 @@ bool InsertVirtualMethodLinks(sqlite3* db,
}

if (sqlite3_step(insert_link) != SQLITE_DONE) {
LOG(ERROR) << "sqlite executing insert_reference failed: `"
LOG(ERROR) << "sqlite executing insert_link failed: `"
<< sqlite3_errmsg(db) << "`";
sqlite3_finalize(insert_link);
return false;
Expand All @@ -345,6 +386,126 @@ bool InsertVirtualMethodLinks(sqlite3* db,
sqlite3_finalize(insert_link);
return true;
}

// Inserts one `translation_unit` row per element of `translation_units`. The
// row id is the element's position in the span and the `path` column receives
// its `index_path()`. On the first sqlite failure the error is logged, the
// prepared statement is released and false is returned; returns true once
// every row has been written.
bool InsertTranslationUnits(
    sqlite3* db, absl::Span<const TranslationUnit> translation_units) {
  sqlite3_stmt* stmt = nullptr;
  const int prepare_status =
      sqlite3_prepare_v2(db, kInsertTranslationUnit,
                         sizeof(kInsertTranslationUnit), &stmt, nullptr);
  if (prepare_status != SQLITE_OK) {
    LOG(ERROR) << "sqlite compiling prepared statement failed: `"
               << sqlite3_errmsg(db) << "`";
    return false;
  }

  TranslationUnitId row_id = 0;
  for (const TranslationUnit& unit : translation_units) {
    // Bound with SQLITE_STATIC: the text must stay alive until the statement
    // has been stepped, which holds because `path` refers into the span.
    const auto& path = unit.index_path();
    const bool bound =
        sqlite3_bind_int64(stmt, 1, row_id) == SQLITE_OK &&
        sqlite3_bind_text(stmt, 2, path.data(), path.size(),
                          SQLITE_STATIC) == SQLITE_OK;
    if (!bound) {
      LOG(ERROR) << "sqlite binding insert_tu failed: `" << sqlite3_errmsg(db)
                 << "`";
      sqlite3_finalize(stmt);
      return false;
    }

    const int step_status = sqlite3_step(stmt);
    if (step_status != SQLITE_DONE) {
      LOG(ERROR) << "sqlite executing insert_tu failed: `" << sqlite3_errmsg(db)
                 << "`";
      sqlite3_finalize(stmt);
      return false;
    }

    // Rearm the statement for the next row.
    sqlite3_reset(stmt);
    sqlite3_clear_bindings(stmt);
    ++row_id;
  }

  sqlite3_finalize(stmt);
  return true;
}

// Inserts one `entity_translation_unit` row per element of
// `entity_translation_units`. The row id is the element's position in the
// span; `entity_id` and `tu_id` come from the element's accessors. On the
// first sqlite failure the error is logged, the prepared statement is released
// and false is returned; returns true once every row has been written.
bool InsertEntityTranslationUnits(
    sqlite3* db,
    absl::Span<const EntityTranslationUnit> entity_translation_units) {
  sqlite3_stmt* stmt = nullptr;
  const int prepare_status = sqlite3_prepare_v2(
      db, kInsertEntityTranslationUnit, sizeof(kInsertEntityTranslationUnit),
      &stmt, nullptr);
  if (prepare_status != SQLITE_OK) {
    LOG(ERROR) << "sqlite compiling prepared statement failed: `"
               << sqlite3_errmsg(db) << "`";
    return false;
  }

  EntityTranslationUnitId row_id = 0;
  for (const EntityTranslationUnit& link : entity_translation_units) {
    const bool bound =
        sqlite3_bind_int64(stmt, 1, row_id) == SQLITE_OK &&
        sqlite3_bind_int64(stmt, 2, link.entity_id()) == SQLITE_OK &&
        sqlite3_bind_int64(stmt, 3, link.tu_id()) == SQLITE_OK;
    if (!bound) {
      LOG(ERROR) << "sqlite binding insert_entity_tu failed: `"
                 << sqlite3_errmsg(db) << "`";
      sqlite3_finalize(stmt);
      return false;
    }

    const int step_status = sqlite3_step(stmt);
    if (step_status != SQLITE_DONE) {
      LOG(ERROR) << "sqlite executing insert_entity_tu failed: `"
                 << sqlite3_errmsg(db) << "`";
      sqlite3_finalize(stmt);
      return false;
    }

    // Rearm the statement for the next row.
    sqlite3_reset(stmt);
    sqlite3_clear_bindings(stmt);
    ++row_id;
  }

  sqlite3_finalize(stmt);
  return true;
}

// Inserts one `reference_translation_unit` row per element of
// `reference_translation_units`. The row id is the element's position in the
// span; `reference_id` and `tu_id` come from the element's accessors. On the
// first sqlite failure the error is logged, the prepared statement is released
// and false is returned; returns true once every row has been written.
bool InsertReferenceTranslationUnits(
    sqlite3* db,
    absl::Span<const ReferenceTranslationUnit> reference_translation_units) {
  sqlite3_stmt* stmt = nullptr;
  const int prepare_status =
      sqlite3_prepare_v2(db, kInsertReferenceTranslationUnit,
                         sizeof(kInsertReferenceTranslationUnit), &stmt,
                         nullptr);
  if (prepare_status != SQLITE_OK) {
    LOG(ERROR) << "sqlite compiling prepared statement failed: `"
               << sqlite3_errmsg(db) << "`";
    return false;
  }

  ReferenceTranslationUnitId row_id = 0;
  for (const ReferenceTranslationUnit& link : reference_translation_units) {
    const bool bound =
        sqlite3_bind_int64(stmt, 1, row_id) == SQLITE_OK &&
        sqlite3_bind_int64(stmt, 2, link.reference_id()) == SQLITE_OK &&
        sqlite3_bind_int64(stmt, 3, link.tu_id()) == SQLITE_OK;
    if (!bound) {
      LOG(ERROR) << "sqlite binding insert_reference_tu failed: `"
                 << sqlite3_errmsg(db) << "`";
      sqlite3_finalize(stmt);
      return false;
    }

    const int step_status = sqlite3_step(stmt);
    if (step_status != SQLITE_DONE) {
      LOG(ERROR) << "sqlite executing insert_reference_tu failed: `"
                 << sqlite3_errmsg(db) << "`";
      sqlite3_finalize(stmt);
      return false;
    }

    // Rearm the statement for the next row.
    sqlite3_reset(stmt);
    sqlite3_clear_bindings(stmt);
    ++row_id;
  }

  sqlite3_finalize(stmt);
  return true;
}
} // anonymous namespace

bool SaveAsSqlite(const FlatIndex& index, const std::string& path) {
Expand Down Expand Up @@ -391,6 +552,39 @@ bool SaveAsSqlite(const FlatIndex& index, const std::string& path) {
return false;
}

if (index.incremental_indexing_metadata.has_value()) {
const IncrementalIndexingMetadata& metadata =
*index.incremental_indexing_metadata;

LOG(INFO) << "creating incremental indexing support tables";
if (sqlite3_exec(db, kCreateIncrementalIndexingSupportTables, nullptr,
nullptr, &error) != SQLITE_OK) {
LOG(ERROR) << "incremental indexing support table creation failed: `"
<< error << "`";
sqlite3_close(db);
return false;
}

LOG(INFO) << "inserting translation units";
if (!InsertTranslationUnits(db, metadata.translation_units)) {
sqlite3_close(db);
return false;
}

LOG(INFO) << "inserting entity - translation unit pairs";
if (!InsertEntityTranslationUnits(db, metadata.entity_translation_units)) {
sqlite3_close(db);
return false;
}

LOG(INFO) << "inserting reference - translation unit pairs";
if (!InsertReferenceTranslationUnits(
db, metadata.reference_translation_units)) {
sqlite3_close(db);
return false;
}
}

LOG(INFO) << "finalizing database";
if (sqlite3_exec(db, kFinalizeDb, nullptr, nullptr, &error) != SQLITE_OK) {
LOG(ERROR) << "database finalization failed: `" << error << "`";
Expand Down Expand Up @@ -427,5 +621,6 @@ bool SaveAsSqlite(const FlatIndex& index, const std::string& path) {
sqlite3_close(db);
return backup_success;
}

} // namespace indexer
} // namespace oss_fuzz
82 changes: 82 additions & 0 deletions infra/indexer/index/types.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,13 @@ using LocationId = uint64_t;
using EntityId = uint64_t;
using ReferenceId = uint64_t;
using VirtualMethodLinkId = uint64_t;
// ID types for the incremental indexing metadata (translation units and the
// entity/reference-to-translation-unit links).
using TranslationUnitId = uint64_t;
using EntityTranslationUnitId = uint64_t;
using ReferenceTranslationUnitId = uint64_t;
// Sentinel values (all 64 bits set) that denote an invalid ID of each kind.
constexpr LocationId kInvalidLocationId = 0xffffffffffffffffull;
constexpr EntityId kInvalidEntityId = 0xffffffffffffffffull;
constexpr ReferenceId kInvalidReferenceId = 0xffffffffffffffffull;
constexpr TranslationUnitId kInvalidTranslationUnitId = 0xffffffffffffffffull;

inline bool IsRealPath(absl::string_view path) {
// Examples of built-in paths: `<built-in>` and `<command-line>`.
Expand Down Expand Up @@ -180,6 +185,7 @@ class Entity {
CHECK_EQ(substitute_relationship_.has_value(),
new_substitute_entity_id.has_value());
if (substitute_relationship_.has_value()) {
CHECK_NE(*new_substitute_entity_id, kInvalidEntityId);
substitute_relationship_->entity_id_ = *new_substitute_entity_id;
}
}
Expand Down Expand Up @@ -343,13 +349,89 @@ H AbslHashValue(H h, const VirtualMethodLink& link) {
return H::combine(std::move(h), link.parent(), link.child());
}

// Represents a single translation unit, described by its `index_path`.
class TranslationUnit {
 public:
  // Takes `index_path` by value and moves it into the member, so callers that
  // pass a temporary or an `std::move`d string no longer pay for a copy.
  // Lvalue arguments are still copied exactly once, as before.
  explicit TranslationUnit(std::string index_path)
      : index_path_(std::move(index_path)) {}

  const std::string& index_path() const { return index_path_; }

  // Equality and ordering are member-wise, i.e. determined by `index_path`.
  bool operator==(const TranslationUnit&) const = default;
  std::strong_ordering operator<=>(const TranslationUnit&) const = default;

 private:
  std::string index_path_;
};

template <typename H>
H AbslHashValue(H h, const TranslationUnit& tu) {
return H::combine(std::move(h), tu.index_path());
}

// Links an entity to a translation unit it is encountered in (many-to-many).
class EntityTranslationUnit {
public:
EntityTranslationUnit(EntityId entity_id, TranslationUnitId tu_id)
: entity_id_(entity_id), tu_id_(tu_id) {
CHECK_NE(entity_id, kInvalidEntityId);
}

EntityId entity_id() const { return entity_id_; }
TranslationUnitId tu_id() const { return tu_id_; }

bool operator==(const EntityTranslationUnit&) const = default;
std::strong_ordering operator<=>(const EntityTranslationUnit&) const =
default;

private:
EntityId entity_id_;
TranslationUnitId tu_id_;
};

template <typename H>
H AbslHashValue(H h, const EntityTranslationUnit& etu) {
return H::combine(std::move(h), etu.entity_id(), etu.tu_id());
}

// Links a reference to a translation unit it is encountered in (many-to-many).
class ReferenceTranslationUnit {
public:
ReferenceTranslationUnit(ReferenceId reference_id, TranslationUnitId tu_id)
: reference_id_(reference_id), tu_id_(tu_id) {}

ReferenceId reference_id() const { return reference_id_; }
TranslationUnitId tu_id() const { return tu_id_; }

bool operator==(const ReferenceTranslationUnit&) const = default;
std::strong_ordering operator<=>(const ReferenceTranslationUnit&) const =
default;

private:
ReferenceId reference_id_;
TranslationUnitId tu_id_;
};

template <typename H>
H AbslHashValue(H h, const ReferenceTranslationUnit& etu) {
return H::combine(std::move(h), etu.reference_id(), etu.tu_id());
}

// A set of optional metadata for incremental indexing support.
// When saved to SQLite, each vector becomes one table and an element's
// position in its vector is used as the row ID.
struct IncrementalIndexingMetadata {
// All translation units; stored in the `translation_unit` table.
std::vector<TranslationUnit> translation_units;
// (entity, translation unit) pairs; stored in `entity_translation_unit`.
std::vector<EntityTranslationUnit> entity_translation_units;
// (reference, translation unit) pairs; stored in
// `reference_translation_unit`.
std::vector<ReferenceTranslationUnit> reference_translation_units;
};

// A simple holder for a sorted index, used as an interchange format/interface
// definition between uses of the index.
struct FlatIndex {
std::vector<Location> locations;
std::vector<Entity> entities;
std::vector<Reference> references;
std::vector<VirtualMethodLink> virtual_method_links;
// Optional. When set, `SaveAsSqlite` additionally creates and fills the
// incremental indexing support tables; when unset, those tables are not
// created at all.
std::optional<IncrementalIndexingMetadata> incremental_indexing_metadata;
};

namespace testing_internal {
Expand Down
Loading