Skip to content

feat: Add COREF for C family extractor source code #73

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Aug 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ Regarding the openness of languages, you can refer to the table below:
| JavaScript | Y | Y | RELEASE |
| Go | Y | Y | RELEASE |
| XML | Y | Y | RELEASE |
| Cfamily | N | N | BETA |
| Cfamily | Y | Y | BETA |
| SQL | Y | Y | BETA |
| Swift | N | N | BETA |
| Properties | Y | Y | BETA |
Expand Down
2 changes: 1 addition & 1 deletion README_cn.md
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ CodeFuse-Query 包括**Sparrow CLI **和CodeFuse-Query**在线服务Query中心*
| JavaScript | Y | Y | RELEASE |
| Go | Y | Y | RELEASE |
| XML | Y | Y | RELEASE |
| Cfamily | N | N | BETA |
| Cfamily | Y | Y | BETA |
| SQL | Y | Y | BETA |
| Swift | N | N | BETA |
| Properties | Y | Y | BETA |
Expand Down
6 changes: 6 additions & 0 deletions language/cfamily/extractor/.clang-format
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Use the defaults of the LLVM style, but with 4-column indentation.
BasedOnStyle: LLVM
IndentWidth: 4
## The column limit.
## A column limit of 0 means that there is no column limit. In this case, clang-format will respect the input’s line breaking decisions within statements.
ColumnLimit: 100
28 changes: 28 additions & 0 deletions language/cfamily/extractor/.clang-tidy
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Specify a comma-separated list of positive and negative globs: positive globs add subsets of checks, while negative globs (prefixed with "-") remove them.
# The current header guards do not follow the style expected by llvm-header-guard, so that check is disabled.
Checks: '-*,clang-diagnostic-*,llvm-*,-llvm-header-guard,misc-*,-misc-unused-parameters,-misc-non-private-member-variables-in-classes,-misc-no-recursion,readability-identifier-naming'
CheckOptions:
- key: readability-identifier-naming.ClassCase
value: CamelCase
- key: readability-identifier-naming.EnumCase
value: CamelCase
- key: readability-identifier-naming.FunctionCase
value: camelBack
- key: readability-identifier-naming.MemberCase
value: camelBack
- key: readability-identifier-naming.PrivateMemberPrefix
value: '_'
- key: readability-identifier-naming.ProtectedMemberPrefix
value: '_'
- key: readability-identifier-naming.ParameterCase
value: camelBack
- key: readability-identifier-naming.UnionCase
value: CamelCase
- key: readability-identifier-naming.VariableCase
value: camelBack
- key: readability-identifier-naming.IgnoreMainLikeFunctions
value: 1
- key: readability-redundant-member-init.IgnoreBaseInCopyConstructors
value: 1
- key: modernize-use-default-member-init.UseAssignment
value: 1
17 changes: 17 additions & 0 deletions language/cfamily/extractor/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# macOS-specific files (Finder metadata). The file name is spelled ".DS_Store";
# the pattern must use that exact case to match on case-sensitive setups.
.DS_Store

# Nested build directory
/cmake-build-*
/Tests/cmake-build-*

# VS2017 and VSCode config files.
.vscode
.vs

# CLion project configuration
/.idea

# Ignore all bazel-* symlinks. There is no full list since this can change
# based on the name of the directory bazel is cloned into.
/bazel-*
21 changes: 21 additions & 0 deletions language/cfamily/extractor/AST/ASTUtil.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@


#include "ASTUtil.hpp"

using namespace llvm;

/// Get the root-relative path of the given file.
///
/// The "root" is the current working directory with its last path component
/// removed (i.e. the parent of the working directory). When \p absolutePath
/// lies under that root, \p relativePath is set to the part of
/// \p absolutePath that follows the root; that remainder keeps its leading
/// path separator. In every other case \p relativePath is set to
/// \p absolutePath unchanged.
///
/// \param absolutePath path to resolve; it is only read. \p relativePath is a
///        view into the same character data, so that data must stay alive
///        while \p relativePath is used.
/// \param relativePath output: the root-relative path on success, otherwise
///        the same value as \p absolutePath.
/// \return true if the root-relative path was resolved, false otherwise
///         (including when the working directory cannot be determined).
bool getRootRelativePath(StringRef &absolutePath, StringRef &relativePath) {
    // Default result for every failure path below.
    relativePath = absolutePath;

    SmallString<256> cwd;
    if (sys::fs::current_path(cwd)) {
        // current_path reported an error: there is no root to strip.
        return false;
    }

    const size_t lastSep = cwd.rfind(sys::path::get_separator());
    if (lastSep == StringRef::npos) {
        // No separator in the working directory, hence no parent to use as root.
        return false;
    }
    const StringRef root = cwd.substr(0, lastSep);

    // The root has to be a prefix of the path; finding it somewhere in the
    // middle would make the substr() below cut at an unrelated position.
    if (absolutePath.substr(0, root.size()) != root) {
        return false;
    }

    // The prefix must end on a path-component boundary, so that a root of
    // "/a/proj" does not claim "/a/project2/file.c".
    const bool endsOnBoundary = absolutePath.size() == root.size() ||
                                sys::path::is_separator(absolutePath[root.size()]);
    if (!endsOnBoundary) {
        return false;
    }

    relativePath = absolutePath.substr(root.size());
    return true;
}
12 changes: 12 additions & 0 deletions language/cfamily/extractor/AST/ASTUtil.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@


#ifndef COREF_CFAMILY_SRC_EXTRACTOR_ASTUTIL_HPP
#define COREF_CFAMILY_SRC_EXTRACTOR_ASTUTIL_HPP

#include <clang/Tooling/Tooling.h>

// NOTE(review): a using-directive at header scope is visible in every file
// that includes this header. The declaration below spells StringRef
// unqualified, so it depends on this directive.
using namespace llvm;

/// Resolve \p absolutePath against the parent directory of the current
/// working directory.
/// \param absolutePath path to resolve.
/// \param relativePath output: the root-relative path when it could be
///        resolved, otherwise the same value as \p absolutePath.
/// \return true if the root-relative path was resolved, false otherwise.
bool getRootRelativePath(StringRef &absolutePath, StringRef &relativePath);

#endif // COREF_CFAMILY_SRC_EXTRACTOR_ASTUTIL_HPP
120 changes: 120 additions & 0 deletions language/cfamily/extractor/AST/CorefASTConsumer.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@

#ifndef COREF_CFAMILY_SRC_EXTRACTOR_COREFASTCONSUMER_HPP
#define COREF_CFAMILY_SRC_EXTRACTOR_COREFASTCONSUMER_HPP

#include "ASTUtil.hpp"
#include "CorefASTVisitor.hpp"
#include <clang/Tooling/Tooling.h>
#include <llvm/Support/Regex.h>
#include <llvm/Support/raw_ostream.h>
#include <memory>
#include <set>
#include <sstream>
#include <string>
#include <unordered_map>
#include <vector>

using namespace llvm;

namespace coref {

/// AST consumer that extracts one translation unit.
///
/// It walks the top-level declarations of the translation unit, skips those
/// that belong to system headers, blacklisted directories or files already
/// present in storage, hands the remaining ones to CorefASTVisitor and finally
/// inserts a File entry for every file first seen in this translation unit.
class CorefASTConsumer : public clang::ASTConsumer {
  private:
    const CorefUri _corefUri;
    /// FileIDs whose declarations are skipped: system headers, blacklisted
    /// directories and files that already exist in storage.
    std::set<clang::FileID> _visitedFileIds;
    /// Alternation of all blacklist entries; null when no usable blacklist
    /// pattern was supplied, in which case nothing is filtered.
    std::unique_ptr<llvm::Regex> _blacklistDirFilter;

    /// \return true if a blacklist filter exists and matches \p absolutePath.
    inline bool isInBlackListDir(StringRef absolutePath) {
        return _blacklistDirFilter && _blacklistDirFilter->match(absolutePath);
    }

  protected:
    /// An override of HandleTranslationUnit.
    /// This method is called when the ASTs for the entire translation unit
    /// have been parsed. All storage writes happen inside one
    /// StorageFacade transaction.
    /// \param astContext AST context of the parsed translation unit.
    void HandleTranslationUnit(clang::ASTContext &astContext) final {
        coref::StorageFacade::transaction([&]() mutable {
            // Insert the entry for the Program table; the entry may already exist.
            auto programOid = CorefUri::generateCorpusOId(_corefUri.getCorpus());
            coref::StorageFacade::insertClassObj(Program{programOid, _corefUri.getCorpus()});

            // Files that are seen for the first time while traversing this unit.
            std::unordered_map<CorefOid, File> newVisitFileMap{};
            coref::CorefASTVisitor visitor(astContext, _corefUri, programOid, newVisitFileMap);

            auto &sourceMngr = astContext.getSourceManager();
            for (auto *decl : astContext.getTranslationUnitDecl()->decls()) {
                const clang::SourceLocation loc = decl->getLocation();
                const auto curFileId = sourceMngr.getFileID(loc);

                // skip declarations of files already marked as visited
                if (_visitedFileIds.find(curFileId) != _visitedFileIds.end()) {
                    continue;
                }

                // skip AST nodes having an invalid source location
                if (!loc.isValid()) {
                    continue;
                }

                // todo: need to verify the accuracy of function
                // "isInSystemHeader" & "isInSystemMacro"
                if (sourceMngr.isInSystemHeader(loc) || sourceMngr.isInSystemMacro(loc)) {
                    // skip AST nodes in system headers
                    _visitedFileIds.insert(curFileId);
                    continue;
                }

                StringRef absolutePath = sourceMngr.getFilename(loc);
                if (absolutePath.empty()) {
                    continue;
                }
                if (isInBlackListDir(absolutePath)) {
                    _visitedFileIds.insert(curFileId);
                    continue;
                }

                StringRef relativePath;
                getRootRelativePath(absolutePath, relativePath);
                // note: relativePath would be an absolute path when handling a
                // framework file.

                auto fileOid =
                    CorefUri::generateFileOId(_corefUri.getCorpus(), std::string(relativePath));
                if (coref::StorageFacade::checkFileObjExist(programOid, fileOid)) {
                    // skip files that have already been added to the Sqlite DB
                    _visitedFileIds.insert(curFileId);
                    continue;
                }

                if (newVisitFileMap.find(fileOid) == newVisitFileMap.end()) {
                    File file{fileOid, std::string(relativePath),
                              std::string(sys::path::extension(relativePath)),
                              std::string(sys::path::filename(relativePath)), programOid};
                    newVisitFileMap.insert({fileOid, std::move(file)});
                }
                visitor.setExtractFileOid(fileOid);
                visitor.TraverseDecl(decl);
            }

            // traverse the newly visited files and update the File table
            for (auto &[fileOid, f] : newVisitFileMap) {
                coref::StorageFacade::insertClassObj(std::move(f));
            }

            return true;
        });
    }

  public:
    /// Build a consumer for one input file.
    /// \param corefUri URI of the file being extracted; copied.
    /// \param blacklistDir blacklist entries. They are joined into a single
    ///        regular expression of the form "(a)|(b)|...". An empty list, or
    ///        a list that does not form a valid pattern, disables filtering.
    CorefASTConsumer(const CorefUri &corefUri, std::vector<std::string> &blacklistDir)
        : _corefUri(corefUri) {
        // No entries means nothing to filter; do not build a regex from an
        // empty pattern.
        if (blacklistDir.empty()) {
            return;
        }

        // NOTE(review): entries are inserted verbatim, so regex metacharacters
        // in an entry are interpreted as pattern syntax - confirm this is intended.
        std::stringstream regexStr;
        bool first = true;
        for (const auto &dir : blacklistDir) {
            regexStr << (first ? "(" : "|(") << dir << ")";
            first = false;
        }
        const std::string pattern = regexStr.str();

        auto filter = std::make_unique<llvm::Regex>(pattern);
        std::string regexError;
        if (!filter->isValid(regexError)) {
            // Report the problem instead of silently carrying a filter that
            // can never match; the filter stays null.
            llvm::errs() << "Ignoring invalid blacklist pattern '" << pattern
                         << "': " << regexError << "\n";
            return;
        }
        _blacklistDirFilter = std::move(filter);
    }
};
} // namespace coref

#endif // COREF_CFAMILY_SRC_EXTRACTOR_COREFASTCONSUMER_HPP
60 changes: 60 additions & 0 deletions language/cfamily/extractor/AST/CorefASTFrontendAction.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@

#ifndef COREF_CFAMILY_SRC_EXTRACTOR_COREFASTFRONTENDACTION_HPP
#define COREF_CFAMILY_SRC_EXTRACTOR_COREFASTFRONTENDACTION_HPP

#include "../Coref/CorefUri.hpp"
#include "CorefASTConsumer.hpp"
#include <clang/Tooling/Tooling.h>
#include <filesystem>

namespace coref {

class CorefASTFrontendAction : public clang::ASTFrontendAction {
private:
std::string _corpus;
std::vector<std::string>& _blacklistDir;

public:
/// Constructor of CorefASTFrontendAction Class
/// \param corpusName
explicit CorefASTFrontendAction(const std::string &corpusName, std::vector<std::string>& blacklistDir)
: clang::ASTFrontendAction(), _corpus(corpusName), _blacklistDir(blacklistDir) {}

protected:
/// An override of CreateASTConsumer
/// \param ci
/// \param input_file
/// \return
std::unique_ptr<clang::ASTConsumer> CreateASTConsumer(clang::CompilerInstance &ci,
llvm::StringRef inputFile) override {
// TODO fill in corpus info
auto absoluteInputFile = std::filesystem::absolute(inputFile.str());
auto corefUri = CorefUri(_corpus, absoluteInputFile.string());
llvm::outs() << "Consuming file: " << absoluteInputFile.string()
<< " of corpus: " << _corpus << "\n";
return std::unique_ptr<clang::ASTConsumer>(new coref::CorefASTConsumer(corefUri, _blacklistDir));
}
};

struct CorefFrontendActionFactory : public clang::tooling::FrontendActionFactory {
/// Constructor of CorefFrontendActionFactory Class
/// \param corpusName
explicit CorefFrontendActionFactory(const std::string &corpusName, std::vector<std::string>& blacklistDir)
: clang::tooling::FrontendActionFactory(), _corpusName(corpusName), _blacklistDir(blacklistDir) {}

/// a Factory method to create clang::FrontendAction
/// \return
std::unique_ptr<clang::FrontendAction> create() override {
auto *action =
dynamic_cast<clang::FrontendAction *>(new CorefASTFrontendAction(_corpusName, _blacklistDir));
return std::unique_ptr<clang::FrontendAction>(action);
}

private:
std::string _corpusName;
std::vector<std::string>& _blacklistDir;
};

} // namespace coref

#endif // COREF_CFAMILY_SRC_EXTRACTOR_COREFASTFRONTENDACTION_HPP
Loading