swiftlang · vsapsai · May 30, 2025 · Apr 29, 2025 · May 22, 2025 · benlangmuir
diff --git a/clang/include/clang-c/Dependencies.h b/clang/include/clang-c/Dependencies.h
@@ -590,6 +590,61 @@ CXCStringArray
     clang_experimental_DependencyScannerService_getInvalidNegStatCachedPaths(
         CXDependencyScannerService);
 
+/**
+ * Options used to generate a reproducer.
+ */
+typedef struct CXOpaqueDependencyScannerReproducerOptions
+    *CXDependencyScannerReproducerOptions;
+
+/**
+ * Creates a set of settings for
+ * \c clang_experimental_DependencyScanner_generateReproducer action.
+ * Must be disposed with
+ * \c clang_experimental_DependencyScannerReproducerOptions_dispose.
+ *
+ * \param argc the number of compiler invocation arguments (including argv[0]).
+ * \param argv the compiler driver invocation arguments (including argv[0]).
+ * \param ModuleName If non-NULL, reproduce building the named module and all
+ *                   the intermediate modules. Otherwise, reproduce building
+ *                   the whole translation unit.
+ * \param WorkingDirectory the directory in which the invocation runs.
+ * \param ReproducerLocation the directory where to store the reproducer files.
+ *                           If NULL, use a temporary location.
+ * \param UseUniqueReproducerName if reproducer files should have unique names
+ *                                to avoid collisions with existing files.
+ */
+CINDEX_LINKAGE CXDependencyScannerReproducerOptions
+clang_experimental_DependencyScannerReproducerOptions_create(
+    int argc, const char *const *argv, const char *ModuleName,
+    const char *WorkingDirectory, const char *ReproducerLocation,
+    bool UseUniqueReproducerName);
+
+CINDEX_LINKAGE void
+    clang_experimental_DependencyScannerReproducerOptions_dispose(
+        CXDependencyScannerReproducerOptions);
+
+/**
+ * Generates a reproducer to compile a requested file with required modules.
+ *
+ * Here the reproducer means the required input data with the commands to
+ * compile intermediate modules and a requested file. Required intermediate
+ * modules and the order of their compilation are determined by the function
+ * and don't need to be provided.
+ *
+ * \param CXOptions object created via
+ *     \c clang_experimental_DependencyScannerReproducerOptions_create.
+ * \param [out] MessageOut If non-NULL, receives a human-readable message
+ *                         describing the result of the operation; the caller
+ *                         owns it and must call \c clang_disposeString on it.
+ *
+ * \returns \c CXError_Success on success; otherwise a non-zero \c CXErrorCode
+ * indicating the kind of error. \p MessageOut is guaranteed to be populated
+ * on success but is allowed to be empty when an error is encountered.
+ */
+CINDEX_LINKAGE enum CXErrorCode
+clang_experimental_DependencyScanner_generateReproducer(
+    CXDependencyScannerReproducerOptions CXOptions, CXString *MessageOut);
+
 /**
  * @}
  */

diff --git a/clang/test/Modules/reproducer-with-module-dependencies.c b/clang/test/Modules/reproducer-with-module-dependencies.c
@@ -0,0 +1,42 @@
+// Test generating a reproducer for a modular build where required modules are
+// built explicitly as separate steps.
+
+// RUN: rm -rf %t
+// RUN: split-file %s %t
+//
+// RUN: c-index-test core -gen-deps-reproducer -working-dir %t \
+// RUN:   -- clang-executable -c %t/reproducer.c -o %t/reproducer.o \
+// RUN:      -fmodules -fmodules-cache-path=%t | FileCheck %t/reproducer.c
+
+// Test a failed attempt at generating a reproducer.
+// RUN: not c-index-test core -gen-deps-reproducer -working-dir %t \
+// RUN:   -- clang-executable -c %t/failed-reproducer.c -o %t/reproducer.o \
+// RUN:      -fmodules -fmodules-cache-path=%t 2>&1 | FileCheck %t/failed-reproducer.c
+
+// Test the content of a reproducer script.
+// RUN: c-index-test core -gen-deps-reproducer -working-dir %t -o %t/repro-content \
+// RUN:   -- clang-executable -c %t/reproducer.c -o %t/reproducer.o \
+// RUN:      -fmodules -fmodules-cache-path=%t
+// RUN: FileCheck %t/script-expectations.txt --input-file %t/repro-content/reproducer.sh
+
+//--- modular-header.h
+void fn_in_modular_header(void);
+
+//--- module.modulemap
+module Test { header "modular-header.h" export * }
+
+//--- reproducer.c
+// CHECK: Sources and associated run script(s) are located at:
+#include "modular-header.h"
+
+void test(void) {
+  fn_in_modular_header();
+}
+
+//--- failed-reproducer.c
+// CHECK: fatal error: 'non-existing-header.h' file not found
+#include "non-existing-header.h"
+
+//--- script-expectations.txt
+CHECK: clang-executable
+CHECK: -fmodule-file=Test=reproducer.cache/explicitly-built-modules/Test-{{.*}}.pcm
diff --git a/clang/tools/c-index-test/core_main.cpp b/clang/tools/c-index-test/core_main.cpp
@@ -59,6 +59,7 @@ enum class ActionType {
   AggregateAsJSON,
   ScanDeps,
   ScanDepsByModuleName,
+  GenerateDepsReproducer,
   UploadCachedJob,
   MaterializeCachedJob,
   ReplayCachedJob,
@@ -87,6 +88,8 @@ Action(cl::desc("Action:"), cl::init(ActionType::None),
                      "Get file dependencies"),
           clEnumValN(ActionType::ScanDepsByModuleName, "scan-deps-by-mod-name",
                      "Get file dependencies by module name alone"),
+          clEnumValN(ActionType::GenerateDepsReproducer, "gen-deps-reproducer",
+                     "Generate a reproducer for the file"),
           clEnumValN(ActionType::UploadCachedJob, "upload-cached-job",
                      "Upload cached compilation data to upstream CAS"),
           clEnumValN(ActionType::MaterializeCachedJob, "materialize-cached-job",
@@ -923,6 +926,33 @@ static int scanDeps(ArrayRef<const char *> Args, std::string WorkingDirectory,
   return 1;
 }
 
+/// Generates a reproducer for the compiler invocation \p Args and prints the
+/// resulting message. Returns 0 on success and 1 on failure.
+static int generateDepsReproducer(ArrayRef<const char *> Args,
+                                  std::string WorkingDirectory,
+                                  std::string ReproLocation) {
+  CXDependencyScannerReproducerOptions Opts =
+      clang_experimental_DependencyScannerReproducerOptions_create(
+          Args.size(), Args.data(), /*ModuleName=*/nullptr,
+          WorkingDirectory.c_str(),
+          ReproLocation.empty() ? nullptr : ReproLocation.c_str(),
+          /*UseUniqueReproducerName=*/ReproLocation.empty());
+  auto DisposeOpts = llvm::make_scope_exit([&] {
+    clang_experimental_DependencyScannerReproducerOptions_dispose(Opts);
+  });
+  // Zero-initialized so disposal is well-defined even if no message is stored.
+  CXString Message{};
+  auto DisposeMessage =
+      llvm::make_scope_exit([&] { clang_disposeString(Message); });
+  CXErrorCode ExitCode =
+      clang_experimental_DependencyScanner_generateReproducer(Opts, &Message);
+  if (ExitCode == CXError_Success)
+    llvm::outs() << clang_getCString(Message) << "\n";
+  else
+    llvm::errs() << "error: " << clang_getCString(Message) << "\n";
+  return (ExitCode == CXError_Success) ? 0 : 1;
+}
+
 static int uploadCachedJob(std::string CacheKey, CXCASDatabases DBs) {
   CXError Err = nullptr;
   CXCASCachedCompilation CComp = clang_experimental_cas_getCachedCompilation(
@@ -1548,6 +1578,15 @@ int indextest_core_main(int argc, const char **argv) {
                     options::OutputDir, DBs, options::ModuleName);
   }
 
+  if (options::Action == ActionType::GenerateDepsReproducer) {
+    if (options::WorkingDir.empty()) {
+      errs() << "error: missing -working-dir\n";
+      return 1;
+    }
+    return generateDepsReproducer(CompArgs, options::WorkingDir,
+                                  options::OutputFile);
+  }
+
   if (options::Action == ActionType::UploadCachedJob) {
     if (options::InputFiles.empty()) {
       errs() << "error: missing cache key\n";

diff --git a/clang/tools/libclang/CDependencies.cpp b/clang/tools/libclang/CDependencies.cpp
@@ -640,3 +640,188 @@ std::string OutputLookup::lookupModuleOutput(const ModuleDeps &MD,
     PCMPath.first->second = ::lookupModuleOutput(MD, MOK, MLOContext, MLO);
   return PCMPath.first->second;
 }
+
+namespace {
+// Owning copy of the settings received through the C API.
+struct DependencyScannerReproducerOptions {
+  std::vector<std::string> BuildArgs;
+  std::optional<std::string> ModuleName;
+  std::optional<std::string> WorkingDirectory;
+  std::optional<std::string> ReproducerLocation;
+  bool UseUniqueReproducerName;
+
+  DependencyScannerReproducerOptions(
+      int argc, const char *const *argv, const char *ModuleName,
+      const char *WorkingDirectory, const char *ReproducerLocation,
+      bool UseUniqueReproducerName)
+      : UseUniqueReproducerName(UseUniqueReproducerName) {
+    if (argv && argc > 0) // A negative count would form an invalid range.
+      BuildArgs.assign(argv, argv + argc);
+    if (ModuleName)
+      this->ModuleName = ModuleName;
+    if (WorkingDirectory)
+      this->WorkingDirectory = WorkingDirectory;
+    if (ReproducerLocation)
+      this->ReproducerLocation = ReproducerLocation;
+  }
+};
+
+// Helper class to capture a returnable error code and to return a formatted
+// message in a provided CXString pointer.
+class MessageEmitter {
+  const CXErrorCode ErrorCode;
+  CXString *OutputString;
+  std::string Buffer;
+  llvm::raw_string_ostream Stream;
+
+public:
+  MessageEmitter(CXErrorCode Code, CXString *Output)
+      : ErrorCode(Code), OutputString(Output), Stream(Buffer) {}
+  ~MessageEmitter() {
+    if (OutputString)
+      *OutputString = clang::cxstring::createDup(Buffer.c_str());
+  }
+
+  operator CXErrorCode() const { return ErrorCode; }
+
+  template <typename T> MessageEmitter &operator<<(const T &t) {
+    Stream << t;
+    return *this;
+  }
+};
+} // end anonymous namespace
+
+DEFINE_SIMPLE_CONVERSION_FUNCTIONS(DependencyScannerReproducerOptions,
+                                   CXDependencyScannerReproducerOptions)
+
+CXDependencyScannerReproducerOptions
+clang_experimental_DependencyScannerReproducerOptions_create(
+    int argc, const char *const *argv, const char *ModuleName,
+    const char *WorkingDirectory, const char *ReproducerLocation,
+    bool UseUniqueReproducerName) {
+  return wrap(new DependencyScannerReproducerOptions{
+      argc, argv, ModuleName, WorkingDirectory, ReproducerLocation,
+      UseUniqueReproducerName});
+}
+
+void clang_experimental_DependencyScannerReproducerOptions_dispose(
+    CXDependencyScannerReproducerOptions Options) {
+  delete unwrap(Options);
+}
+
+// Scans the dependencies of the compilation described by \p CXOptions, then
+// writes a shell script with the module and translation unit commands next
+// to a ".cache" directory that receives the input files and a VFS overlay.
+enum CXErrorCode clang_experimental_DependencyScanner_generateReproducer(
+    CXDependencyScannerReproducerOptions CXOptions, CXString *MessageOut) {
+  auto Report = [MessageOut](CXErrorCode ErrorCode) -> MessageEmitter {
+    return MessageEmitter(ErrorCode, MessageOut);
+  };
+  auto ReportFailure = [&Report] { return Report(CXError_Failure); };
+
+  DependencyScannerReproducerOptions &Opts = *unwrap(CXOptions);
+  if (Opts.BuildArgs.size() < 2)
+    return Report(CXError_InvalidArguments) << "missing compilation command";
+  if (!Opts.WorkingDirectory)
+    return Report(CXError_InvalidArguments) << "missing working directory";
+  if (!Opts.UseUniqueReproducerName && !Opts.ReproducerLocation)
+    return Report(CXError_InvalidArguments)
+           << "non-unique reproducer is allowed only in a custom location";
+
+  CASOptions CASOpts;
+  IntrusiveRefCntPtr<llvm::cas::CachingOnDiskFileSystem> FS;
+  DependencyScanningService DepsService(
+      ScanningMode::DependencyDirectivesScan, ScanningOutputFormat::Full,
+      CASOpts, /*CAS=*/nullptr, /*ActionCache=*/nullptr, FS);
+  DependencyScanningTool DepsTool(DepsService);
+
+  llvm::SmallString<128> ReproScriptPath;
+  int ScriptFD = -1;
+  if (Opts.ReproducerLocation) {
+    if (auto EC = llvm::sys::fs::create_directories(*Opts.ReproducerLocation))
+      return ReportFailure() << "failed to create a reproducer location '"
+                             << *Opts.ReproducerLocation << "'\n"
+                             << EC.message();
+    SmallString<128> Path(*Opts.ReproducerLocation);
+    llvm::sys::path::append(Path, "reproducer");
+    const char *UniqueSuffix = Opts.UseUniqueReproducerName ? "-%%%%%%" : "";
+    if (auto EC = llvm::sys::fs::createUniqueFile(Path + UniqueSuffix + ".sh",
+                                                  ScriptFD, ReproScriptPath))
+      return ReportFailure() << "failed to create a reproducer script file\n"
+                             << EC.message();
+  } else if (auto EC = llvm::sys::fs::createTemporaryFile(
+                 "reproducer", "sh", ScriptFD, ReproScriptPath)) {
+    return ReportFailure() << "failed to create a reproducer script file\n"
+                           << EC.message();
+  }
+  // Hand the descriptor to the stream right away so that it is closed on every
+  // return path below, including a failed dependency scan.
+  llvm::raw_fd_ostream ScriptOS(ScriptFD, /*shouldClose=*/true);
+  SmallString<128> FileCachePath = ReproScriptPath;
+  llvm::sys::path::replace_extension(FileCachePath, ".cache");
+
+  std::string FileCacheName = llvm::sys::path::filename(FileCachePath).str();
+  auto LookupOutput = [&FileCacheName](const ModuleDeps &MD,
+                                       ModuleOutputKind MOK) -> std::string {
+    if (MOK != ModuleOutputKind::ModuleFile)
+      return "";
+    return FileCacheName + "/explicitly-built-modules/" +
+           MD.ID.ModuleName + "-" + MD.ID.ContextHash + ".pcm";
+  };
+
+  llvm::DenseSet<ModuleID> AlreadySeen;
+  auto TUDepsOrErr = DepsTool.getTranslationUnitDependencies(
+      Opts.BuildArgs, *Opts.WorkingDirectory, AlreadySeen,
+      std::move(LookupOutput));
+  if (!TUDepsOrErr)
+    return ReportFailure() << "failed to generate a reproducer\n"
+                           << toString(TUDepsOrErr.takeError());
+
+  TranslationUnitDeps TU = std::move(*TUDepsOrErr);
+  ScriptOS << "# Original command:\n#";
+  for (StringRef Arg : Opts.BuildArgs)
+    ScriptOS << ' ' << Arg;
+  ScriptOS << "\n\n# Dependencies:\n";
+  auto PrintArguments = [&](ArrayRef<std::string> Arguments) {
+    ScriptOS << Opts.BuildArgs.front();
+    for (const std::string &Argument : Arguments)
+      ScriptOS << ' ' << Argument;
+    ScriptOS << " -ivfsoverlay \"" << FileCacheName << "/vfs/vfs.yaml\"\n";
+  };
+  for (ModuleDeps &Dep : TU.ModuleGraph)
+    PrintArguments(Dep.getBuildArguments());
+  ScriptOS << "\n# Translation unit:\n";
+  for (const Command &BuildCommand : TU.Commands)
+    PrintArguments(BuildCommand.Arguments);
+  // ~raw_fd_ostream aborts the process on a pending error, so close the script
+  // now and turn any I/O failure into a regular error result instead.
+  ScriptOS.close();
+  if (ScriptOS.has_error()) {
+    const std::error_code EC = ScriptOS.error();
+    ScriptOS.clear_error();
+    return ReportFailure() << "failed to write a reproducer script file\n"
+                           << EC.message();
+  }
+
+  SmallString<128> VFSCachePath = FileCachePath;
+  llvm::sys::path::append(VFSCachePath, "vfs");
+  std::string VFSCachePathStr = VFSCachePath.str().str();
+  llvm::FileCollector FileCollector(VFSCachePathStr,
+                                    /*OverlayRoot=*/VFSCachePathStr);
+  for (const auto &FileDep : TU.FileDeps)
+    FileCollector.addFile(FileDep);
+  for (ModuleDeps &ModuleDep : TU.ModuleGraph)
+    ModuleDep.forEachFileDep([&](StringRef F) { FileCollector.addFile(F); });
+  if (FileCollector.copyFiles(/*StopOnError=*/true))
+    return ReportFailure()
+           << "failed to copy the files used for the compilation";
+  SmallString<128> VFSOverlayPath = VFSCachePath;
+  llvm::sys::path::append(VFSOverlayPath, "vfs.yaml");
+  if (FileCollector.writeMapping(VFSOverlayPath))
+    return ReportFailure() << "failed to write a VFS overlay mapping";
+
+  return Report(CXError_Success)
+         << "Created a reproducer. Sources and associated run script(s) are "
+            "located at:\n  "
+         << FileCachePath << "\n  " << ReproScriptPath;
+}
diff --git a/clang/tools/libclang/libclang.map b/clang/tools/libclang/libclang.map
@@ -578,6 +578,9 @@ LLVM_21 {
     clang_experimental_DepGraphModule_isInStableDirs; 
     clang_getFullyQualifiedName;
     clang_experimental_DependencyScannerService_getInvalidNegStatCachedPaths;
+    clang_experimental_DependencyScannerReproducerOptions_create;
+    clang_experimental_DependencyScannerReproducerOptions_dispose;
+    clang_experimental_DependencyScanner_generateReproducer;
 };
 
 # Example of how to add a new symbol version entry.  If you do add a new symbol