Skip to content

Commit febaf43

Browse files
committed
Improve error reporting on profiler startup
1 parent 368851d commit febaf43

File tree

3 files changed

+117
-31
lines changed

3 files changed

+117
-31
lines changed

dd-java-agent/agent-profiling/profiling-controller/src/main/java/com/datadog/profiling/controller/TempLocationManager.java

+83-15
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,14 @@
1616
import java.nio.file.Path;
1717
import java.nio.file.Paths;
1818
import java.nio.file.attribute.BasicFileAttributes;
19+
import java.nio.file.attribute.PosixFilePermission;
1920
import java.nio.file.attribute.PosixFilePermissions;
2021
import java.time.Instant;
2122
import java.time.temporal.ChronoUnit;
2223
import java.util.Set;
2324
import java.util.concurrent.CountDownLatch;
2425
import java.util.concurrent.TimeUnit;
26+
import java.util.concurrent.atomic.AtomicReference;
2527
import java.util.regex.Pattern;
2628
import java.util.stream.Stream;
2729
import org.slf4j.Logger;
@@ -219,6 +221,7 @@ boolean await(long timeout, TimeUnit unit) throws Throwable {
219221
}
220222
}
221223

224+
private final boolean isPosixFs;
222225
private final Path baseTempDir;
223226
private final Path tempDir;
224227
private final long cutoffSeconds;
@@ -262,6 +265,9 @@ private TempLocationManager() {
262265
ConfigProvider configProvider, boolean runStartupCleanup, CleanupHook testHook) {
263266
cleanupTestHook = testHook;
264267

268+
Set<String> supportedViews = FileSystems.getDefault().supportedFileAttributeViews();
269+
isPosixFs = supportedViews.contains("posix");
270+
265271
// In order to avoid racy attempts to clean up files which are currently being processed in a
266272
// JVM which is being shut down (the JVMs far in the shutdown routine may not be reported by
267273
// 'jps' but still can be eg. processing JFR chunks) we will not clean up any files not older
@@ -317,6 +323,8 @@ private TempLocationManager() {
317323
},
318324
"Temp Location Manager Cleanup");
319325
Runtime.getRuntime().addShutdownHook(selfCleanup);
326+
327+
createTempDir(tempDir);
320328
}
321329

322330
// @VisibleForTesting
@@ -362,21 +370,7 @@ public Path getTempDir(Path subPath, boolean create) {
362370
Path rslt =
363371
subPath != null && !subPath.toString().isEmpty() ? tempDir.resolve(subPath) : tempDir;
364372
if (create && !Files.exists(rslt)) {
365-
try {
366-
Set<String> supportedViews = FileSystems.getDefault().supportedFileAttributeViews();
367-
if (supportedViews.contains("posix")) {
368-
Files.createDirectories(
369-
rslt,
370-
PosixFilePermissions.asFileAttribute(PosixFilePermissions.fromString("rwx------")));
371-
} else {
372-
// non-posix, eg. Windows - let's rely on the created folders being world-writable
373-
Files.createDirectories(rslt);
374-
}
375-
376-
} catch (Exception e) {
377-
log.warn(SEND_TELEMETRY, "Failed to create temp directory: {}", tempDir, e);
378-
throw new IllegalStateException("Failed to create temp directory: " + tempDir, e);
379-
}
373+
createTempDir(rslt);
380374
}
381375
return rslt;
382376
}
@@ -454,4 +448,78 @@ boolean waitForCleanup(long timeout, TimeUnit unit) {
454448
/**
 * Creates the base temp directory ({@code baseTempDir}), including any missing parent
 * directories. No explicit file attributes are requested for the created directories.
 *
 * @throws IOException if the directory can not be created
 */
void createDirStructure() throws IOException {
455449
Files.createDirectories(baseTempDir);
456450
}
451+
452+
private void createTempDir(Path tempDir) {
453+
String msg = "Failed to create temp directory: " + tempDir;
454+
try {
455+
if (isPosixFs) {
456+
Files.createDirectories(
457+
tempDir,
458+
PosixFilePermissions.asFileAttribute(PosixFilePermissions.fromString("rwx------")));
459+
} else {
460+
Files.createDirectories(tempDir);
461+
}
462+
} catch (IOException e) {
463+
log.error("Failed to create temp directory {}", tempDir, e);
464+
// if on a posix fs, let's check the expected permissions
465+
// we will find the first offender not having the expected permissions and fail the check
466+
if (isPosixFs) {
467+
Path root = baseTempDir.resolve(tempDir.relativize(baseTempDir).getRoot());
468+
try {
469+
AtomicReference<Path> failed = new AtomicReference<>();
470+
Files.walkFileTree(
471+
root,
472+
new FileVisitor<Path>() {
473+
@Override
474+
public FileVisitResult preVisitDirectory(Path dir, BasicFileAttributes attrs)
475+
throws IOException {
476+
Set<PosixFilePermission> perms = Files.getPosixFilePermissions(dir);
477+
if (!perms.contains(PosixFilePermission.OWNER_READ)
478+
|| !perms.contains(PosixFilePermission.OWNER_WRITE)
479+
|| !perms.contains(PosixFilePermission.OWNER_EXECUTE)) {
480+
failed.set(dir);
481+
return FileVisitResult.TERMINATE;
482+
}
483+
return FileVisitResult.CONTINUE;
484+
}
485+
486+
@Override
487+
public FileVisitResult visitFile(Path file, BasicFileAttributes attrs)
488+
throws IOException {
489+
return FileVisitResult.SKIP_SIBLINGS;
490+
}
491+
492+
@Override
493+
public FileVisitResult visitFileFailed(Path file, IOException exc)
494+
throws IOException {
495+
return FileVisitResult.TERMINATE;
496+
}
497+
498+
@Override
499+
public FileVisitResult postVisitDirectory(Path dir, IOException exc)
500+
throws IOException {
501+
return FileVisitResult.CONTINUE;
502+
}
503+
});
504+
Path failedDir = failed.get();
505+
506+
if (failedDir != null) {
507+
msg +=
508+
" (offender: "
509+
+ failedDir
510+
+ ", permissions: "
511+
+ PosixFilePermissions.toString(Files.getPosixFilePermissions(failedDir))
512+
+ ")";
513+
log.warn(SEND_TELEMETRY, msg, e);
514+
}
515+
} catch (IOException ignored) {
516+
// should not happen, but let's ignore it anyway'
517+
}
518+
throw new IllegalStateException(msg, e);
519+
} else {
520+
log.warn(SEND_TELEMETRY, msg, e);
521+
throw new IllegalStateException(msg, e);
522+
}
523+
}
524+
}
457525
}

dd-java-agent/agent-profiling/src/main/java/com/datadog/profiling/agent/CompositeController.java

+31-14
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
package com.datadog.profiling.agent;
22

3+
import static datadog.trace.api.telemetry.LogCollector.SEND_TELEMETRY;
4+
35
import com.datadog.profiling.controller.Controller;
46
import com.datadog.profiling.controller.ControllerContext;
57
import com.datadog.profiling.controller.OngoingRecording;
@@ -140,26 +142,33 @@ public static Controller build(ConfigProvider provider, ControllerContext contex
140142
List<Controller> controllers = new ArrayList<>();
141143
boolean isOracleJDK8 = Platform.isOracleJDK8();
142144
boolean isDatadogProfilerEnabled = Config.get().isDatadogProfilerEnabled();
143-
if (provider.getBoolean(ProfilingConfig.PROFILING_DEBUG_JFR_DISABLED, false)) {
144-
log.warn("JFR is disabled by configuration");
145+
boolean isJfrEnabled =
146+
!provider.getBoolean(ProfilingConfig.PROFILING_DEBUG_JFR_DISABLED, false);
147+
if (!isJfrEnabled) {
148+
log.warn(SEND_TELEMETRY, "JFR is disabled by configuration");
145149
} else {
146150
if (isOracleJDK8 && !isDatadogProfilerEnabled) {
147151
try {
148152
Class.forName("com.oracle.jrockit.jfr.Producer");
149153
controllers.add(OracleJdkController.instance(provider));
150-
} catch (Throwable ignored) {
151-
log.debug("Failed to load oracle profiler", ignored);
154+
} catch (Throwable t) {
155+
log.debug(SEND_TELEMETRY, "Failed to load oracle profiler: " + t.getMessage(), t);
152156
}
153157
}
154158
if (!isOracleJDK8) {
155159
try {
156160
if (Platform.hasJfr()) {
157161
controllers.add(OpenJdkController.instance(provider));
158162
} else {
159-
log.debug("JFR is not available on this platform");
163+
log.debug(
164+
SEND_TELEMETRY,
165+
"JFR is not available on this platform: "
166+
+ OperatingSystem.current()
167+
+ ", "
168+
+ Arch.current());
160169
}
161-
} catch (Throwable ignored) {
162-
log.debug("Failed to load openjdk profiler", ignored);
170+
} catch (Throwable t) {
171+
log.debug(SEND_TELEMETRY, "Failed to load openjdk profiler: " + t.getMessage(), t);
163172
}
164173
}
165174
}
@@ -175,16 +184,20 @@ public static Controller build(ConfigProvider provider, ControllerContext contex
175184
context.setDatadogProfilerUnavailableReason(rootCause.getMessage());
176185
OperatingSystem os = OperatingSystem.current();
177186
if (os != OperatingSystem.linux) {
178-
log.debug("Datadog profiler only supported on Linux", rootCause);
179-
} else if (log.isDebugEnabled()) {
180-
log.warn(
181-
"failed to instantiate Datadog profiler on {} {}", os, Arch.current(), rootCause);
182-
} else {
187+
log.debug(SEND_TELEMETRY, "Datadog profiler only supported on Linux", rootCause);
188+
} else if (!log.isDebugEnabled()) {
183189
log.warn(
184190
"failed to instantiate Datadog profiler on {} {} because: {}",
185191
os,
186192
Arch.current(),
187193
rootCause.getMessage());
194+
} else {
195+
log.debug(
196+
SEND_TELEMETRY,
197+
"failed to instantiate Datadog profiler on {} {}",
198+
os,
199+
Arch.current(),
200+
rootCause);
188201
}
189202
}
190203
} else {
@@ -202,14 +215,18 @@ public static Controller build(ConfigProvider provider, ControllerContext contex
202215
}
203216
controllers.forEach(controller -> controller.configure(context));
204217
if (controllers.isEmpty()) {
205-
throw new UnsupportedEnvironmentException(getFixProposalMessage());
218+
throw new UnsupportedEnvironmentException(
219+
getFixProposalMessage(isDatadogProfilerEnabled, isJfrEnabled));
206220
} else if (controllers.size() == 1) {
207221
return controllers.get(0);
208222
}
209223
return new CompositeController(controllers);
210224
}
211225

212-
private static String getFixProposalMessage() {
226+
private static String getFixProposalMessage(boolean datadogProfilerEnabled, boolean jfrEnabled) {
227+
if (!datadogProfilerEnabled && !jfrEnabled) {
228+
return "Profiling is disabled by configuration. Please, make sure that your configuration is correct.";
229+
}
213230
final String javaVendor = System.getProperty("java.vendor");
214231
final String javaVersion = System.getProperty("java.version");
215232
final String javaRuntimeName = System.getProperty("java.runtime.name");

dd-java-agent/agent-profiling/src/main/java/com/datadog/profiling/agent/ProfilingAgent.java

+3-2
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ public static synchronized void run(
109109
return;
110110
}
111111
if (!config.isProfilingEnabled()) {
112-
log.debug("Profiling: disabled");
112+
log.debug(SEND_TELEMETRY, "Profiling: disabled");
113113
return;
114114
}
115115
if (config.getApiKey() != null && !API_KEY_REGEX.test(config.getApiKey())) {
@@ -167,7 +167,8 @@ public static synchronized void run(
167167
}
168168
} catch (final UnsupportedEnvironmentException e) {
169169
log.warn(e.getMessage());
170-
log.debug(SEND_TELEMETRY, "Unsupported environment for Datadog profiler", e);
170+
// no need to send telemetry for this aggregate message
171+
// a detailed telemetry message has been sent from the attempts to enable the controllers
171172
} catch (final ConfigurationException e) {
172173
log.warn("Failed to initialize profiling agent! {}", e.getMessage());
173174
log.debug(SEND_TELEMETRY, "Failed to initialize profiling agent!", e);

0 commit comments

Comments
 (0)