FriendsOfREDAXO · Copilot · Oct 15, 2025 · Oct 15, 2025 · Oct 15, 2025 · Nov 19, 2025
diff --git a/lib/EmailObfuscator.php b/lib/EmailObfuscator.php
@@ -28,7 +28,7 @@ public static function obfuscate($content) {
 
 			// Ersetze E-Mailadressen
 			if (!$emailobfuscator->getConfig('mailto_only')) {
-				$content = preg_replace_callback('/(?<![\/\w])([\w\-\+\.]+)@([\w\-\.]+\.[\w]{2,})(?![\w\/])/', 'emailobfuscator::encodeEmailUnicorn', $content);
+				$content = self::obfuscateEmailsNotInAttributes($content);
 			}
 
 			// Injiziere CSS vors schließende </head> im Seitenkopf
@@ -150,6 +150,70 @@ private static function encodeEmailLinksUnicorn($matches) {
         return 'javascript:decryptUnicorn(' . $mail . ')';
     }
 
+	/**
+	 * Obfuscate e-mail addresses in the content, skipping those located inside HTML tags.
+	 *
+	 * Every `<...>` span of the content is treated as a tag; an address whose first
+	 * character lies inside such a span (e.g. in an attribute value) is left untouched.
+	 * Addresses found in the current POST data or in the whitelist are left untouched too.
+	 *
+	 * Limitations of the tag detection (`/<[^>]*>/`):
+	 * - It is not an HTML parser: a `>` inside an attribute value ends the detected tag early.
+	 * - It assumes well-formed HTML; an unclosed `<` is not recognised as a tag.
+	 *
+	 * @param string $content Content to process
+	 * @return string Processed content
+	 */
+	private static function obfuscateEmailsNotInAttributes($content) {
+		// The trailing lookahead is meant to avoid treating file names such as
+		// retina assets (e.g. logo@2x.png) as e-mail addresses.
+		// NOTE(review): the domain group itself accepts dots, so a name like logo@2x.png
+		// can still match in full before this lookahead is consulted - confirm it has
+		// the intended effect.
+		$pattern = '/(?<![\/\w])([\w\-\+\.]+)@([\w\-\.]+\.[\w]{2,})(?![\w\/])(?!(\.(png|jpg|jpeg|gif|svg|webp|pdf|docx?|xlsx?|pptx?|zip|rar|tar|gz|mp3|mp4|avi|mov|wmv|flv|mkv|ico|bmp|tiff|psd|ai|eps|csv|json|xml|yml|yaml|txt|log|md|html|htm|php|js|css|scss|less|c|cpp|h|hpp|py|rb|go|rs|sh|bat|exe|dll|bin|dat|bak|tmp)))/i';
+
+		// Byte ranges [start, end) of all HTML tags, measured on the unmodified content.
+		$tagRanges = [];
+		if (preg_match_all('/<[^>]*>/', $content, $tagMatches, PREG_OFFSET_CAPTURE)) {
+			foreach ($tagMatches[0] as $tagMatch) {
+				$tagRanges[] = [$tagMatch[1], $tagMatch[1] + strlen($tagMatch[0])];
+			}
+		}
+
+		// Loop-invariant part of the POST whitelist check.
+		$isPost = isset($_SERVER['REQUEST_METHOD']) && $_SERVER['REQUEST_METHOD'] === 'POST' && isset($_POST);
+
+		// Matching always runs against the unmodified $content and the output is assembled
+		// separately, so match offsets and $tagRanges stay in the same coordinate system
+		// even after earlier addresses have been replaced by longer markup.
+		$result = '';
+		$copied = 0; // end (in $content) of the part already written to $result
+		$offset = 0; // where the next search starts (in $content)
+
+		while (preg_match($pattern, $content, $matches, PREG_OFFSET_CAPTURE, $offset)) {
+			$email = $matches[0][0];
+			$pos = $matches[0][1];
+			$offset = $pos + strlen($email);
+
+			// Skip addresses that start inside an HTML tag
+			$shouldObfuscate = true;
+			foreach ($tagRanges as $range) {
+				if ($pos >= $range[0] && $pos < $range[1]) {
+					$shouldObfuscate = false;
+					break;
+				}
+			}
+
+			// Skip addresses that were just submitted via POST or are whitelisted
+			if ($shouldObfuscate && (($isPost && self::in_array_r($email, $_POST)) || self::in_array_r($email, self::$whitelist))) {
+				$shouldObfuscate = false;
+			}
+
+			if ($shouldObfuscate) {
+				// Copy the untouched text before the address, then the obfuscated address
+				$result .= substr($content, $copied, $pos - $copied)
+					. $matches[1][0] . '<span class="unicorn"><span>_at_</span></span>' . $matches[2][0];
+				$copied = $offset;
+			}
+		}
+
+		if ($copied === 0) {
+			// Nothing was replaced: hand the input back unchanged
+			return $content;
+		}
+
+		// Append the remainder after the last replaced address
+		return $result . substr($content, $copied);
+	}
+
+
  	/**
 	 * Encode E-Mail address
 	 * @param string[] $matches 
@@ -179,8 +243,58 @@ private static function in_array_r($needle, $haystack, $strict = false) {
 	 */
 	private static function makeEmailClickable($ret) {
 		$ret = ' ' . $ret;
-		// in testing, using arrays here was found to be faster
-		$ret = preg_replace_callback('#([\s>])([.0-9a-z_+-]+)@(([0-9a-z-]+\.)+[0-9a-z]{2,})#i', 'emailobfuscator::make_email_clickable_callback', $ret);
+
+		// Precompute all HTML tag ranges in the content
+		$tagRanges = [];
+		if (preg_match_all('/<[^>]*>/', $ret, $tagMatches, PREG_OFFSET_CAPTURE)) {
+			foreach ($tagMatches[0] as $tagMatch) {
+				$tagStart = $tagMatch[1];
+				$tagEnd = $tagStart + strlen($tagMatch[0]);
+				$tagRanges[] = [$tagStart, $tagEnd];
+			}
+		}
+
+		// Process emails but skip those in HTML attributes
+		$pattern = '#([\s>])([.0-9a-z_+-]+)@(([0-9a-z-]+\.)+[0-9a-z]{2,})#i';
+		$offset = 0;
+
+		while (preg_match($pattern, $ret, $matches, PREG_OFFSET_CAPTURE, $offset)) {
+			$fullMatch = $matches[0][0];
+			$pos = $matches[0][1];
+			$email = $matches[2][0] . '@' . $matches[3][0];
+
+			// Skip retina image patterns like @2x.png, @3x.jpg, etc.
+			if (preg_match('/^[^@]+@\d+x\./i', $email)) {
+				$offset = $pos + strlen($fullMatch);
+				continue;
+			}
+
+			// Check if the email is inside any HTML tag
+			$shouldMakeClickable = true;
+			foreach ($tagRanges as $range) {
+				if ($pos >= $range[0] && $pos < $range[1]) {
+					$shouldMakeClickable = false;
+					break;
+				}
+			}
+
+			if ($shouldMakeClickable) {
+				// Check whitelist for consistency with obfuscateEmailsNotInAttributes
+				if (self::in_array_r($email, self::$whitelist)) {
+					$shouldMakeClickable = false;
+				}
+			}
+
+			if ($shouldMakeClickable) {
+				// Make clickable
+				$replacement = $matches[1][0] . "<a href=\"mailto:$email\">$email</a>";
+				$ret = substr_replace($ret, $replacement, $pos, strlen($fullMatch));
+				$offset = $pos + strlen($replacement);
+			} else {
+				// Skip this match
+				$offset = $pos + strlen($fullMatch);
+			}
+		}
 
 		// this one is not in an array because we need it to run last, for cleanup of accidental links within links
 		$ret = preg_replace("#(<a( [^>]+?>|>))<a [^>]+?>([^>]+?)</a></a>#i", "$1$3</a>", $ret);