Skip to content

Commit

Permalink
Switch from html2text to soundasleep/html2text
Browse files Browse the repository at this point in the history
  • Loading branch information
larssandergreen committed Oct 1, 2023
1 parent a8d60e4 commit 5bfe3f8
Show file tree
Hide file tree
Showing 8 changed files with 78 additions and 101 deletions.
3 changes: 1 addition & 2 deletions CRM/Utils/String.php
Original file line number Diff line number Diff line change
Expand Up @@ -445,8 +445,7 @@ public static function strtoboolstr($str) {
*/
public static function htmlToText($html) {
// Prefix every {token} placeholder (letters, "_" and "." only, case-insensitive)
// with "token:" before conversion. Presumably this stops the converter from
// treating a token used as an href as a relative URL; the HtmlToTextTest case
// "A token is not treated as a relative URL" exercises this path.
$token_html = preg_replace('!\{([a-z_.]+)\}!i', 'token:{$1}', $html);
// NOTE(review): this page shows the diff hunk without +/- markers. Per the
// hunk summary (1 addition & 2 deletions), the next two lines are the removed
// html2text/html2text call and are not part of the committed function.
$converter = new \Html2Text\Html2Text($token_html, ['do_links' => 'table', 'width' => 75]);
$token_text = $converter->getText();
// Added line: the soundasleep/html2text replacement, called statically with
// the 'ignore_errors' option enabled.
$token_text = \Soundasleep\Html2Text::convert($token_html, ['ignore_errors' => TRUE]);
// Strip the "token:" prefix again so the original {token} placeholders are
// restored in the plain-text result.
$text = preg_replace('!token\:\{([a-z_.]+)\}!i', '{$1}', $token_text);
return $text;
}
Expand Down
5 changes: 1 addition & 4 deletions composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@
"symfony/polyfill-php80": "^1.0",
"symfony/polyfill-php81": "^1.0",
"symfony/polyfill-php82": "^1.0",
"html2text/html2text": "^4.3.1",
"soundasleep/html2text": "^2.1",
"psr/container": "~1.0 || ~2.0",
"ext-fileinfo": "*"
},
Expand Down Expand Up @@ -275,9 +275,6 @@
"Update gitignore to ensure that sites that manage via git don't miss out on the important db.json file": "https://patch-diff.githubusercontent.com/raw/adrienrn/php-mimetyper/pull/15.patch",
"Apply patch to fix php8.2 deprecation notice on dynamic property $filename": "https://patch-diff.githubusercontent.com/raw/adrienrn/php-mimetyper/pull/17.patch"
},
"html2text/html2text": {
"Fix deprecation warning in php8.1 on html_entity_decode": "https://raw.githubusercontent.com/civicrm/civicrm-core/e758d20e9f613ca6c4cf652c23d2cd7e5d3af3ce/tools/scripts/composer/html2text_html2_text_php81_deprecation.patch"
},
"pear/db": {
"Apply patch to ensure that MySQLI reporting remains the same in php8.1": "https://patch-diff.githubusercontent.com/raw/pear/DB/pull/13.patch",
"Apply patch to fix deprecations in php8.2": "https://patch-diff.githubusercontent.com/raw/pear/DB/pull/14.patch",
Expand Down
98 changes: 56 additions & 42 deletions composer.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
Expand Up @@ -45,19 +45,14 @@ public function getHrefExamples() {
'<p><a href=\'tracking(https://sub.example.com/foo.php?whiz=%2Fbang%2F&pie[fruit]=apple)\' rel=\'nofollow\'>Foo</a></p>',
];
$exs[] = [
// Messy looking URL, designed to trip-up quote handling
// Messy looking URL, designed to trip-up quote handling, no tracking as no http
'<p><a href="javascript:alert(\'Cheese\')">Foo</a></p>',
'<p><a href="tracking(javascript:alert(\'Cheese\'))" rel=\'nofollow\'>Foo</a></p>',
'<p><a href="javascript:alert(\'Cheese\')" rel=\'nofollow\'>Foo</a></p>',
];
$exs[] = [
// Messy looking URL, designed to trip-up quote handling
// Messy looking URL, designed to trip-up quote handling, no tracking as no http
'<p><a href=\'javascript:alert("Cheese")\'>Foo</a></p>',
'<p><a href=\'tracking(javascript:alert("Cheese"))\' rel=\'nofollow\'>Foo</a></p>',
];
$exs[] = [
// Messy looking URL, funny whitespace
'<p><a href="http://example.com/' . "\n" . 'weird">Foo</a></p>',
'<p><a href="tracking(http://example.com/' . "\n" . 'weird)" rel=\'nofollow\'>Foo</a></p>',
'<p><a href=\'javascript:alert("Cheese")\' rel=\'nofollow\'>Foo</a></p>',
];
$exs[] = [
// Messy looking URL, funny whitespace
Expand Down
30 changes: 8 additions & 22 deletions tests/phpunit/CRM/Mailing/BaseMailingSystemTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -163,14 +163,7 @@ public function testHtmlWithOpenTracking(): void {
// Default header
"Sample Header for TEXT formatted content.\n" .
// body_html, filtered
"You can go to Google \\[1\\] or opt out \\[2\\]\\.\n" .
"\n" .
"\n" .
"Links:\n" .
"------\n" .
"\\[1\\] http://example.net/first\\?cs=[0-9a-f_]+\n" .
"\\[2\\] http.*civicrm/mailing/optout.*\n" .
"\n" .
"You can go to \\[Google\\]\\(http://example.net/first\?cs=[0-9a-f_]+\\) or \\[opt out\\]\\(http.*civicrm/mailing/optout.*\\)\\.\n" .
// Default footer
"to unsubscribe: http.*civicrm/mailing/optout" .
";",
Expand Down Expand Up @@ -217,14 +210,7 @@ public function testHtmlWithOpenAndUrlTracking(): void {
$this->assertMatchesRegularExpression(
";" .
// body_html, filtered
"You can go to Google \\[1\\] or opt out \\[2\\]\\.\n" .
"\n" .
"\n" .
"Links:\n" .
"------\n" .
"\\[1\\] http.*(extern/url.php|civicrm/mailing/url)(\?|&)u=\d+&qid=\d+\n" .
"\\[2\\] http.*civicrm/mailing/optout.*\n" .
"\n" .
"You can go to \\[Google\\]\\(http.*(extern/url.php|civicrm/mailing/url)(\?|&)u=\d+&qid=\d+\\) or \\[opt out\\]\\(http.*civicrm/mailing/optout.*\\)\\.\n" .
// Default footer
"to unsubscribe: http.*civicrm/mailing/optout" .
";",
Expand All @@ -249,20 +235,20 @@ public function urlTrackingExamples() {
$cases[0] = [
'<p><a href="http://example.net/">Foo</a></p>',
';<p><a href="http://example\.net/">Foo</a></p>;',
';\\[1\\] http://example\.net/;',
';\\(http://example\.net/\\);',
['url_tracking' => 0],
];
$cases[1] = [
'<p><a href="http://example.net/?id={contact.contact_id}">Foo</a></p>',
// FIXME: Legacy tracker adds extra quote after URL
';<p><a href="http://example\.net/\?id=\d+""?>Foo</a></p>;',
';\\[1\\] http://example\.net/\?id=\d+;',
';\\(http://example\.net/\?id=\d+\\);',
['url_tracking' => 0],
];
$cases[2] = [
'<p><a href="{action.optOutUrl}">Foo</a></p>',
';<p><a href="http.*civicrm/mailing/optout.*">Foo</a></p>;',
';\\[1\\] http.*civicrm/mailing/optout.*;',
';\\(http.*civicrm/mailing/optout.*\\);',
['url_tracking' => 0],
];
$cases[3] = [
Expand All @@ -284,21 +270,21 @@ public function urlTrackingExamples() {
$cases[5] = [
'<p><a href="http://example.net/">Foo</a></p>',
';<p><a href=[\'"].*(extern/url.php|civicrm/mailing/url)(\?|&amp\\;)u=\d+.*[\'"]>Foo</a></p>;',
';\\[1\\] .*(extern/url.php|civicrm/mailing/url)[\?&]u=\d+.*;',
';\\(.*(extern/url.php|civicrm/mailing/url)[\?&]u=\d+.*\\);',
['url_tracking' => 1],
];
$cases['url_trackin_enabled'] = [
'<p><a href="http://example.net/?id={contact.contact_id}">Foo</a></p>',
';<p><a href=[\'"].*(extern/url.php|civicrm/mailing/url)(\?|&amp\\;)u=\d+.*&amp\\;id=\d+.*[\'"]>Foo</a></p>;',
';\\[1\\] .*(extern/url.php|civicrm/mailing/url)[\?&]u=\d+.*&id=\d+.*;',
';\\(.*(extern/url.php|civicrm/mailing/url)[\?&]u=\d+.*&id=\d+.*\\);',
['url_tracking' => 1],
];

$cases[7] = [
// It would be redundant/slow to track the action URLs?
'<p><a href="{action.optOutUrl}">Foo</a></p>',
';<p><a href="http.*civicrm/mailing/optout.*">Foo</a></p>;',
';\\[1\\] http.*civicrm/mailing/optout.*;',
';\\(http.*civicrm/mailing/optout.*\\);',
['url_tracking' => 1],
];
$cases[8] = [
Expand Down
17 changes: 4 additions & 13 deletions tests/phpunit/CRM/Utils/HtmlToTextTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -25,27 +25,18 @@ public function htmlToTextExamples() {

$cases[] = [
"\n<p>\n" .
"This is a paragraph with <b>Bold</b> and <i>italics</i>\n" .
"This is a paragraph with <b>Bold</b> and <i>italics</i>.\n" .
"Also some <a href=\"http://www.example.com\">hrefs</a> and a\n" .
"few <mailto:\"info@example.com\">mailto</mailto> tags.\n" .
"This is also a really long long line\n" .
"\n",
"This is a paragraph with BOLD and _italics_ Also some hrefs [1] and a few\n" .
"mailto tags. This is also a really long long line\n" .
"\n" .
"Links:\n" .
"------\n" .
"[1] http://www.example.com\n" .
"",
"This is a paragraph with Bold and italics. Also some [hrefs](http://www.example.com)" .
" and a few mailto tags. This is also a really long long line",
];

$cases[] = [
"<p>\nA <a href=\"{action.do_something}\">token</a>\nis not treated as a relative URL",
"A token [1] is not treated as a relative URL\n" .
"\n" .
"Links:\n" .
"------\n" .
"[1] {action.do_something}\n",
"A [token]({action.do_something}) is not treated as a relative URL",
];

return $cases;
Expand Down
11 changes: 3 additions & 8 deletions tests/phpunit/CRM/Utils/TokenConsistencyTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -1122,7 +1122,7 @@ public function testEscaping(): void {
]);
$context['eventId'] = $this->eventCreateUnpaid([
'title' => 'The Webinar',
'description' => '<p>Some online webinar thingy.</p> <p>Attendees will need to install the <a href="http://telefoo.example.com">TeleFoo</a> app.</p>',
'description' => '<p>Some online webinar thingy.</p><p>Attendees will need to install the <a href="http://telefoo.example.com">TeleFoo</a> app.</p>',
])['id'];

$messages = $expected = [];
Expand All @@ -1138,15 +1138,10 @@ public function testEscaping(): void {
$messages['event_text'] = 'You signed up for this event: {event.title}: {event.description}';
$expected['event_text'] = 'You signed up for this event: The Webinar: Some online webinar thingy.
Attendees will need to install the TeleFoo [1] app.
Links:
------
[1] http://telefoo.example.com';
Attendees will need to install the [TeleFoo](http://telefoo.example.com) app.';

$messages['event_html'] = '<p>You signed up for this event:</p> <h3>{event.title}</h3> {event.description}';
$expected['event_html'] = '<p>You signed up for this event:</p> <h3>The Webinar</h3> <p>Some online webinar thingy.</p> <p>Attendees will need to install the <a href="http://telefoo.example.com">TeleFoo</a> app.</p>';
$expected['event_html'] = '<p>You signed up for this event:</p> <h3>The Webinar</h3> <p>Some online webinar thingy.</p><p>Attendees will need to install the <a href="http://telefoo.example.com">TeleFoo</a> app.</p>';

$rendered = CRM_Core_TokenSmarty::render($messages, $context);

Expand Down
2 changes: 1 addition & 1 deletion tests/phpunit/Civi/Token/TokenProcessorTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -524,7 +524,7 @@ public function getFilterExamples(): array {
$testCases['TextMessages with HtmlData'] = [
'text/plain',
[
'This is {my_rich_text.and_such}...' => 'This is TESTING & SUCH...',
'This is {my_rich_text.and_such}...' => 'This is testing & such...',
'This is {my_rich_text.and_such|lower}...' => 'This is testing & such...',
'This is {my_rich_text.and_such|upper}!' => 'This is TESTING & SUCH!',
],
Expand Down

0 comments on commit 5bfe3f8

Please sign in to comment.