Skip to content

Commit a0e9d0a

Browse files
committed
HgWeb: refactoring commit_changeset() method
A more robust implementation that processes HgWeb's raw-rev output in several steps: 1. Skip the header section; 2. Retrieve changeset metadata (Author, Date, Revision and Parent ID); 3. Get the commit message; 4. Collect information about updated files (add/rm/mod). Previously, steps 1-3 were performed in a single loop, which caused issues depending on the contents of the commit message, e.g. when a line starts with a `#` (see #233). Define regex patterns as class constants and use static vars to improve performance and code readability, and to avoid duplicating patterns. Add PHPDoc block. Fixes #233
1 parent da07e60 commit a0e9d0a

File tree

1 file changed

+74
-28
lines changed

1 file changed

+74
-28
lines changed

SourceHgWeb/SourceHgWeb.php

Lines changed: 74 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,19 @@ class SourceHgWebPlugin extends MantisSourcePlugin {
2222
const PLUGIN_VERSION = '1.0.1';
2323
const FRAMEWORK_VERSION_REQUIRED = '1.3.2';
2424

25+
/**
26+
* Various PCRE patterns used to parse HgWeb output when retrieving
27+
* changeset info
28+
* @see commit_changeset()
29+
*/
30+
const PATTERN_USER = '(?<id>User) (?<user>[^<>]*)(?(?=(?=<))<(?<email>[^<>]*)>|.*)';
31+
const PATTERN_DATE = '(?<id>Date) (?<date>\d+) (?<tz>-?\d+)';
32+
const PATTERN_REVISION = '(?<id>Node ID|Parent) +(?<rev>[0-9a-f]+)';
33+
const PATTERN_DIFF = 'diff[\s]*-r[\s]([^\s]*)[\s]*-r[\s]([^\s]*)[\s]([^\n]*)';
34+
const PATTERN_BINARY_FILE = 'Binary file[\s]([^\r\n\t\f\v]*)[\s]has changed';
35+
# Don't use '/' as pattern delimiter with this one
36+
const PATTERN_PLUS_MINUS = '\-{3}[\s](/dev/null)?[^\t]*[^\n]*\n\+{3}[\s](/dev/null)?[^\t]*\t[^\n]*';
37+
2538
function register() {
2639
$this->name = plugin_lang_get( 'title' );
2740
$this->description = plugin_lang_get( 'description' );
@@ -231,41 +244,69 @@ private function import_commits( $p_repo, $p_uri_base, $p_commit_ids, $p_branch=
231244
return $t_changesets;
232245
}
233246

247+
/**
248+
* Parse changeset data and store it if it does not exist already.
249+
* This assumes a standard Mercurial template for raw changesets. Using a
250+
* customized one may break the parsing logic.
251+
* @param SourceRepo $p_repo Repository
252+
* @param string $p_input Raw changeset data
253+
* @param string $p_branch
254+
* @return array SourceChangeset object, list of parent revisions
255+
*/
234256
private function commit_changeset( $p_repo, $p_input, $p_branch='' ) {
235-
$t_parents = array();
236-
$t_message = array();
237-
238257
$t_input = explode( "\n", $p_input );
258+
$i = 0;
239259

240-
foreach( $t_input as $t_line ) {
241-
if( strpos( $t_line, '#' ) === 0 ) {
242-
if( !isset( $t_commit['revision'] ) && preg_match( '@^# Node ID +([a-f0-9]+)@', $t_line, $t_matches ) ) {
243-
$t_commit['revision'] = $t_matches[1];
244-
echo 'Processing ' . string_display_line( $t_commit['revision'] ) . '... ';
245-
if ( SourceChangeset::exists( $p_repo->id, $t_commit['revision'] ) ) {
246-
echo "already exists.\n";
247-
return array( null, array() );
248-
}
249-
} else if( !isset( $t_commit['author'] ) && preg_match( '@^# User ([^<>]*)(?(?=(?=<))<([^<>]*)>|.*)@', $t_line, $t_matches ) ) {
250-
$t_commit['author'] = trim($t_matches[1]);
251-
$t_commit['author_email'] = $t_matches[2];
252-
} else if( !isset( $t_commit['date'] ) && preg_match( '@^# Date +(\d+) (-?\d+)@', $t_line, $t_matches ) ) {
253-
$t_timestamp_gmt = $t_matches[1] - (int)$t_matches[2];
260+
# Skip changeset header
261+
while( strpos( $t_input[$i++], '# HG changeset patch' ) === false );
262+
263+
# Process changeset metadata
264+
$t_commit = array();
265+
$t_parents = array();
266+
static $s_pattern_metadata = '/^# (?:'
267+
. self::PATTERN_USER . '|'
268+
. self::PATTERN_DATE . '|'
269+
. self::PATTERN_REVISION
270+
. ')/J';
271+
while( true ) {
272+
$t_match = preg_match( $s_pattern_metadata, $t_input[$i], $t_metadata );
273+
if( $t_match == false ) {
274+
# We reached the end of metadata, next line is the commit message
275+
break;
276+
}
277+
switch( $t_metadata['id'] ) {
278+
case 'User':
279+
$t_commit['author'] = isset( $t_metadata['user'] ) ? trim( $t_metadata['user'] ) : '';
280+
$t_commit['author_email'] = isset( $t_metadata['email'] ) ? $t_metadata['email'] : '';
281+
break;
282+
case 'Date':
283+
$t_timestamp_gmt = $t_metadata['date'] - (int)$t_metadata['tz'];
254284
$t_commit['date'] = gmdate( 'Y-m-d H:i:s', $t_timestamp_gmt );
255-
} else if( !isset( $t_commit['parent'] ) && preg_match( '@^# Parent +([a-f0-9]+)@', $t_line, $t_matches ) ) {
256-
$t_parents[] = $t_matches[1];
257-
$t_commit['parent'] = $t_matches[1];
258-
}
259-
} else if( isset( $t_commit['revision'] ) ) {
260-
if ( preg_match( '@^diff @', $t_line, $t_matches ) ) {
261285
break;
262-
}
263-
$t_message[] = $t_line;
286+
case 'Node ID':
287+
$t_commit['revision'] = $t_metadata['rev'];
288+
break;
289+
case 'Parent':
290+
$t_parents[] = $t_commit['parent'] = $t_metadata['rev'];
291+
break;
264292
}
293+
$i++;
265294
}
266295

267-
if ( !SourceChangeset::exists( $p_repo->id, $t_commit['revision'] ) ) {
268-
$t_commit['message'] = implode( "\n", $t_message );
296+
if( !SourceChangeset::exists( $p_repo->id, $t_commit['revision'] ) ) {
297+
# Read commit message
298+
$t_message = '';
299+
while( $i < count( $t_input ) ) {
300+
$t_match = preg_match(
301+
'/^' . self::PATTERN_DIFF . '/',
302+
$t_input[$i]
303+
);
304+
if( $t_match ) {
305+
break;
306+
}
307+
$t_message .= $t_input[$i++] . "\n";
308+
}
309+
$t_commit['message'] = trim( $t_message );
269310

270311
$t_changeset = new SourceChangeset( $p_repo->id, $t_commit['revision'],
271312
$p_branch, $t_commit['date'], $t_commit['author'],
@@ -275,7 +316,12 @@ private function commit_changeset( $p_repo, $p_input, $p_branch='' ) {
275316

276317
$t_changeset->author_email = empty($t_commit['author_email'])? '': $t_commit['author_email'];
277318

278-
preg_match_all('#diff[\s]*-r[\s]([^\s]*)[\s]*-r[\s]([^\s]*)[\s]([^\n]*)\n(Binary file[\s]([^\r\n\t\f\v]*)[\s]has changed|\-{3}[\s](/dev/null)?[^\t]*[^\n]*\n\+{3}[\s](/dev/null)?[^\t]*\t[^\n]*)#u', $p_input, $t_matches, PREG_SET_ORDER);
319+
static $s_pattern_diff = '#'
320+
. self::PATTERN_DIFF . '\n('
321+
. self::PATTERN_BINARY_FILE . '|'
322+
. self::PATTERN_PLUS_MINUS
323+
. ')#u';
324+
preg_match_all( $s_pattern_diff, $p_input, $t_matches, PREG_SET_ORDER );
279325

280326
$t_commit['files'] = array();
281327

0 commit comments

Comments
 (0)