@@ -22,6 +22,19 @@ class SourceHgWebPlugin extends MantisSourcePlugin {
22
22
const PLUGIN_VERSION = '1.0.1 ' ;
23
23
const FRAMEWORK_VERSION_REQUIRED = '1.3.2 ' ;
24
24
25
+ /**
26
+ * Various PCRE patterns used to parse HgWeb output when retrieving
27
+ * changeset info
28
+ * @see commit_changeset()
29
+ */
30
+ const PATTERN_USER = '(?<id>User) (?<user>[^<>]*)(?(?=(?=<))<(?<email>[^<>]*)>|.*) ' ;
31
+ const PATTERN_DATE = '(?<id>Date) (?<date>\d+) (?<tz>-?\d+) ' ;
32
+ const PATTERN_REVISION = '(?<id>Node ID|Parent) +(?<rev>[0-9a-f]+) ' ;
33
+ const PATTERN_DIFF = 'diff[\s]*-r[\s]([^\s]*)[\s]*-r[\s]([^\s]*)[\s]([^\n]*) ' ;
34
+ const PATTERN_BINARY_FILE = 'Binary file[\s]([^\r\n\t\f\v]*)[\s]has changed ' ;
35
+ # Don't use '/' as the pattern delimiter with this one: it contains literal '/' characters (/dev/null)
36
+ const PATTERN_PLUS_MINUS = '\-{3}[\s](/dev/null)?[^\t]*[^\n]*\n\+{3}[\s](/dev/null)?[^\t]*\t[^\n]* ' ;
37
+
25
38
function register () {
26
39
$ this ->name = plugin_lang_get ( 'title ' );
27
40
$ this ->description = plugin_lang_get ( 'description ' );
@@ -231,41 +244,69 @@ private function import_commits( $p_repo, $p_uri_base, $p_commit_ids, $p_branch=
231
244
return $ t_changesets ;
232
245
}
233
246
247
+ /**
248
+ * Parse changeset data and store it if it does not exist already.
249
+ * This assumes a standard Mercurial template for raw changesets. Using a
250
+ * customized one may break the parsing logic.
251
+ * @param SourceRepo $p_repo Repository
252
+ * @param string $p_input Raw changeset data
253
+ * @param string $p_branch Branch passed on to the new SourceChangeset (defaults to '')
254
+ * @return array SourceChangeset object, list of parent revisions
255
+ */
234
256
private function commit_changeset ( $ p_repo , $ p_input , $ p_branch ='' ) {
235
- $ t_parents = array ();
236
- $ t_message = array ();
237
-
238
257
$ t_input = explode ( "\n" , $ p_input );
258
+ $ i = 0 ;
239
259
240
- foreach ( $ t_input as $ t_line ) {
241
- if ( strpos ( $ t_line , '# ' ) === 0 ) {
242
- if ( !isset ( $ t_commit ['revision ' ] ) && preg_match ( '@^# Node ID +([a-f0-9]+)@ ' , $ t_line , $ t_matches ) ) {
243
- $ t_commit ['revision ' ] = $ t_matches [1 ];
244
- echo 'Processing ' . string_display_line ( $ t_commit ['revision ' ] ) . '... ' ;
245
- if ( SourceChangeset::exists ( $ p_repo ->id , $ t_commit ['revision ' ] ) ) {
246
- echo "already exists. \n" ;
247
- return array ( null , array () );
248
- }
249
- } else if ( !isset ( $ t_commit ['author ' ] ) && preg_match ( '@^# User ([^<>]*)(?(?=(?=<))<([^<>]*)>|.*)@ ' , $ t_line , $ t_matches ) ) {
250
- $ t_commit ['author ' ] = trim ($ t_matches [1 ]);
251
- $ t_commit ['author_email ' ] = $ t_matches [2 ];
252
- } else if ( !isset ( $ t_commit ['date ' ] ) && preg_match ( '@^# Date +(\d+) (-?\d+)@ ' , $ t_line , $ t_matches ) ) {
253
- $ t_timestamp_gmt = $ t_matches [1 ] - (int )$ t_matches [2 ];
260
+ # Skip changeset header
261
+ while ( strpos ( $ t_input [$ i ++], '# HG changeset patch ' ) === false );
262
+
263
+ # Process changeset metadata
264
+ $ t_commit = array ();
265
+ $ t_parents = array ();
266
+ static $ s_pattern_metadata = '/^# (?: '
267
+ . self ::PATTERN_USER . '| '
268
+ . self ::PATTERN_DATE . '| '
269
+ . self ::PATTERN_REVISION
270
+ . ')/J ' ;
271
+ while ( true ) {
272
+ $ t_match = preg_match ( $ s_pattern_metadata , $ t_input [$ i ], $ t_metadata );
273
+ if ( $ t_match == false ) {
274
+ # We reached the end of metadata, next line is the commit message
275
+ break ;
276
+ }
277
+ switch ( $ t_metadata ['id ' ] ) {
278
+ case 'User ' :
279
+ $ t_commit ['author ' ] = isset ( $ t_metadata ['user ' ] ) ? trim ( $ t_metadata ['user ' ] ) : '' ;
280
+ $ t_commit ['author_email ' ] = isset ( $ t_metadata ['email ' ] ) ? $ t_metadata ['email ' ] : '' ;
281
+ break ;
282
+ case 'Date ' :
283
+ $ t_timestamp_gmt = $ t_metadata ['date ' ] - (int )$ t_metadata ['tz ' ];
254
284
$ t_commit ['date ' ] = gmdate ( 'Y-m-d H:i:s ' , $ t_timestamp_gmt );
255
- } else if ( !isset ( $ t_commit ['parent ' ] ) && preg_match ( '@^# Parent +([a-f0-9]+)@ ' , $ t_line , $ t_matches ) ) {
256
- $ t_parents [] = $ t_matches [1 ];
257
- $ t_commit ['parent ' ] = $ t_matches [1 ];
258
- }
259
- } else if ( isset ( $ t_commit ['revision ' ] ) ) {
260
- if ( preg_match ( '@^diff @ ' , $ t_line , $ t_matches ) ) {
261
285
break ;
262
- }
263
- $ t_message [] = $ t_line ;
286
+ case 'Node ID ' :
287
+ $ t_commit ['revision ' ] = $ t_metadata ['rev ' ];
288
+ break ;
289
+ case 'Parent ' :
290
+ $ t_parents [] = $ t_commit ['parent ' ] = $ t_metadata ['rev ' ];
291
+ break ;
264
292
}
293
+ $ i ++;
265
294
}
266
295
267
- if ( !SourceChangeset::exists ( $ p_repo ->id , $ t_commit ['revision ' ] ) ) {
268
- $ t_commit ['message ' ] = implode ( "\n" , $ t_message );
296
+ if ( !SourceChangeset::exists ( $ p_repo ->id , $ t_commit ['revision ' ] ) ) {
297
+ # Read commit message
298
+ $ t_message = '' ;
299
+ while ( $ i < count ( $ t_input ) ) {
300
+ $ t_match = preg_match (
301
+ '/^ ' . self ::PATTERN_DIFF . '/ ' ,
302
+ $ t_input [$ i ]
303
+ );
304
+ if ( $ t_match ) {
305
+ break ;
306
+ }
307
+ $ t_message .= $ t_input [$ i ++] . "\n" ;
308
+ }
309
+ $ t_commit ['message ' ] = trim ( $ t_message );
269
310
270
311
$ t_changeset = new SourceChangeset ( $ p_repo ->id , $ t_commit ['revision ' ],
271
312
$ p_branch , $ t_commit ['date ' ], $ t_commit ['author ' ],
@@ -275,7 +316,12 @@ private function commit_changeset( $p_repo, $p_input, $p_branch='' ) {
275
316
276
317
$ t_changeset ->author_email = empty ($ t_commit ['author_email ' ])? '' : $ t_commit ['author_email ' ];
277
318
278
- preg_match_all ('#diff[\s]*-r[\s]([^\s]*)[\s]*-r[\s]([^\s]*)[\s]([^\n]*)\n(Binary file[\s]([^\r\n\t\f\v]*)[\s]has changed|\-{3}[\s](/dev/null)?[^\t]*[^\n]*\n\+{3}[\s](/dev/null)?[^\t]*\t[^\n]*)#u ' , $ p_input , $ t_matches , PREG_SET_ORDER );
319
+ static $ s_pattern_diff = '# '
320
+ . self ::PATTERN_DIFF . '\n( '
321
+ . self ::PATTERN_BINARY_FILE . '| '
322
+ . self ::PATTERN_PLUS_MINUS
323
+ . ')#u ' ;
324
+ preg_match_all ( $ s_pattern_diff , $ p_input , $ t_matches , PREG_SET_ORDER );
279
325
280
326
$ t_commit ['files ' ] = array ();
281
327
0 commit comments