From b5295d8015904793b214a5019df41e76ca6eeeaa Mon Sep 17 00:00:00 2001 From: Michael Hamann Date: Fri, 23 Nov 2012 22:55:50 +0100 Subject: [PATCH] Add "safeindex" feature, prevents indexing of protected included metadata The safeindex feature that is turned on by default prevents the indexer from indexing metadata from included pages that are non-public. This means that for example only links from included pages that are public will be indexed. This affects plugins that add their own metadata to the index in the following ways: * there is no effect when all included pages are public or when no user is logged in when the page is indexed * when the plugin's event handler is called after the include plugin, the plugin will get only metadata from included pages that are public * when the plugin's event handler is called before the include plugin the include plugin will delete the plugin's metadata. I'm happy to add special handlers or exceptions for plugins like the tag plugin (already included) that are affected by this problem. The safeindex feature can be turned off when the ACL rules of all parent pages match the child pages or when information disclosure through metadata like backlinks is no problem. --- _test/safeindex.test.php | 36 +++++++++++++++++ action.php | 84 ++++++++++++++++++++++++++++++++++++++++ conf/default.php | 1 + conf/metadata.php | 1 + lang/en/settings.php | 1 + 5 files changed, 123 insertions(+) create mode 100644 _test/safeindex.test.php diff --git a/_test/safeindex.test.php b/_test/safeindex.test.php new file mode 100644 index 0000000..6b94db0 --- /dev/null +++ b/_test/safeindex.test.php @@ -0,0 +1,36 @@ +pluginsEnabled[] = 'include'; + parent::setup(); + } + + public function test_safeindex() { + global $conf; + global $AUTH_ACL; + $conf['superuser'] = 'john'; + $conf['useacl'] = 1; + + $AUTH_ACL = array( + '* @ALL 0', + '* @user 8', + 'public @ALL 1', + ); + + $_SERVER['REMOTE_USER'] = 'john'; + + saveWikiText('parent', "{{page>child}}\n\n[[public_link]]\n\n{{page>public}}", 'Test parent created'); + saveWikiText('child', "[[foo:private]]", 'Test child created'); + saveWikiText('public', "[[foo:public]]", 'Public page created'); + + idx_addPage('parent'); + idx_addPage('child'); + idx_addPage('public'); + + $this->assertEquals(array('parent', 'public'), ft_backlinks('foo:public')); + $this->assertEquals(array('child'), ft_backlinks('foo:private')); + $this->assertEquals(array('parent'), ft_backlinks('public_link')); + } +} + diff --git a/action.php b/action.php index c45f146..e245616 100644 --- a/action.php +++ b/action.php @@ -32,6 +32,8 @@ function action_plugin_include() { */ function register(&$controller) { /* @var Doku_event_handler $controller */ + $controller->register_hook('INDEXER_PAGE_ADD', 'BEFORE', $this, 'handle_indexer'); + $controller->register_hook('INDEXER_VERSION_GET', 'BEFORE', $this, 'handle_indexer_version'); $controller->register_hook('PARSER_CACHE_USE','BEFORE', $this, '_cache_prepare'); $controller->register_hook('HTML_EDITFORM_OUTPUT', 'BEFORE', $this, 'handle_form'); $controller->register_hook('HTML_CONFLICTFORM_OUTPUT', 'BEFORE', $this, 'handle_form'); @@ -43,6 +45,88 @@ function register(&$controller) { $controller->register_hook('EDITX_HANDLERS_REGISTER', 'BEFORE', $this, 'handle_editx_register'); } + /** + * Add a version string to the index so it is rebuilt + * whenever the handler is updated or the safeindex setting is changed + */ + public function handle_indexer_version($event, $param) { + $event->data['plugin_include'] = '0.1.safeindex='.$this->getConf('safeindex'); + } + + /** + * Handles the INDEXER_PAGE_ADD event, prevents indexing of metadata from included pages that aren't public if enabled + * + * @param Doku_Event $event the event object + * @param array $params optional parameters (unused) + */ + public function handle_indexer(Doku_Event $event, $params) { + global $USERINFO; + + // check if the feature is enabled at all + if (!$this->getConf('safeindex')) return; + + // is there a user logged in at all? If not everything is fine already + if (is_null($USERINFO) && !isset($_SERVER['REMOTE_USER'])) return; + + // get the include metadata in order to see which pages were included + $inclmeta = p_get_metadata($event->data['page'], 'plugin_include', METADATA_RENDER_UNLIMITED); + $all_public = true; // are all included pages public? + // check if the current metadata indicates that non-public pages were included + if ($inclmeta !== null && isset($inclmeta['pages'])) { + foreach ($inclmeta['pages'] as $page) { + if (auth_aclcheck($page['id'], '', array()) < AUTH_READ) { // is $page public? + $all_public = false; + break; + } + } + } + + if (!$all_public) { // there were non-public pages included - action required! + // backup the user information + $userinfo_backup = $USERINFO; + $remote_user = $_SERVER['REMOTE_USER']; + // unset user information - temporary logoff! + $USERINFO = null; + unset($_SERVER['REMOTE_USER']); + + // metadata is only rendered once for a page in one request - thus we need to render manually. + $meta = p_read_metadata($event->data['page']); // load the original metdata + $meta = p_render_metadata($event->data['page'], $meta); // render the metadata + p_save_metadata($event->data['page'], $meta); // save the metadata so other event handlers get the public metadata, too + + $meta = $meta['current']; // we are only interested in current metadata. + + // check if the tag plugin handler has already been called before the include plugin + $tag_called = isset($event->data['metadata']['subject']); + + // Reset the metadata in the renderer. This removes data from all other event handlers, but we need to be on the safe side here. + $event->data['metadata'] = array('title' => $meta['title']); + + // restore the relation references metadata + if (isset($meta['relation']['references'])) { + $event->data['metadata']['relation_references'] = array_keys($meta['relation']['references']); + } else { + $event->data['metadata']['relation_references'] = array(); + } + + // restore the tag metadata if the tag plugin handler has been called before the include plugin handler. + if ($tag_called) { + $tag_helper = $this->loadHelper('tag', false); + if ($tag_helper) { + if (isset($meta['subject'])) { + $event->data['metadata']['subject'] = $tag_helper->_cleanTagList($meta['subject']); + } else { + $event->data['metadata']['subject'] = array(); + } + } + } + + // restore user information + $USERINFO = $userinfo_backup; + $_SERVER['REMOTE_USER'] = $remote_user; + } + } + /** * Used for debugging purposes only */ diff --git a/conf/default.php b/conf/default.php index 8b28c59..09d0cbe 100644 --- a/conf/default.php +++ b/conf/default.php @@ -20,4 +20,5 @@ $conf['title'] = 0; // use first header of page in link $conf['pageexists'] = 0; // no link if page does not exist $conf['parlink'] = 1; // paragraph around link +$conf['safeindex'] = 1; // prevent indexing of protected metadata //Setup VIM: ex: et ts=2 : diff --git a/conf/metadata.php b/conf/metadata.php index ef31cc4..99ed5a9 100644 --- a/conf/metadata.php +++ b/conf/metadata.php @@ -23,4 +23,5 @@ $meta['title'] = array('onoff'); $meta['pageexists'] = array('onoff'); $meta['parlink'] = array('onoff'); +$meta['safeindex'] = array('onoff'); //Setup VIM: ex: et ts=2 : diff --git a/lang/en/settings.php b/lang/en/settings.php index df575a5..1002a29 100644 --- a/lang/en/settings.php +++ b/lang/en/settings.php @@ -25,4 +25,5 @@ $lang['title'] = 'use first heading of page in link even if useheading is off (only affects linkonly mode)'; $lang['pageexists'] = 'do not display a link if the page does not exist (only affects linkonly mode)'; $lang['parlink'] = 'put a paragraph around the link (only affects linkonly mode)'; +$lang['safeindex'] = 'prevent indexing of metadata from non-public included pages'; //Setup VIM: ex: et ts=2 :