Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Performance improvements for QCH preprocessing #64

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
109 changes: 51 additions & 58 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -32,41 +32,43 @@ VERSION=20180311

#STANDARD RULES

all: doc_devhelp doc_qch doc_doxygen

DISTFILES= \
commands/ \
gadgets/ \
headers/ \
index_transform/ \
images/ \
index_transform/ \
reference/ \
skins/ \
tests/ \
build_link_map.py \
ddg_parse_html.py \
devhelp2qch.py \
fix_devhelp-links.py \
index2autolinker.py \
index2browser.py \
index2ddg.py \
index2devhelp.py \
index2doxygen-tag.py \
index2highlight.py \
index2search.py \
index-chapters-c.xml \
index-chapters-cpp.xml \
index-cpp-search-app.txt \
index-functions.README \
index-functions-c.xml \
index-functions-cpp.xml \
link_map.py \
preprocess.py \
preprocess-css.css \
test.sh \
xml_utils.py \
Makefile \
all: doc_html doc_devhelp doc_qch doc_doxygen

DISTFILES= \
commands/ \
gadgets/ \
headers/ \
images/ \
index_transform/ \
premailer/ \
reference/ \
skins/ \
tests/ \
build_link_map.py \
ddg_parse_html.py \
devhelp2qch.py \
export.py \
fix_devhelp-links.py \
index2autolinker.py \
index2browser.py \
index2ddg.py \
index2devhelp.py \
index2doxygen-tag.py \
index2highlight.py \
index2search.py \
index-chapters-c.xml \
index-chapters-cpp.xml \
index-cpp-search-app.txt \
index-functions.README \
index-functions-c.xml \
index-functions-cpp.xml \
link_map.py \
preprocess.py \
preprocess-css.css \
preprocess_qch.py \
test.sh \
xml_utils.py \
Makefile \
README.md

CLEANFILES= \
Expand All @@ -81,7 +83,7 @@ ifeq ($(UNAME_S),Linux)
endif

clean:
rm -rf $(CLEANFILES)
rm -rf $(CLEANFILES)

check:

Expand All @@ -92,12 +94,13 @@ dist: clean
rm -rf "cppreference-doc-$(VERSION)"

install: all
# install the devhelp documentation
# install the HTML book
pushd "output/reference" > /dev/null; \
find . -type f \
-exec install -DT -m 644 '{}' "$(DESTDIR)$(docdir)/html/{}" \; ; \
popd > /dev/null

# install the devhelp documentation
install -DT -m 644 "output/cppreference-doc-en-c.devhelp2" \
"$(DESTDIR)$(bookdir)/cppreference-doc-en-c/cppreference-doc-en-c.devhelp2"
install -DT -m 644 "output/cppreference-doc-en-cpp.devhelp2" \
Expand Down Expand Up @@ -154,18 +157,14 @@ output/link-map.xml: output/reference
./build_link_map.py

#build the .devhelp2 index
output/cppreference-doc-en-c.devhelp2: \
output/reference \
output/link-map.xml
output/cppreference-doc-en-c.devhelp2: output/reference output/link-map.xml
./index2devhelp.py $(docdir)/html index-chapters-c.xml \
"C Standard Library reference" "cppreference-doc-en-c" "c" \
index-functions-c.xml "output/devhelp-index-c.xml"
./fix_devhelp-links.py "output/devhelp-index-c.xml" \
"output/cppreference-doc-en-c.devhelp2"

output/cppreference-doc-en-cpp.devhelp2: \
output/reference \
output/link-map.xml
output/cppreference-doc-en-cpp.devhelp2: output/reference output/link-map.xml
./index2devhelp.py $(docdir)/html index-chapters-cpp.xml \
"C++ Standard Library reference" "cppreference-doc-en-cpp" "cpp" \
index-functions-cpp.xml "output/devhelp-index-cpp.xml"
Expand All @@ -183,9 +182,7 @@ output/cppreference-doc-en-cpp.qch: output/qch-help-project-cpp.xml

rm -f "output/reference_cssless/qch.xml"

output/qch-help-project-cpp.xml: \
output/cppreference-doc-en-cpp.devhelp2 \
output/reference_cssless
output/qch-help-project-cpp.xml: output/cppreference-doc-en-cpp.devhelp2 output/reference_cssless
#build the file list
echo "<?xml version=\"1.0\" encoding=\"UTF-8\"?><files>" > "output/qch-files.xml"

Expand All @@ -202,17 +199,13 @@ output/qch-help-project-cpp.xml: \
--virtual_folder=cpp --file_list=output/qch-files.xml

# build doxygen tag file
output/cppreference-doxygen-local.tag.xml: \
output/reference \
output/link-map.xml
output/cppreference-doxygen-local.tag.xml: output/reference output/link-map.xml
./index2doxygen-tag.py "output/link-map.xml" \
"index-functions-cpp.xml" \
"index-chapters-cpp.xml" \
"output/cppreference-doxygen-local.tag.xml"

output/cppreference-doxygen-web.tag.xml: \
output/reference \
output/link-map.xml
output/cppreference-doxygen-web.tag.xml: output/reference output/link-map.xml
./index2doxygen-tag.py web \
"index-functions-cpp.xml" \
"index-chapters-cpp.xml" \
Expand Down Expand Up @@ -249,12 +242,12 @@ source:
regex+="|.*action=.*|.*printable=.*|.*en.cppreference.com/book.*" ; \
echo $$regex ; \
wget --adjust-extension --page-requisites --convert-links \
--force-directories --recursive --level=15 \
--span-hosts --domains=en.cppreference.com,upload.cppreference.com \
--reject-regex $$regex \
--timeout=5 --tries=50 --no-verbose \
--retry-connrefused --waitretry=10 --read-timeout=20 \
http://en.cppreference.com/w/ ; \
--force-directories --recursive --level=15 \
--span-hosts --domains=en.cppreference.com,upload.cppreference.com \
--reject-regex $$regex \
--timeout=5 --tries=50 --no-verbose \
--retry-connrefused --waitretry=10 --read-timeout=20 \
http://en.cppreference.com/w/ ; \
popd > /dev/null

./export.py --url=http://en.cppreference.com/mwiki reference/cppreference-export-ns0,4,8,10.xml 0 4 8 10
77 changes: 48 additions & 29 deletions commands/preprocess_cssless.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@
from lxml import html
from lxml import etree
from io import StringIO
from lxml.etree import strip_elements
import logging
import re
import os
import warnings
import io
Expand All @@ -34,7 +34,6 @@ def preprocess_html_merge_cssless(src_path, dst_path):
root = etree.fromstring(stripped, parser)

output = preprocess_html_merge_css(root, src_path)
strip_style_tags(root)
remove_display_none(root)
convert_span_tables_to_tr_td(root)
convert_inline_block_elements_to_table(root)
Expand Down Expand Up @@ -69,49 +68,71 @@ def preprocess_html_merge_css(root, src_path):
with warnings.catch_warnings():
warnings.simplefilter("ignore")
premailer = Premailer(root, base_url=src_path,
disable_link_rewrites=True, remove_classes=True)
disable_link_rewrites=True,
remove_classes=True,
disable_validation=True,
drop_style_tags=True)
root = premailer.transform().getroot()

return output.getvalue()

def strip_style_tags(root):
strip_elements(root, 'style')

def needs_td_wrapper(element):
# element has table:row
if len(element.getchildren()) == 0:
return True
for el in element.getchildren():
if has_css_property_value(el, 'display', 'table-row') or \
has_css_property_value(el, 'display', 'table-cell'):
if get_css_property_value(el, 'display') in ('table-row', 'table-cell'):
return False
return True

def remove_css_property(element, property_name):
atrib = cssutils.parseStyle(element.get('style'))
atrib.removeProperty(property_name)
element.set('style', atrib.getCssText(separator=''))
if len(element.get('style')) == 0:
element.attrib.pop('style')
def remove_css_property(el, prop_name):
if el.get('style') is None:
return

decls = re.split(r'\s*;\s*', el.get('style'))
if decls[-1] == '':
decls.pop()

idx = next((i for i,v in enumerate(decls) if v.startswith(prop_name + ':')), None)
if idx is not None:
del decls[idx]
if len(decls) == 0:
el.attrib.pop('style')
else:
el.set('style', ';'.join(decls))

def get_css_property_value(el, prop_name):
atrib = cssutils.parseStyle(el.get('style'))
value = atrib.getPropertyCSSValue(prop_name)
if value:
return value.cssText
if el.get('style') is None:
return None

for decl in re.split(r'\s*;\s*', el.get('style')):
if decl.startswith(prop_name + ':'):
return decl[len(prop_name)+1:].strip()
return None

def has_css_property_value(el, prop_name, prop_value):
value = get_css_property_value(el, prop_name)
if value and value == prop_value:
return True
return False
if el.get('style') is None:
return False

regex = r'(^|;)\s*{}:\s*{}(;|$)'.format(re.escape(prop_name), re.escape(prop_value))
return re.search(regex, el.get('style')) is not None

def set_css_property_value(el, prop_name, prop_value):
atrib = cssutils.parseStyle(el.get('style'))
atrib.setProperty(prop_name, prop_value)
el.set('style', atrib.getCssText(separator=''))
decl = '{}: {}'.format(prop_name, prop_value)
style = el.get('style')
if style is None or style == '':
el.set('style', decl)
else:
decls = re.split(r'\s*;\s*', style)
if decls[-1] == '':
decls.pop()

try:
idx = next(i for i,v in enumerate(decls) if v.startswith(prop_name + ':'))
decls[idx] = decl
except StopIteration:
decls.append(decl)
el.set('style', ';'.join(decls))

def convert_display_property_to_html_tag(element, element_tag, display_value):
str_attrib_value = element.get('style')
Expand Down Expand Up @@ -170,17 +191,15 @@ def convert_span_tables_to_tr_td(root_el):

def convert_inline_block_elements_to_table(root_el):
for el in root_el.xpath('//*[contains(@style, "display")]'):
if not has_css_property_value(el, 'display', 'inline-block') and \
not has_css_property_value(el, 'display', 'inline-table'):
if get_css_property_value(el, 'display') not in ('inline-block', 'inline-table'):
continue

elements_to_put_into_table = [el]
el = el.getnext()

# find subsequent inline block elements
while el is not None:
if has_css_property_value(el, 'display', 'inline-block') or \
has_css_property_value(el, 'display', 'inline-table'):
if get_css_property_value(el, 'display') in ('inline-block', 'inline-table'):
elements_to_put_into_table.append(el)
else:
break
Expand Down
25 changes: 25 additions & 0 deletions premailer/LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
Copyright (c) 2009-2012, Peter Bengtsson
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of Peter Bengtsson nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL Peter Bengtsson OR CONTRIBUTORS
BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
4 changes: 4 additions & 0 deletions premailer/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
from __future__ import absolute_import, unicode_literals
from .premailer import Premailer, transform

__version__ = '3.2.0'
Loading