Browse Source

lxml's HTML parser adds html/body tags; remove them if present.

This causes issues with the atom blog, as it puts html tags in the middle
of an HTML document where they shouldn't be.
main
John-Mark Gurney 3 years ago
parent
commit
7f631b112e
1 changed file with 18 additions and 1 deletion
  1. +18
    -1
      encthenet_plugins.py

+ 18
- 1
encthenet_plugins.py View File

@@ -21,7 +21,24 @@ def rellinktoabs(context, value):
# prefix them w/ the content_url
i.attrib['href'] = content_url + i.attrib['href']

return etree.tostring(html, encoding='unicode', method='html')
res = etree.tostring(html, encoding='unicode', method='html')

# lxml.HTML wraps the html with html/body tags; strip them
# if present

startstr = '<html><body>'
endstr = '</body></html>'

startpos = 0
endpos = None
if res.startswith(startstr):
startpos = len(startstr)
if res.endswith(endstr):
endpos = -len(endstr)

res = res[startpos:endpos]

return res

# mostly copied from hyde.ext.templates.jinja.py Markdown
# and using docs from:


Loading…
Cancel
Save