From 0d378f8258c16d4ac72a4cae7643ec96aaa86a10 Mon Sep 17 00:00:00 2001 From: Kartik Prabhu Date: Tue, 20 Feb 2018 21:03:01 -0500 Subject: [PATCH 01/11] Squashed commit of the following: commit d0ea9ea9ef2d3cb8839bc66a1b1aef20f2121529 Author: Kartik Prabhu Date: Tue Feb 20 20:56:09 2018 -0500 bump version to 1.0.6 commit 9be5edb8cca285a89fbd3abb210403b212260520 Author: Kartik Prabhu Date: Tue Feb 20 20:52:24 2018 -0500 fix #59 #57 #43. use html5lib by default unless the user specified an HTML parser. If neither is available, let BS4 decide which parser to use. use the final redirect URL while requesting from the server. commit 3995eacabf00001c6d9b18ba5c43d59513bc792a Author: Kartik Prabhu Date: Sat Feb 17 22:10:19 2018 -0500 fix #55; parse title from link element for p-*. parse poster from video element for u-* --- mf2py/parse_property.py | 48 +++++++++++++-------------------- mf2py/parser.py | 60 ++++++++++++++++++++++++++++------------- mf2py/version.py | 2 +- 3 files changed, 61 insertions(+), 49 deletions(-) diff --git a/mf2py/parse_property.py b/mf2py/parse_property.py index 8ee4a37..858f0df 100644 --- a/mf2py/parse_property.py +++ b/mf2py/parse_property.py @@ -39,40 +39,31 @@ def get_vcp_children(el): def text(el): """Process p-* properties""" + # handle value-class-pattern value_els = get_vcp_children(el) if value_els: return ''.join(get_vcp_value(el) for el in value_els) - prop_value = get_attr(el, "title", check_name="abbr") - if prop_value is not None: - return prop_value - - prop_value = get_attr(el, "value", check_name=("data", "input")) - if prop_value is not None: - return prop_value - - prop_value = get_attr(el, "alt", check_name=("img", "area")) - if prop_value is not None: - return prop_value + prop_value = get_attr(el, "title", check_name=("abbr", "link"))\ + or get_attr(el, "value", check_name=("data", "input"))\ + or get_attr(el, "alt", check_name=("img", "area"))\ + or el.get_text() - # see if get_text() replaces img with alts - # strip here? 
- return el.get_text() + # drop