# Source code for flea.html

# Copyright 2014 Oliver Cope
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function, unicode_literals, absolute_import
from collections.abc import Iterable
from functools import wraps
import random
import re

from lxml.html import tostring
from lxml.cssselect import CSSSelector, SelectorSyntaxError
from lxml.etree import XPath, XPathError

from .util import url_join_same_server, escapeattrib

__all__ = ["first", "last", "by_index", "random_choice"]

#: Registry for xpath multimethods, keyed by function name
xpath_registry = {}

#: EXSLT regular expression namespace URI (bound to the ``re`` prefix by
#: ``ElementWrapper.find``)
REGEXP_NAMESPACE = "http://exslt.org/regular-expressions"
def _value(el):
    Return the value of input element ``el``
    if el.tag == "textarea":
        return el.text
        return el.attrib["value"]
    except KeyError:
        if el.tag == "input" and el.attrib.get("type") == "checkbox":
            return "On"
        if el.tag == "option":
            return el.text or ""
        return ""

class XPathMultiMethod(object):
    """
    A callable object that has different implementations selected by XPath
    expressions.

    Implementations are tried in registration order; the first one whose
    XPath expression selects the element being operated on is called.
    """

    def __init__(self):
        self.__doc__ = ""
        self.__name__ = ""
        # List of ``(compiled_xpath, function)`` pairs in registration order
        self.endpoints = []

    def __call__(self, *args, **kwargs):
        """
        Dispatch to the first registered implementation matching ``args[0]``.

        :raises NotImplementedError: if no registered expression matches
        """
        el = args[0]
        # Accept either a raw element or a wrapper exposing it as ``.el``
        el = getattr(el, "el", el)
        for xpath, func in self.endpoints:
            if el in xpath(el):
                return func(*args, **kwargs)
        raise NotImplementedError(
            "Function %s not implemented for element %r" % (self.__name__, el)
        )

    def register(self, xpath, func):
        """
        Register ``func`` as the implementation for elements matching
        ``xpath`` (alternatives may be separated by ``|``).
        """
        # Each alternative is evaluated from the element's parent
        # (``../expr``) so that the element itself appears in the result
        # set when it matches.
        self.endpoints.append(
            (
                XPath("|".join("../%s" % item for item in xpath.split("|"))),
                func,
            )
        )
        # Record the name first so that error messages are meaningful even
        # for implementations without a docstring.
        self.__name__ = func.__name__

        func_doc = getattr(func, "__doc__", getattr(func, "func_doc", None))
        if not func_doc:
            return

        # Add wrapped function to the object's docstring
        # Note that ".. comment block" is required to fool rst/sphinx into
        # correctly parsing the indented paragraphs when there is only one
        # registered endpoint.
        doc = "For elements matching ``%s``:\n%s\n\n.. comment block\n\n" % (
            xpath,
            "\n".join("    %s" % line for line in func_doc.split("\n")),
        )
        self.__doc__ += doc

def when(xpath_expr):
    """
    Decorator for methods having different implementations selected by XPath
    expressions.

    All functions decorated under the same name share a single
    ``XPathMultiMethod`` stored in ``xpath_registry``.

    :param xpath_expr: XPath expression selecting the elements the decorated
                       function applies to
    :return: a decorator producing a wrapper that dispatches through the
             shared multimethod
    """

    def decorator(func):
        # If an already-wrapped function is re-decorated, register the
        # underlying implementation rather than the dispatching wrapper.
        if getattr(func, "__wrapped__", None):
            func = getattr(func, "__wrapped__")

        multimethod = xpath_registry.get(func.__name__)
        if multimethod is None:
            multimethod = xpath_registry[func.__name__] = XPathMultiMethod()
        multimethod.register(xpath_expr, func)

        wrapped = wraps(func)(
            lambda self, *args, **kwargs: multimethod(self, *args, **kwargs)
        )
        wrapped.__wrapped__ = func
        # Expose the combined documentation of every endpoint registered so far
        wrapped.func_doc = multimethod.__doc__
        wrapped.__doc__ = multimethod.__doc__
        return wrapped

    return decorator

def preprocess_fill_value(el, v):
    """
    Preprocess form fill values to resolve any callable items.

    Form fill values can be specified as a function which will be called
    with the element to be filled. Thus we can do things like::

        form.fill(payment_option=any)

    Where ``any`` is a function that will be called back with
    ``payment_option`` input element, and should return any valid value.

    :param el: the element being filled; passed to every callable
    :param v: a plain value, a callable, or an iterable of either
    :return: ``v`` with callables replaced by their results. Non-string
             iterables are returned as a list; strings and byte strings
             are returned unchanged.
    """
    if callable(v):
        # A callable may itself return a callable or a list of callables
        return preprocess_fill_value(el, v(el))

    # Text and byte strings are iterable but must be treated as single values
    if isinstance(v, Iterable) and not isinstance(v, (str, bytes)):
        return [
            preprocess_fill_value(el, item(el)) if callable(item) else item
            for item in v
        ]

    return v

class ElementWrapper(object):
    r"""
    Wrapper for an ``lxml.etree`` element, providing additional methods useful
    for driving/testing WSGI applications. ``ElementWrapper`` objects are
    normally created through the ``find``/``css`` methods of ``Agent``::

        >>> from fresco import Response
        >>> from flea import Agent
        >>> myapp = Response(['<html><body><a href="/">link 1</a>'\
        ...                   '<a href="/">link 2</a></body></html>'])
        >>> agent = Agent(myapp).get('/')
        >>> elementwrapper = agent.find('//a')[0]

    ``ElementWrapper`` objects have many methods and properties implemented as
    ``XPathMultiMethod`` objects, meaning their behaviour varies depending on
    the type of element being wrapped. For example, form elements have a
    ``submit`` method, ``a`` elements have a ``click`` method, and ``input``
    elements have a value property.

    Attributes not defined here are proxied to the wrapped element.
    """

    def __init__(self, agent, el):
        self.agent = agent
        self.el = el

    def __str__(self):
        # Elements with no children and no text are shown in full; anything
        # else is abbreviated to its opening and closing tags.
        if len(self.el) == 0 and self.el.text is None:
            return self.html()

        return "<%s%s>...</%s>" % (
            self.el.tag,
            "".join(
                ' %s="%s"' % (key, escapeattrib(value))
                for key, value in self.el.attrib.items()
            ),
            self.el.tag,
        )

    __repr__ = __str__

    def __eq__(self, other):
        if self.__class__ is not other.__class__:
            return False
        return self.el is other.el and self.agent is other.agent

    def __hash__(self):
        # Equality is identity-based on (el, agent); hash consistently so
        # that wrappers remain usable in sets and as dict keys.
        return hash((id(self.el), id(self.agent)))

    def __getattr__(self, attr):
        # Only called for names not found on the wrapper itself
        return getattr(self.el, attr)

    def __call__(self, path, flavor="auto", **kwargs):
        """
        Return elements matching ``path``, which may be a CSS selector or an
        XPath expression.

        :param flavor: ``"css"``, ``"xpath"`` or ``"auto"`` (guess from the
                       expression)
        """
        if flavor == "auto":
            flavor = guess_expression_flavor(path)

        if flavor == "css":
            return self.css(path, **kwargs)
        else:
            return self.find(path, **kwargs)

    def find(self, path, namespaces=None, **kwargs):
        """
        Return elements matching the given xpath expression.

        If the xpath selects a list of elements a ``ResultWrapper`` object is
        returned.

        If the xpath selects any other type (eg a string attribute value), the
        result of the query is returned directly.

        For convenience that the EXSLT regular expression namespace
        (``http://exslt.org/regular-expressions``) is prebound to
        the prefix ``re``.

        :param namespaces: optional mapping of extra prefixes to namespace
                           URIs
        :param kwargs: passed to the element's ``xpath`` method (eg as XPath
                       variables)
        """
        ns = {"re": REGEXP_NAMESPACE}
        if namespaces is not None:
            ns.update(namespaces)
        namespaces = ns

        result = self.el.xpath(path, namespaces=namespaces, **kwargs)

        if not isinstance(result, list):
            return result

        return ResultWrapper(
            (ElementWrapper(self.agent, el) for el in result), "xpath:" + path
        )

    def css(self, selector):
        """
        Return elements matching the given CSS Selector (see
        ``lxml.cssselect`` for documentation on the ``CSSSelector`` class).
        """
        compiled = CSSSelector(selector)
        return ResultWrapper(
            (ElementWrapper(self.agent, el) for el in compiled(self.el)),
            "css:" + selector,
        )

    def __getitem__(self, path):
        """
        Return the result of ``find(path)``.

        :raises ValueError: if the result is empty
        """
        result = self.find(path)
        if len(result) == 0:
            raise ValueError("%r matched no elements" % path)
        return result

    @when("a[@href]")  # NOQA
    def click(self, follow=True, check_status=True):
        """
        Follow a link and return a new instance of ``Agent``
        """
        return self.agent._click(
            self, follow=follow, check_status=check_status
        )

    @when(
        "input[@type='submit' or @type='image']|button[@type='submit' or not(@type)]"
    )  # NOQA
    def click(self, follow=True, check_status=True):
        """
        Alias for submit
        """
        return self.submit(follow, check_status=check_status)

    def _get_value(self):
        """
        Return the value of the input or button element
        """
        # File inputs report the upload registered with the agent, not the
        # DOM attribute
        if self.el.tag == "input" and self.el.attrib.get("type") == "file":
            return self.agent.file_uploads.get(self.el)
        return _value(self.el)

    @when("input[@type='file']")  # NOQA
    def _set_value(self, value):
        """
        Set the value of the file upload, which must be a tuple of::

            (filename, content-type, data)

        Where data can either be a byte string or file-like object.
        """
        filename, content_type, data = value
        self.agent.file_uploads[self.el] = (filename, content_type, data)

        # Set the value in the DOM to the filename so that it can be seen when
        # the DOM is displayed
        self.el.attrib["value"] = filename

    @when("input|button")  # NOQA
    def _set_value(self, value):
        """
        Set the value of the input or button element
        """
        self.el.attrib["value"] = value

    value = property(_get_value, _set_value)

    @when("textarea|input|select")  # NOQA
    def input_group(self):
        """
        Return the group of inputs sharing the same name attribute
        """
        # The name is bound as an XPath variable so that quote characters in
        # it cannot break the expression.
        # NOTE(review): the original argument expression was lost in
        # extraction; an element without a name is assumed to match ''.
        return self.form.find(
            ".//*["
            "(local-name() = 'input'"
            " or local-name() = 'textarea'"
            " or local-name() = 'select')"
            " and (@name=$fieldname)"
            "]",
            fieldname=self.el.attrib.get("name", ""),
        )

    @when("input[@type='checkbox']")  # NOQA
    def submit_value(self):
        """
        Return the value of the selected checkbox element as the user
        agent would return it to the server in a form submission.
        """
        if "disabled" in self.el.attrib:
            return None
        if "checked" in self.el.attrib:
            return _value(self.el)
        return None

    @when("input[@type='radio']")  # NOQA
    def submit_value(self):
        """
        Return the value of the selected radio element as the user
        agent would return it to the server in a form submission.
        """
        if "disabled" in self.el.attrib:
            return None
        if "checked" in self.el.attrib:
            return _value(self.el)
        return None

    @when("select[@multiple]")  # NOQA
    def submit_value(self):
        """
        Return the list of values of the selected options as the user
        agent would return them to the server in a form submission.
        """
        if "disabled" in self.el.attrib:
            return None
        return [_value(item) for item in self.el.xpath(".//option[@selected]")]

    @when("select[not(@multiple)]")  # NOQA
    def submit_value(self):
        """
        Return the value of the selected option as the user
        agent would return it to the server in a form submission.
        """
        if "disabled" in self.el.attrib:
            return None
        try:
            item = self.el.xpath(".//option[@selected]")[0]
        except IndexError:
            # Nothing explicitly selected: fall back to the first option
            try:
                item = self.el.xpath(".//option[1]")[0]
            except IndexError:
                return None
        return _value(item)

    @when(
        "input[not(@type) or @type != 'submit' and @type != 'image' and @type != 'reset']"
    )  # NOQA
    def submit_value(self):
        """
        Return the value of any other input element as the user
        agent would return it to the server in a form submission.
        """
        if "disabled" in self.el.attrib:
            return None
        return self.value

    # The previous endpoint already claims every other input, so selecting
    # the three button-like types explicitly matches exactly the elements
    # that reach this endpoint.
    @when(
        "input[@type = 'submit' or @type = 'image' or @type = 'reset']"
    )  # NOQA
    def submit_value(self):
        """
        Return the value of any submit/reset input element
        """
        return None

    @when("textarea")  # NOQA
    def submit_value(self):
        """
        Return the value of a textarea element
        """
        return _value(self.el)

    submit_value = property(submit_value)

    def _get_checked(self):
        """
        Return True if the element has the checked attribute
        """
        return "checked" in self.el.attrib

    @when("input[@type='radio']")  # NOQA
    def _set_checked(self, value):
        """
        Set the radio button state to checked (unchecking any others in the
        group)
        """
        for el in self.el.xpath(
            "//input[@type='radio' and @name=$name]",
            name=self.el.attrib.get("name", ""),
        ):
            if "checked" in el.attrib:
                del el.attrib["checked"]

        if bool(value):
            self.el.attrib["checked"] = "checked"
        else:
            if "checked" in self.el.attrib:
                del self.el.attrib["checked"]

    @when("input")  # NOQA
    def _set_checked(self, value):
        """
        Set the (checkbox) input state to checked
        """
        if bool(value):
            self.el.attrib["checked"] = "checked"
        else:
            try:
                del self.el.attrib["checked"]
            except KeyError:
                pass

    checked = property(_get_checked, _set_checked)

    @when("option")  # NOQA
    def _get_selected(self):
        """
        Return True if the given select option is selected
        """
        return "selected" in self.el.attrib

    @when("option")  # NOQA
    def _set_selected(self, value):
        """
        Set the ``selected`` attribute for the select option element. If the
        select does not have the ``multiple`` attribute, unselect any
        previously selected option.
        """
        if (
            "multiple"
            not in self.el.xpath("./ancestor-or-self::select[1]")[0].attrib
        ):
            for el in self.el.xpath("./ancestor-or-self::select[1]//option"):
                if "selected" in el.attrib:
                    del el.attrib["selected"]

        if bool(value):
            self.el.attrib["selected"] = ""
        else:
            if "selected" in self.el.attrib:
                del self.el.attrib["selected"]

    selected = property(_get_selected, _set_selected)

    @property  # NOQA
    def form(self):
        """
        Return the form associated with the wrapped element.
        """
        return self.__class__(
            self.agent, self.el.xpath("./ancestor-or-self::form[1]")[0]
        )

    @when(
        "input[@type='submit' or @type='image']|button[@type='submit' or not(@type)]"
    )  # NOQA
    def submit(self, follow=True, check_status=True):
        """
        Submit the form, returning a new ``Agent`` object, by clicking on
        the selected submit element (input of
        type submit or image, or button with type submit)
        """
        return self.form.submit(self, follow=follow, check_status=check_status)

    @when("form")  # NOQA
    def submit(self, button=None, follow=True, check_status=True):
        """
        Submit the form, returning a new ``Agent`` object

        :raises KeyError: for a method/enctype combination that is not
                          supported
        """
        method = self.el.attrib.get("method", "GET").upper()
        data = self.submit_data(button)
        # NOTE(review): the first argument was lost in extraction; the URL of
        # the current request is assumed to be the base for the join.
        path = url_join_same_server(
            self.agent.request.url,
            self.el.attrib.get("action", self.agent.request.path),
        )
        if method == "GET":
            # The encoding type is irrelevant to a GET submission, so do not
            # fail when the form happens to declare one
            send = self.agent.get
        else:
            send = {
                ("POST", None): self.agent.post,
                ("POST", "application/x-www-form-urlencoded"): self.agent.post,
                ("POST", "multipart/form-data"): self.agent.post_multipart,
            }[(method, self.el.attrib.get("enctype"))]
        return send(path, data, follow=follow, check_status=check_status)

    def submit_data(self, button=None):
        """
        Return the data that would be submitted by the wrapped form, or by
        the form owning the wrapped submit button.

        :raises NotImplementedError: for any other kind of element
        """
        tag = self.el.tag
        type_ = self.el.attrib.get("type")

        if tag == "form":
            return self.submit_data_form(button)
        elif (tag == "input" and type_ in ("submit", "image")) or (
            tag == "button" and type_ in {"submit", "", None}
        ):
            return self.form.submit_data_form(button or self)
        raise NotImplementedError()

    def submit_data_form(self, button=None):
        """
        Return a list of the data that would be submitted to the server
        in the format ``[(key, value), ...]``, without actually submitting the
        form.

        :param button: the submit control used, either as a wrapper or as a
                       selector string resolved relative to this element
        """
        data = []
        if isinstance(button, str):
            button = self(button)

        if button and "name" in button.attrib:
            data.append((button.attrib["name"], button.value))
            if button.el.attrib.get("type") == "image":
                # Image buttons also submit the click coordinates
                data.append((button.attrib["name"] + ".x", "1"))
                data.append((button.attrib["name"] + ".y", "1"))

        fields = (
            ElementWrapper(self.agent, el)
            for el in self.el.xpath(".//input|.//textarea|.//select")
        )
        for field in fields:
            try:
                name = field.attrib["name"]
            except KeyError:
                # Controls without a name are never submitted
                continue
            value = field.submit_value
            if value is None:
                continue

            elif field.attrib.get("type") == "file" and isinstance(
                value, tuple
            ):
                data.append((name, value))

            elif isinstance(value, str):
                data.append((name, value))

            else:
                # Multiple values, eg from ``<select multiple>``
                data += [(name, v) for v in value]

        return data

    def fill(self, *args, **kwargs):
        """
        Fill the current form or form element
        """
        if self.el.tag == "form":
            return self.fill_form(*args, **kwargs)
        else:
            return self.fill_field(*args, **kwargs)

    def fill_sloppy(self, *args, **kwargs):
        """
        Fill the current form, ignoring missing fields
        """
        kwargs["_fill_strict"] = False
        return self.fill_form(*args, **kwargs)

    def fill_form(self, *args, **kwargs):
        r"""
        Fill the current form with data.

        :param \*args: Pairs of ``(selector, value)``
        :param \*\*kwargs: mappings of fieldname to value
        :param _fill_strict: If True, raise an error when a field is not found

        See the documentation for :meth:`_set_value` implementations
        for individual form control types to see how values are processed
        as this varies between text inputs, selects, radio buttons,
        checkboxes etc

        :raises IndexError: in strict mode, when a field cannot be found
        """
        strict = kwargs.pop("_fill_strict", True)

        def check_exists(element, name):
            if len(element) > 0:
                return True

            if strict:
                # NOTE(review): the expression producing each name was lost
                # in extraction; the ``name`` attribute is assumed.
                valid = ", ".join(
                    repr(e.attrib["name"])
                    for e in self.css("input, textarea, select")
                    if "name" in e.attrib
                )
                raise IndexError(
                    "Couldn't find a form element named {0!r}. "
                    "Valid names are {1}".format(name, valid)
                )
            return False

        for selector, value in args:
            element = self(selector)
            if check_exists(element, selector):
                element.fill(value)

        for name, value in kwargs.items():
            path = (
                ".//*[(local-name() = 'input' "
                "or local-name() = 'textarea' "
                "or local-name() = 'select') "
                "and (@name=$name or @id=$name)]"
            )
            element = self.find(path, name=name)
            if check_exists(element, name):
                element.fill(value)

        return self

    def fill_field(self, *args, **kwargs):
        """
        Fill the current form element with a value
        """
        args = [preprocess_fill_value(self.el, v) for v in args]
        # Most specific handler first: ``fill_<tag>_<type>``, then
        # ``fill_<tag>``, finally the generic ``fill_input``
        try_methods = [
            "fill_{0}_{1}".format(self.el.tag, self.el.attrib.get("type")),
            "fill_{0}".format(self.el.tag),
        ]

        for m in try_methods:
            m = getattr(self, m, None)
            if m is not None:
                return m(*args, **kwargs)
        return self.fill_input(*args, **kwargs)

    def fill_input_checkbox(self, values):
        """
        Set the state of a checkbox or group of checkboxes.

        :param values: ``None`` (uncheck the whole group), a single bool
                       (check/uncheck this checkbox), a list of bools (one
                       per checkbox in the group), or one or more values
                       identifying the checkboxes to be checked
        :raises AssertionError: if a requested value is not in the group
        """
        if values is None:
            values = []

        if isinstance(values, bool):
            self.checked = values
            return self

        # A lone string or scalar names a single checkbox; it must not be
        # iterated character by character
        if isinstance(values, str) or not isinstance(values, Iterable):
            values = [values]

        if values and all(isinstance(v, bool) for v in values):
            # List of bools, eg ``[True, False, True]``
            for el, checked in zip(self.input_group(), values):
                if checked:
                    el.attrib["checked"] = ""
                elif "checked" in el.attrib:
                    del el.attrib["checked"]

        else:
            # List of values, eg ``['1', '23', '8']``
            found = set()
            values = {str(v) for v in values}
            for el in self.input_group():
                if el.attrib.get("value") in values:
                    found.add(el.attrib["value"])
                    el.attrib["checked"] = ""
                elif "checked" in el.attrib:
                    del el.attrib["checked"]
            if found != values:
                raise AssertionError(
                    "Values %r not present"
                    " in checkbox group %r"
                    % (values - found, self.el.attrib.get("name"))
                )

        return self

    def fill_input_radio(self, value):
        """
        Set the value of the radio button, by searching for the radio
        button in the group with the given value and checking it.

        :raises AssertionError: if no radio button has the given value
        """
        if value is not None:
            value = str(value)
        found = False
        for el in self.el.xpath(
            "./ancestor-or-self::form[1]//input[@type='radio' and @name=$n]",
            n=self.el.attrib.get("name", ""),
        ):
            if el.attrib.get("value") == value:
                el.attrib["checked"] = ""
                found = True
            elif "checked" in el.attrib:
                del el.attrib["checked"]
        if value is not None and not found:
            raise AssertionError(
                "Value %r not present"
                " in radio button group %r"
                % (value, self.el.attrib.get("name"))
            )
        return self

    def fill_textarea(self, value):
        """
        Set the value of a textarea control
        """
        if value is not None:
            value = str(value)
        self.el.text = value
        return self

    def fill_input_file(self, value):
        """
        Set the value of a file input box
        """
        if value is None:
            try:
                del self.el.attrib["value"]
            except KeyError:
                pass
        else:
            self.value = value
        return self

    def fill_input(self, value):
        """
        Set the value of a (text, password, ...) input box
        """
        if value is None:
            try:
                del self.el.attrib["value"]
            except KeyError:
                pass
        else:
            self.value = str(value)
        return self

    def fill_select(self, value):
        """
        Set the values of a select box

        :param value: the value to be selected (or, for a select with the
                      ``multiple`` attribute, the values to be selected)
        :raises AssertionError: if the value matches no option
        """
        if "multiple" in self.el.attrib:
            return self.fill_select_multiple(value)

        if value is not None:
            value = str(value)
        found = False
        for opt in self.el.xpath(".//option"):
            if opt.attrib.get("value", opt.text) == value:
                opt.attrib["selected"] = ""
                found = True
            elif "selected" in opt.attrib:
                del opt.attrib["selected"]
        if not found and value is not None:
            raise AssertionError(
                "Value %r not present in select %r"
                % (value, self.el.attrib.get("name"))
            )
        return self

    def fill_select_multiple(self, values):
        """
        Set the selected options of a ``<select multiple>`` control.

        :param values: a single value, a list of values, or a list of bools
                       (one per option, in document order)
        :raises AssertionError: if a requested value matches no option
        """
        options = self.el.xpath(".//option")
        if isinstance(values, str) or not isinstance(values, Iterable):
            values = [values]

        if all(isinstance(v, bool) for v in values):
            # Translate positional flags into the corresponding option values
            values = [
                opt.attrib.get("value", opt.text)
                for selected, opt in zip(values, options)
                if selected
            ]

        found = set()
        values = {str(v) for v in values}
        for opt in options:
            value = opt.attrib.get("value", opt.text)
            if value in values:
                found.add(value)
                opt.attrib["selected"] = ""
            elif "selected" in opt.attrib:
                del opt.attrib["selected"]
        if found != values:
            raise AssertionError(
                "Values %r not present in select %r"
                % (values - found, self.el.attrib.get("name"))
            )
        return self

    def html(self):
        """
        Return an HTML representation of the element

        :rtype: unicode string
        """
        return tostring(self.el, encoding="unicode")

    def pretty(self):
        """
        Return an pretty-printed string representation of the element

        :rtype: unicode string
        """
        return tostring(self.el, encoding="unicode", pretty_print=True)

    def striptags(self):
        r"""
        Strip tags out of the element and its children to leave only the
        textual content. Normalize all sequences of whitespace to a single
        space.

        Use this for simple text comparisons when testing for document content

        Example::

            >>> from fresco import Response
            >>> from flea import Agent
            >>> myapp = Response(['<p>the <span>foo</span> is'\
            ...                   ' completely <strong>b</strong>azzed</p>'])
            >>> agent = Agent(myapp).get('/')
            >>> agent['//p'].striptags()
            'the foo is completely bazzed'

        """

        def _striptags(node):
            # Document-order walk: the node's own text, then each child's
            # content followed by the text trailing that child
            if node.text:
                yield node.text
            for subnode in node:
                for text in _striptags(subnode):
                    yield text
                if subnode.tail:
                    yield subnode.tail

        return re.sub(r"\s+", " ", "".join(_striptags(self.el)))

    def __contains__(self, what):
        return what in self.html()

class ResultWrapper(list):
    """
    Wrap a list of elements (``ElementWrapper`` objects) returned from an xpath
    query, providing reasonable default behaviour for testing.

    ``ResultWrapper`` objects usually wrap ``ElementWrapper`` objects, which in
    turn wrap an lxml element and are normally created through the find/findcss
    methods of ``Agent``::

        >>> from fresco import Response
        >>> myapp = Response(['<html><p>item 1</p><p>item 2</p></html>'])
        >>> agent = Agent(myapp).get('/')
        >>> resultwrapper = agent.find('//p')

    ``ResultWrapper`` objects have list like behaviour::

        >>> len(resultwrapper)
        2
        >>> resultwrapper[0] #doctest: +ELLIPSIS
        <p>...</p>

    Attributes that are not part of the list interface are proxied to the first
    item in the result list for convenience. These two uses are equivalent::

        >>> resultwrapper[0].text
        'item 1'
        >>> resultwrapper.text
        'item 1'

    Items in the ``ResultWrapper`` are ``ElementWrapper`` instances, which
    provide methods in addition to the normal lxml.element methods (eg
    ``click()``, setting/getting form field values etc).
    """

    def __init__(self, elements, expr=None):
        super(ResultWrapper, self).__init__(elements)
        # Written through __dict__ because __setattr__ proxies to the first
        # element
        self.__dict__["expr"] = expr

    def __getattr__(self, attr):
        return getattr(self[0], attr)

    def __setattr__(self, attr, value):
        return setattr(self[0], attr, value)

    def __getitem__(self, item):
        """
        Integer indexes and slices address the result list itself; any other
        key is looked up on the first element.

        :raises IndexError: if the index is out of range, or the list is
                            empty when a lookup on the first element is
                            requested
        """
        try:
            if isinstance(item, int):
                return super(ResultWrapper, self).__getitem__(item)
            elif isinstance(item, slice):
                # Keep slices list-like rather than treating them as a path
                return self.__class__(
                    super(ResultWrapper, self).__getitem__(item),
                    self.__dict__["expr"],
                )
            else:
                return self[0][item]
        except IndexError:
            raise IndexError("list index out of range for %r" % (self,))

    def __contains__(self, what):
        return self[0].__contains__(what)

    def __repr__(self):
        return "<ResultWrapper %r>" % (self.__dict__["expr"],)

    def filter_on_text(self, matcher):
        """
        Return a new :class:`ResultWrapper` of the elements in ``elements``
        where applying the function ``matcher`` to the text contained in
        the element results in a truth value.
        """
        # %-formatting rather than ``+`` so that a wrapper created without
        # an expression (``expr=None``) can still be filtered
        return self.__class__(
            (e for e in self if matcher(e.striptags())),
            "%s (filtered by %s)" % (self.__dict__["expr"], matcher),
        )

    def filter(self, matcher):
        """
        Return a new :class:`ResultWrapper` of the elements in ``elements``
        where applying the function ``matcher`` to the element results in
        a truth value.
        """
        return self.__class__(
            (e for e in self if matcher(e)),
            "%s (filtered by %s)" % (self.__dict__["expr"], matcher),
        )

def guess_expression_flavor(expr):
    """
    Try to guess whether ``expr`` is a CSS selector or XPath expression.

    ``css`` is the default value returned for expressions valid in both
    syntaxes.

    :param expr: the selector or expression string
    :return: ``"css"`` or ``"xpath"``
    """
    # Not compilable as XPath: can only be CSS
    try:
        XPath(expr)
    except XPathError:
        return "css"

    # Not compilable as a CSS selector: can only be XPath
    try:
        CSSSelector(expr)
    except (AssertionError, SelectorSyntaxError):
        return "xpath"

    # Valid in both syntaxes: path separators and attribute axes suggest XPath
    if "/" in expr:
        return "xpath"
    if "@" in expr:
        return "xpath"
    return "css"

def _get_options(el):
    if el.tag == "select":
        return el.xpath(".//option")
        return el.xpath(

def by_index(n):
    r"""
    Select the ``n``\th option from a select box or set of checkboxes/radio
    buttons

    :param n: index of the option to select (negative values count from the
              end)
    :return: a function taking the element being filled and returning the
             value of its ``n``\th option
    """

    def by_index(el):
        return _value(_get_options(el)[n])

    return by_index
def first(el):
    """
    Select the first option from a select box or set of checkboxes/radio
    buttons

    Returns the selector function built by ``by_index(0)`` rather than a
    value; ``preprocess_fill_value`` resolves callables recursively, so the
    function is then called with the element being filled.
    """
    return by_index(0)
def last(el):
    """
    Select the last option from a select box or set of checkboxes/radio
    buttons

    Returns the selector function built by ``by_index(-1)`` rather than a
    value; ``preprocess_fill_value`` resolves callables recursively, so the
    function is then called with the element being filled.
    """
    return by_index(-1)
def random_choice(el):
    """
    Select a randomly chosen option from a select box or set of
    checkboxes/radio buttons

    :param el: the element being filled
    :return: the value of one of the element's options
    :raises ValueError: if the element offers no options
    """
    options = _get_options(el)
    selected = options[random.randrange(len(options))]
    return _value(selected)