Skip to content

lhttpc url parsing and the '@' character in the query string #64

@bituman9

Description

@bituman9

Hi, while using lhttpc I ran into a URL parsing issue. Since RFC 3986 allows '@' in query strings, I did not URL-encode it when sending an lhttpc request. This resulted in lhttpc parsing the URL incorrectly in the lhttpc_lib.erl:parse_url/1 function. (It interpreted the domain, the path, and the part of the query string before the '@' sign as the username, and the rest of the query string as the host to connect to.)

I ended up using a workaround locally (but please take the following code with a grain of salt, as I have just started programming in Erlang):

%% @doc Parse a URL string into an #lhttpc_url{} record.
%%
%% The whole URL is matched against a single regular expression and the
%% captured groups are handed to split_matched_url_to_parts/1, which returns
%% the scheme, credentials, host, port and path used to fill the record.
%%
%% Crashes with badmatch when re:run/3 does not return {match, _}.
-spec parse_url(string()) -> #lhttpc_url{}.
parse_url(URL) ->
%% NOTE(review): this pattern looks like it lost characters when it was pasted
%% (e.g. the password group and the trailing `(#.)?` fragment group each admit
%% only a single character) -- confirm against the author's original regex
%% before relying on it.
UrlParsingRegexp = "^(?:(?:(([^:\\/#\\?]+:)?(?:(?:\/\/)(?:(?:(?:([^:@\\/#\\?]+)(?:\:([^:@\\/#\\?]))?)@)?(([^:\\/#\\?\\]\[]+|\[[^\\/\\]@#?]+\])(?:\:([0-9]+))?))?)?)?((?:\/?(?:[^\\/\\?#]+\/+))(?:[^\\?#])))?(\?[^#]+)?)(#.)?",
%% Capture every group as a string (list); the match is asserted.
{match, Matches} = re:run(URL, UrlParsingRegexp, [{capture, all, list}]),
{Scheme, User, Passwd, Host, Port, Path} = split_matched_url_to_parts(Matches),
#lhttpc_url{
%% The host is stored lower-cased.
host = string:to_lower(Host),
port = Port,
path = Path,
user = User,
password = Passwd,
%% true only when the scheme atom is https.
is_ssl = (Scheme =:= https)
}.

%% @doc Turn the capture list produced by the URL regex into
%% {Scheme, Username, Password, HostName, Port, PathAndQuery}.
%%
%% The list must contain at least the nine leading elements (whole match,
%% origin, protocol, username, password, host, host name, port, path); when a
%% tenth element is present it is treated as the query and appended to the
%% path. Anything after the query is ignored.
split_matched_url_to_parts([_Href, _Origin, Protocol, Username, Password,
                            _Host, HostName, RawPort, Path | Trailing]) ->
    Scheme = get_scheme_from_parts(Protocol),
    Port = get_port_from_parts(Protocol, RawPort),
    PathAndQuery =
        case Trailing of
            [Query | _] -> Path ++ Query;
            _ -> Path
        end,
    {Scheme, Username, Password, HostName, Port, PathAndQuery}.

%% @doc Resolve the port number for a parsed URL.
%%
%% Protocol is the captured scheme including the trailing colon and Port is
%% the captured port string. An empty port falls back to 80 for "http:" and
%% 443 for "https:"; otherwise the port string is converted with
%% list_to_integer/1, which raises badarg if it is not a valid integer
%% (including an empty port for any other protocol).
-spec get_port_from_parts(string(), string()) -> integer().
get_port_from_parts("http:", []) -> 80;
get_port_from_parts("https:", []) -> 443;
%% The protocol is irrelevant once an explicit port is given; the underscore
%% prefix avoids an unused-variable compiler warning.
get_port_from_parts(_Protocol, Port) -> list_to_integer(Port).

%% @doc Map the captured protocol string (scheme plus trailing colon) to the
%% scheme atom. Only "http:" and "https:" are accepted; any other value fails
%% with function_clause.
-spec get_scheme_from_parts(string()) -> http | https.
get_scheme_from_parts("https:") ->
    https;
get_scheme_from_parts("http:") ->
    http.

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type
    No fields configured for issues without a type.

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions