#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
""" Utility functions to work with HTTP headers.
This module provides some utility functions useful for parsing
and dealing with some of the HTTP 1.1 protocol headers which
are not adequately covered by the standard Python libraries.
Requires Python 2.2 or later.
The functionality includes the correct interpretation of the various
Accept-* style headers, content negotiation, byte range requests,
HTTP-style date/times, and more.
There are a few classes defined by this module:
* class content_type -- media types such as 'text/plain'
* class language_tag -- language tags such as 'en-US'
* class range_set -- a collection of (byte) range specifiers
* class range_spec -- a single (byte) range specifier
The primary functions in this module may be categorized as follows:
* Content negotiation functions...
* acceptable_content_type()
* acceptable_language()
* acceptable_charset()
* acceptable_encoding()
* Mid-level header parsing functions...
* parse_accept_header()
* parse_accept_language_header()
* parse_range_header()
* Date and time...
* http_datetime()
* parse_http_datetime()
* Utility functions...
* quote_string()
* remove_comments()
* canonical_charset()
* Low level string parsing functions...
* parse_comma_list()
* parse_comment()
* parse_qvalue_accept_list()
* parse_media_type()
* parse_number()
* parse_parameter_list()
* parse_quoted_string()
* parse_range_set()
* parse_range_spec()
* parse_token()
* parse_token_or_quoted_string()
And there are some specialized exception classes:
* RangeUnsatisfiableError
* RangeUnmergableError
* ParseError
See also:
* RFC 2616, "Hypertext Transfer Protocol -- HTTP/1.1", June 1999.
<http://www.ietf.org/rfc/rfc2616.txt>
Errata at <http://purl.org/NET/http-errata>
* RFC 2046, "(MIME) Part Two: Media Types", November 1996.
<http://www.ietf.org/rfc/rfc2046.txt>
* RFC 3066, "Tags for the Identification of Languages", January 2001.
<http://www.ietf.org/rfc/rfc3066.txt>
Note: I have made a small modification on the regexp for internet date,
to make it more liberal (ie, accept a time zone string of the form +0000)
Ivan Herman <http://www.ivan-herman.net>, March 2011.
Have added statements to make it (hopefully) Python 3 compatible.
Ivan Herman <http://www.ivan-herman.net>, August 2012.
"""
__author__ = "Deron Meranda <http://deron.meranda.us/>"
__date__ = "2012-08-31"
__version__ = "1.02"
__credits__ = """Copyright (c) 2005 Deron E. Meranda <http://deron.meranda.us/>
Licensed under GNU LGPL 2.1 or later. See <http://www.fsf.org/>.
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
"""
# Character classes from RFC 2616 section 2.2.
# They start out as plain strings (which already support the `in`
# operator) and are upgraded to frozensets further down whenever a
# set implementation is available.
SEPARATORS = '()<>@,;:\\"/[]?={} \t'
LWS = ' \t\n\r' # linear white space
CRLF = '\r\n'
DIGIT = '0123456789'
HEX = '0123456789ABCDEFabcdef'

import sys
PY3 = (sys.version_info[0] >= 3)

# Try to get a set/frozenset implementation if possible
try:
    type(frozenset)
except NameError:
    try:
        # The demset.py module is available at http://deron.meranda.us/
        from demset import set, frozenset
        __emulating_set = True # So we can clean up global namespace later
    except ImportError:
        pass

try:
    # Turn character classes into set types (for Python 2.4 or greater).
    # list() is used instead of a list comprehension so that no loop
    # variable is left behind in the module namespace; the former
    # `del c` cleanup raised NameError on Python 3 and was only hidden
    # by the handler below.
    SEPARATORS = frozenset(list(SEPARATORS))
    LWS = frozenset(list(LWS))
    CRLF = frozenset(list(CRLF))
    DIGIT = frozenset(list(DIGIT))
    HEX = frozenset(list(HEX))
except NameError:
    # No frozenset available (Python 2.3 or earlier without demset);
    # leave the character classes as simple strings.
    pass
def _is_string( obj ):
    """Tells whether `obj` is a text string.

    On Python 3 only `str` instances qualify; on Python 2 both `str`
    and `unicode` instances do.
    """
    if not PY3:
        return isinstance(obj, (str, unicode))
    return isinstance(obj, str)
def http_datetime( dt=None ):
    """Formats a datetime as an HTTP 1.1 Date/Time string.

    Takes a standard Python datetime object and returns a string
    formatted according to the HTTP 1.1 (RFC 1123) date/time format,
    for example 'Sun, 06 Nov 1994 08:49:37 GMT'.

    If no datetime is provided (or None) then the current
    time is used.

    ABOUT TIMEZONES: If the passed in datetime object is naive it is
    assumed to be in UTC already.  But if it has a tzinfo component,
    the returned timestamp string will have been converted to UTC
    automatically.  So if you use timezone-aware datetimes, you need
    not worry about conversion to UTC.

    The day and month names are always the English abbreviations
    required by HTTP, regardless of the process locale.
    """
    if dt is None:
        import datetime, time
        # Current UTC time at one-second resolution, which is all the
        # HTTP format can express anyway.
        dt = datetime.datetime(*time.gmtime()[:6])
    else:
        try:
            dt = dt - dt.utcoffset()
        except (AttributeError, TypeError):
            # utcoffset() is missing, or returned None for a naive
            # datetime: no timezone offset, assume already in UTC.
            pass
    # strftime('%a') and strftime('%b') follow the current locale, so the
    # names are looked up in fixed English tables instead.
    day_names = ('Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun')
    month_names = ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                   'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec')
    s = '%s, %02d %s %04d %s GMT' % (day_names[dt.weekday()],
                                     dt.day,
                                     month_names[dt.month - 1],
                                     dt.year,
                                     dt.strftime('%H:%M:%S'))
    return s
def parse_http_datetime( datestring, utc_tzinfo=None, strict=False ):
    """Returns a datetime object from an HTTP 1.1 Date/Time string.

    Note that HTTP dates are always in UTC, so the returned datetime
    object will also be in UTC.

    You can optionally pass in a tzinfo object which should represent
    the UTC timezone, and the returned datetime will then be
    timezone-aware (allowing you to more easily translate it into
    different timezones later).

    If you set 'strict' to True, then only the RFC 1123 format
    is recognized.  Otherwise the backwards-compatible RFC 1036
    and Unix asctime(3) formats are also recognized.

    Please note that the day-of-the-week is not validated.
    Also two-digit years, although not HTTP 1.1 compliant, are
    treated according to recommended Y2K rules.

    Raises ValueError if the string matches none of the accepted
    formats, names a timezone other than GMT/UTC, or has an
    unrecognizable month.
    """
    import re, datetime, time
    # RFC 1123 format, e.g. "Sun, 06 Nov 1994 08:49:37 GMT"
    m = re.match(r'(?P<DOW>[a-z]+), (?P<D>\d+) (?P<MON>[a-z]+) (?P<Y>\d+) (?P<H>\d+):(?P<M>\d+):(?P<S>\d+(\.\d+)?) (?P<TZ>[a-zA-Z0-9_+]+)$',
                 datestring, re.IGNORECASE)
    if not m and not strict:
        # asctime(3) format, e.g. "Sun Nov  6 08:49:37 1994".  A single
        # digit day is padded with an extra space, hence the " +".
        m = re.match(r'(?P<DOW>[a-z]+) (?P<MON>[a-z]+) +(?P<D>\d+) (?P<H>\d+):(?P<M>\d+):(?P<S>\d+) (?P<Y>\d+)$',
                     datestring, re.IGNORECASE)
        if not m:
            # Dash-separated format, e.g. "Sunday, 06-Nov-94 08:49:37 GMT"
            m = re.match(r'(?P<DOW>[a-z]+), (?P<D>\d+)-(?P<MON>[a-z]+)-(?P<Y>\d+) (?P<H>\d+):(?P<M>\d+):(?P<S>\d+(\.\d+)?) (?P<TZ>\w+)$',
                         datestring, re.IGNORECASE)
    if not m:
        raise ValueError('HTTP date is not correctly formatted')

    # The asctime format carries no timezone field at all; it is taken
    # to be GMT.
    tz = (m.groupdict().get('TZ') or 'GMT').upper()
    # '+0000' is accepted by the first pattern and must pass here too.
    if tz not in ('GMT','UTC','0000','+0000','00:00'):
        raise ValueError('HTTP date is not in GMT timezone')

    monname = m.group('MON').upper()
    mdict = {'JAN':1, 'FEB':2, 'MAR':3, 'APR':4, 'MAY':5, 'JUN':6,
             'JUL':7, 'AUG':8, 'SEP':9, 'OCT':10, 'NOV':11, 'DEC':12}
    month = mdict.get(monname)
    if not month:
        raise ValueError('HTTP date has an unrecognizable month')

    y = int(m.group('Y'))
    if y < 100:
        # Integer division: "/" would produce a float on Python 3, which
        # the datetime constructor rejects.
        century = time.gmtime()[0] // 100
        if y < 50:
            y = century * 100 + y
        else:
            y = (century - 1) * 100 + y

    d = int(m.group('D'))
    hour = int(m.group('H'))
    minute = int(m.group('M'))

    # Seconds may carry a fractional part; datetime wants whole seconds
    # plus microseconds rather than a float.
    sec_text = m.group('S')
    if '.' in sec_text:
        whole, fraction = sec_text.split('.', 1)
        second = int(whole)
        microsecond = int((fraction + '000000')[:6])
    else:
        second = int(sec_text)
        microsecond = 0

    dt = datetime.datetime( y, month, d, hour, minute, second, microsecond,
                            tzinfo=utc_tzinfo )
    return dt
class RangeUnsatisfiableError(ValueError):
    """Raised when a byte range lies outside the file size boundaries."""

    def __init__(self, reason=None):
        # A missing (or empty) reason falls back to a generic message.
        message = reason or 'Range is unsatisfiable'
        super(RangeUnsatisfiableError, self).__init__(message)
class RangeUnmergableError(ValueError):
    """Raised when byte ranges are noncontiguous and can not be merged together."""

    def __init__(self, reason=None):
        # A missing (or empty) reason falls back to a generic message.
        message = reason or 'Ranges can not be merged together'
        super(RangeUnmergableError, self).__init__(message)
class ParseError(ValueError):
    """A string parsing error.

    Besides the message, the exception remembers the string that was
    being parsed (`input_string`) and the offset within it at which
    the problem was detected (`at_position`).
    """

    def __init__(self, args, input_string, at_position):
        super(ParseError, self).__init__(args)
        self.at_position = at_position
        self.input_string = input_string

    def __str__(self):
        """Returns the message followed by a hint of where parsing failed."""
        where = self.at_position
        text = self.input_string
        if where < len(text):
            # Show at most the next 16 characters of the offending input.
            return '%s\n\tOccured near %s' % (self.args[0], repr(text[where:where+16]))
        return '%s\n\tOccured at end of string' % self.args[0]
def is_token(s):
    """Determines if the string consists only of valid token characters.

    A character is rejected if it is a control character (code below
    32), lies outside the 7-bit ASCII range (code above 127), or is one
    of the SEPARATORS.  This is the same character test used by
    quote_string() and parse_token_or_quoted_string().

    Note that an empty string yields True, since it contains no
    invalid character.
    """
    for c in s:
        code = ord(c)
        # The upper bound is 127, not 128: chr(128) is not an ASCII CHAR.
        if code < 32 or code > 127 or c in SEPARATORS:
            return False
    return True
def parse_comma_list(s, start=0, element_parser=None, min_count=0, max_count=0):
    """Parses a comma-separated list with optional whitespace.

    `element_parser` is an optional callback that parses a single
    element.  It is called with the string and a starting position
    and must return a tuple (parsed_result, chars_consumed).  When it
    is omitted, each element is parsed as a token or a quoted string.

    Parsing stops at the first position where the element parser
    consumes nothing, or where an element is followed by something
    other than whitespace or a comma.  Runs of commas (empty elements)
    between elements are skipped.

    If min_count > 0, then at least that many non-empty elements
    must be in the list, or a ParseError is raised.

    If max_count > 0, then no more than that many non-empty elements
    may be in the list, or a ParseError is raised.

    A ParseError is also raised when `start` is not inside the string.

    Returns a tuple (list_of_results, chars_consumed).
    """
    end = len(s)
    if start >= end:
        if min_count > 0 and start == end:
            raise ParseError('Comma-separated list must contain some elements',s,start)
        raise ParseError('Starting position is beyond the end of the string',s,start)

    parse_element = element_parser or parse_token_or_quoted_string
    items = []
    cursor = start
    while cursor < end:
        parsed = parse_element( s, cursor )
        if not parsed or parsed[1] == 0:
            break  # nothing more that looks like an element
        items.append( parsed[0] )
        cursor += parsed[1]
        # Optional whitespace after the element
        while cursor < end and s[cursor] in LWS:
            cursor += 1
        # Anything but a comma here ends the list
        if cursor < end and s[cursor] != ',':
            break
        # Swallow the comma, plus any further commas ("empty" elements)
        # and the whitespace around them
        while cursor < end and s[cursor] == ',':
            cursor += 1
            while cursor < end and s[cursor] in LWS:
                cursor += 1

    count = len(items)
    if count < min_count:
        raise ParseError('Comma-separated list does not have enough elements',s,cursor)
    if max_count and count > max_count:
        raise ParseError('Comma-separated list has too many elements',s,cursor)
    return (items, cursor - start)
def parse_token(s, start=0):
    """Parses a token starting at position `start` of `s`.

    A token is a string defined by RFC 2616 section 2.2 as:
       token = 1*<any CHAR except CTLs or separators>

    This is parse_token_or_quoted_string() restricted to tokens only.

    Returns a tuple (token, chars_consumed), or ('',0) if no token
    starts at the given string position.  On a syntax error, such as
    a quotation mark at the starting position, a ParseError exception
    will be raised.
    """
    # Positional arguments are (s, start, allow_quoted, allow_token).
    return parse_token_or_quoted_string(s, start, False, True)
def quote_string(s, always_quote=True):
    """Produces a quoted string according to HTTP 1.1 rules.

    Every character that is a control character (code below 32), lies
    outside 7-bit ASCII (code above 127) or is one of the SEPARATORS
    is preceded by a backslash.

    If always_quote is False and if the string is also a valid token,
    then this function may return a string without quotes.  An empty
    string is not a valid token, so it is always returned as '""'.
    """
    # A token needs at least one character, so '' must be quoted.
    need_quotes = not s
    pieces = []
    for c in s:
        if ord(c) < 32 or ord(c) > 127 or c in SEPARATORS:
            pieces.append('\\' + c)
            need_quotes = True
        else:
            pieces.append(c)
    q = ''.join(pieces)
    if need_quotes or always_quote:
        return '"' + q + '"'
    else:
        return q
def parse_quoted_string(s, start=0):
    r"""Parses a quoted string starting at position `start` of `s`.

    This is parse_token_or_quoted_string() restricted to quoted
    strings only.

    Returns a tuple (string, chars_consumed).  The quote marks will
    have been removed and all \-escapes will have been replaced with
    the characters they represent.

    A ParseError is raised if no quotation mark is found at the
    starting position, or on any other syntax error.
    """
    return parse_token_or_quoted_string(s, start, allow_quoted=True, allow_token=False)
def parse_token_or_quoted_string(s, start=0, allow_quoted=True, allow_token=True):
    r"""Parses a token or a quoted-string.

    's' is the string to parse, while start is the position within the
    string where parsing should begin.  It returns a tuple
    (token, chars_consumed), with all \-escapes and quotation already
    processed.  For a quoted-string, chars_consumed includes both
    quotation marks.  If a token is expected but the character at
    'start' can not be part of one, ('', 0) is returned.

    Syntax is according to BNF rules in RFC 2616 section 2.2,
    specifically the 'token' and 'quoted-string' declarations.
    Syntax errors in the input string will result in ParseError
    being raised.

    If allow_quoted is False, then only tokens will be parsed instead
    of either a token or quoted-string.

    If allow_token is False, then only quoted-strings will be parsed
    instead of either a token or quoted-string.

    Passing False for both options raises ValueError.
    """
    if not allow_quoted and not allow_token:
        raise ValueError('Parsing can not continue with options provided')

    if start >= len(s):
        raise ParseError('Starting position is beyond the end of the string',s,start)
    has_quote = (s[start] == '"')
    if has_quote and not allow_quoted:
        raise ParseError('A quoted string was not expected', s, start)
    if not has_quote and not allow_token:
        raise ParseError('Expected a quotation mark', s, start)

    # Characters are collected in a list and joined once at the end,
    # instead of growing a string one character at a time.
    chars = []
    pos = start
    if has_quote:
        pos += 1  # step over the opening quote mark
    while pos < len(s):
        c = s[pos]
        if has_quote:
            if c == '\\':
                # Note this is NOT C-style escaping; the character after
                # the \ is taken literally.
                pos += 1
                if pos == len(s):
                    raise ParseError("End of string while expecting a character after '\\'",s,pos)
                chars.append(s[pos])
            elif c == '"':
                break  # closing quote mark; checked again below
            else:
                chars.append(c)
        elif c in SEPARATORS or ord(c)<32 or ord(c)>127:
            break  # first character that can not belong to a token
        else:
            chars.append(c)
        pos += 1
    if has_quote:
        # Make sure we have a closing quote mark
        if pos >= len(s) or s[pos] != '"':
            raise ParseError('Quoted string is missing closing quote mark',s,pos)
        else:
            pos += 1
    return ''.join(chars), (pos - start)
def _test_comments():
    """A self-test on comment processing.  Returns number of test failures.

    Each case is a tuple (input, expected_output, collapse_spaces) that
    is run through remove_comments(); every mismatch is reported on
    standard output and counted.
    """
    cases = [
        ( r'', '', False ),
        ( r'(hello)', '', False ),
        ( r'abc (hello) def', 'abc def', False ),
        ( r'abc (he(xyz)llo) def', 'abc def', False ),
        ( r'abc (he\(xyz)llo) def', 'abc llo) def', False ),
        ( r'abc(hello)def', 'abcdef', True ),
        ( r'abc (hello) def', 'abc def', True ),
        ( r'abc (hello)def', 'abc def', True ),
        ( r'abc(hello) def', 'abc def', True ),
        ( r'abc(hello) (world)def', 'abc def', True ),
        ( r'abc(hello)(world)def', 'abcdef', True ),
        ( r' (hello) (world) def', 'def', True ),
        ( r'abc (hello) (world) ', 'abc', True ),
    ]
    failures = 0
    for text, expected, collapse in cases:
        actual = remove_comments( text, collapse )
        if expected != actual:
            print( 'Comment test failed:' )
            print( ' remove_comments( %s, collapse_spaces=%s ) -> %s' % (repr(text), repr(collapse), repr(actual)) )
            print( ' expected %s' % repr(expected) )
            failures += 1
    return failures
class range_spec(object):
    """A single contiguous (byte) range.

    A range_spec defines a range (of bytes) by specifying two offsets,
    the 'first' and 'last', which are inclusive in the range.  Offsets
    are zero-based (the first byte is offset 0).  The range can not be
    empty or negative (has to satisfy first <= last).

    The range can be unbounded on either end, represented here by the
    None value, with these semantics:

       * A 'last' of None always indicates the last possible byte
         (although that offset may not be known).

       * A 'first' of None indicates this is a suffix range, where
         the last value is actually interpreted to be the number
         of bytes at the end of the file (regardless of file size).

    Note that it is not valid for both first and last to be None.
    """

    __slots__ = ['first','last']

    def __init__(self, first=0, last=None):
        """Creates a range; the defaults describe the whole file ("0-")."""
        self.set( first, last )

    def set(self, first, last):
        """Sets the value of this range given the first and last offsets.

        Raises ValueError if first > last, or if both are None.
        """
        if first is not None and last is not None and first > last:
            raise ValueError("Byte range does not satisfy first <= last.")
        elif first is None and last is None:
            raise ValueError("Byte range can not omit both first and last offsets.")
        self.first = first
        self.last = last

    def __repr__(self):
        return '%s.%s(%s,%s)' % (self.__class__.__module__, self.__class__.__name__,
                                 self.first, self.last)

    def __str__(self):
        """Returns a string form of the range as would appear in a Range: header."""
        if self.first is None and self.last is None:
            return ''
        s = ''
        if self.first is not None:
            s += '%d' % self.first
        s += '-'
        if self.last is not None:
            s += '%d' % self.last
        return s

    def __eq__(self, other):
        """Compare ranges for equality.

        Note that if non-specific ranges are involved (such as 34- and -5),
        they could compare as not equal even though they may represent
        the same set of bytes in some contexts.

        Objects without 'first' and 'last' attributes never compare equal.
        """
        try:
            return self.first == other.first and self.last == other.last
        except AttributeError:
            # Not range-like (e.g. None): let Python fall back to its
            # default comparison instead of crashing.
            return NotImplemented

    def __ne__(self, other):
        """Compare ranges for inequality.

        Note that if non-specific ranges are involved (such as 34- and -5),
        they could compare as not equal even though they may represent
        the same set of bytes in some contexts.
        """
        equal = self.__eq__(other)
        if equal is NotImplemented:
            return equal
        return not equal

    def __lt__(self, other):
        """< operator is not defined"""
        raise NotImplementedError('Ranges can not be relationally compared')

    def __le__(self, other):
        """<= operator is not defined"""
        raise NotImplementedError('Ranges can not be relationally compared')

    def __gt__(self, other):
        """> operator is not defined"""
        raise NotImplementedError('Ranges can not be relationally compared')

    def __ge__(self, other):
        """>= operator is not defined"""
        raise NotImplementedError('Ranges can not be relationally compared')

    def copy(self):
        """Makes a copy of this range object."""
        return self.__class__( self.first, self.last )

    def is_suffix(self):
        """Returns True if this is a suffix range.

        A suffix range is one that specifies the last N bytes of a
        file regardless of file size.
        """
        return self.first is None

    def is_fixed(self):
        """Returns True if this range is absolute and a fixed size.

        This occurs only if neither first or last is None.  Converse
        is the is_unbounded() method.
        """
        return self.first is not None and self.last is not None

    def is_unbounded(self):
        """Returns True if the number of bytes in the range is unspecified.

        This can only occur if either the 'first' or the 'last' member
        is None.  Converse is the is_fixed() method.
        """
        return self.first is None or self.last is None

    def is_whole_file(self):
        """Returns True if this range includes all possible bytes.

        This can only occur if the 'last' member is None and the first
        member is 0.
        """
        return self.first == 0 and self.last is None

    def __contains__(self, offset):
        """Does this byte range contain the given byte offset?

        If the offset < 0, then it is taken as an offset from the end
        of the file, where -1 is the last byte.  This type of offset
        will only work with suffix ranges.
        """
        if offset < 0:
            if self.first is not None:
                return False
            else:
                return self.last >= -offset
        elif self.first is None:
            return False
        elif self.last is None:
            return True
        else:
            return self.first <= offset <= self.last

    def fix_to_size(self, size):
        """Changes a length-relative range to an absolute range based upon given file size.

        Suffix ranges and open-ended ranges are converted to explicit
        first and last offsets.  A 'last' offset at or beyond the end of
        the file is reduced to the offset of the final byte, as RFC 2616
        section 14.35.1 prescribes.

        Note that zero-length files are handled as special cases,
        since the only way possible to specify a zero-length range is
        with the suffix range "-0".  Thus unless this range is a suffix
        range, it can not satisfy a zero-length file.

        Raises RangeUnsatisfiableError if the range begins beyond the
        end of the file, or if a non-suffix range is applied to a
        zero-length file.
        """
        if size == 0:
            if self.first is None:
                self.last = 0
                return
            raise RangeUnsatisfiableError("Range can not satisfy a zero-length file.")

        if self.first is None:
            # A suffix range: the final 'last' bytes, or the whole file
            # when the file is shorter than that.
            self.first = max(size - self.last, 0)
            self.last = size - 1
        elif self.first > size - 1:
            raise RangeUnsatisfiableError('Range begins beyond the file size.')
        elif self.last is None or self.last > size - 1:
            # Unbounded, or extending past the end of the file
            self.last = size - 1

    def merge_with(self, other):
        """Tries to merge the given range into this one.

        The size of this range may be enlarged as a result.

        A RangeUnmergableError is raised if the two ranges do not
        overlap or are not contiguous with each other.
        """
        if self.is_whole_file() or self == other:
            return
        elif other.is_whole_file():
            self.first, self.last = 0, None
            return

        a1, z1 = self.first, self.last
        a2, z2 = other.first, other.last

        if self.is_suffix():
            if z1 == 0: # self is zero-length, so merge becomes a copy
                self.first, self.last = a2, z2
            elif other.is_suffix():
                # Two suffix ranges: the longer one covers the shorter
                self.last = max(z1, z2)
            else:
                raise RangeUnmergableError()
            return
        elif other.is_suffix():
            if z2 == 0: # other is zero-length, so nothing to merge
                return
            else:
                raise RangeUnmergableError()

        # From here on neither range is a suffix range, so both first
        # offsets are known.
        if a2 < a1:
            # swap ranges so a1 <= a2
            a1, z1, a2, z2 = a2, z2, a1, z1

        if z1 is None:
            # The earlier range is open-ended and so already covers the
            # later one entirely.
            self.first, self.last = a1, None
        elif a2 > z1 + 1:
            # There is a gap between the end of one and start of the other
            raise RangeUnmergableError()
        elif z2 is None:
            self.first, self.last = a1, None
        else:
            self.first, self.last = a1, max(z1, z2)
class range_set(object):
    """A collection of range_specs, with units (e.g., bytes).
    """
    __slots__ = ['units', 'range_specs']

    def __init__(self):
        """Creates an empty range set whose units are 'bytes'."""
        self.units = 'bytes'
        self.range_specs = [] # a list of range_spec objects

    def __str__(self):
        """Returns the set as '<units>=<range>, <range>, ...'."""
        return self.units + '=' + ', '.join([str(s) for s in self.range_specs])

    def __repr__(self):
        return '%s.%s(%s)' % (self.__class__.__module__,
                              self.__class__.__name__,
                              repr(self.__str__()) )

    def from_str(self, s, valid_units=('bytes','none')):
        """Sets this range set based upon a string, such as the Range: header.

        You can also use the parse_range_set() function for more control.

        If a parsing error occurs, the pre-existing value of this range
        set is left unchanged.
        """
        r, k = parse_range_set( s, valid_units=valid_units )
        if k < len(s):
            raise ParseError("Extra unparsable characters in range set specifier",s,k)
        # Only assign once parsing has fully succeeded
        self.units = r.units
        self.range_specs = r.range_specs

    def is_single_range(self):
        """Does this range specifier consist of only a single range set?"""
        return len(self.range_specs) == 1

    def is_contiguous(self):
        """Can the collection of range_specs be coalesced into a single contiguous range?

        The range_specs themselves are not modified.
        """
        if len(self.range_specs) <= 1:
            return True
        # Merge into a copy so the stored range_specs stay untouched
        merged = self.range_specs[0].copy()
        for s in self.range_specs[1:]:
            try:
                merged.merge_with(s)
            except Exception:
                # merge_with() reports non-contiguous ranges with
                # RangeUnmergableError.  Any other merge failure is, as
                # before, also treated as "not contiguous", but
                # KeyboardInterrupt and SystemExit are no longer swallowed.
                return False
        return True

    def fix_to_size(self, size):
        """Changes all length-relative range_specs to absolute range_specs based upon given file size.

        If none of the range_specs in this set can be satisfied, then the
        entire set is considered unsatisfiable and a
        RangeUnsatisfiableError is raised.  Otherwise any unsatisfiable
        range_specs will simply be removed from this set.
        """
        satisfiable = []
        for spec in self.range_specs:
            try:
                spec.fix_to_size( size )
            except RangeUnsatisfiableError:
                continue  # drop this one
            satisfiable.append( spec )
        self.range_specs = satisfiable
        if len(self.range_specs) == 0:
            raise RangeUnsatisfiableError('No ranges can be satisfied')

    def coalesce(self):
        """Collapses all consecutive range_specs which together define a contiguous range.

        Note though that this method will not re-sort the range_specs, so a
        potentially contiguous range may not be collapsed if they are
        not sorted.  For example the ranges:
            10-20, 30-40, 20-30
        will not be collapsed to just 10-40.  However if the ranges are
        sorted first as with:
            10-20, 20-30, 30-40
        then they will collapse to 10-40.
        """
        if len(self.range_specs) <= 1:
            return
        # Keep merging each range into the most recent survivor, so that a
        # run of any length collapses into a single range.
        merged = [self.range_specs[0]]
        for spec in self.range_specs[1:]:
            try:
                merged[-1].merge_with( spec )
            except RangeUnmergableError:
                merged.append( spec )
        self.range_specs = merged
def parse_number( s, start=0 ):
    """Parses a positive decimal integer number from the string.

    Reads the longest run of DIGIT characters beginning at `start`.

    A tuple is returned (number, chars_consumed).  If the character
    at `start` is not a decimal digit, then (None,0) is returned.
    A ParseError is raised if `start` is not inside the string.
    """
    length = len(s)
    if start >= length:
        raise ParseError('Starting position is beyond the end of the string',s,start)
    if s[start] not in DIGIT:
        return (None,0)  # not a number
    value = 0
    cursor = start
    zero = ord('0')
    while cursor < length and s[cursor] in DIGIT:
        # Shift the digits read so far and append the new one
        value = value * 10 + (ord(s[cursor]) - zero)
        cursor += 1
    return value, cursor - start
def parse_range_spec( s, start=0 ):
    """Parses a (byte) range_spec.

    Accepts the forms "first-last", "first-" and "-last", beginning
    at position `start` of `s`.

    Returns a tuple (range_spec, chars_consumed).

    Raises ParseError if `start` is not inside the string, if the
    text does not begin with a digit or '-', if the '-' is missing,
    if both offsets are omitted, or if first > last.
    """
    if start >= len(s):
        raise ParseError('Starting position is beyond the end of the string',s,start)
    if s[start] not in DIGIT and s[start] != '-':
        raise ParseError("Invalid range, expected a digit or '-'",s,start)
    first, last = None, None
    pos = start
    # parse_number() yields (None, 0) when the spec starts with '-'
    first, k = parse_number( s, pos )
    pos += k
    # The bounds check matters: a bare number such as "123" leaves pos at
    # the end of the string, and indexing there would raise IndexError.
    if pos < len(s) and s[pos] == '-':
        pos += 1
        if pos < len(s):
            last, k = parse_number( s, pos )
            pos += k
    else:
        raise ParseError("Byte range must include a '-'",s,pos)
    if first is None and last is None:
        raise ParseError('Byte range can not omit both first and last indices.',s,start)
    try:
        R = range_spec( first, last )
    except ValueError:
        # range_spec rejects first > last with a plain ValueError; report
        # it as a parse error carrying the offending position.  ParseError
        # is itself a ValueError, so existing handlers still apply.
        raise ParseError('Byte range does not satisfy first <= last.',s,start)
    return R, pos-start
def parse_range_set( s, start=0, valid_units=('bytes','none') ):
    """Parses a (byte) range set specifier.

    The expected form is a units token, an '=' sign and then a
    comma-separated list of one or more range specs, with optional
    whitespace around the '=' and after the list.

    If `valid_units` is non-empty, the units token must be one of
    its members.

    Returns a tuple (range_set, chars_consumed).  A ParseError is
    raised on any syntax error, including unparsable characters left
    over after the list.
    """
    total = len(s)
    if start >= total:
        raise ParseError('Starting position is beyond the end of the string',s,start)

    def skip_lws( at ):
        # Returns the first position at or after `at` that is not whitespace
        while at < total and s[at] in LWS:
            at += 1
        return at

    units, consumed = parse_token( s, start )
    pos = start + consumed
    if valid_units and units not in valid_units:
        raise ParseError('Unsupported units type in range specifier',s,start)

    pos = skip_lws( pos )
    if not (pos < total and s[pos] == '='):
        raise ParseError("Invalid range specifier, expected '='",s,pos)
    pos = skip_lws( pos + 1 )

    range_specs, consumed = parse_comma_list( s, pos, parse_range_spec, min_count=1 )
    # Make sure no trash is at the end of the string
    pos = skip_lws( pos + consumed )
    if pos < total:
        raise ParseError('Unparsable characters in range set specifier',s,pos)

    ranges = range_set()
    ranges.units = units
    ranges.range_specs = range_specs
    return ranges, pos-start
def _split_at_qfactor( s ):
"""Splits a string at the quality factor (;q=) parameter.
Returns the left and right substrings as a two-member tuple.
"""
# It may be faster, but incorrect, to use s.split(';q=',1), since
# HTTP allows any amount of linear white space (LWS) to appear
# between the parts, so it could also be "; q = ".
# We do this parsing 'manually' for speed rather than using a
# regex, which would be r';[ \t\r\n]*q[ \t\r\n]*=[ \t\r\n]*'
pos = 0
while 0 <= pos < len(s):
pos = s.find(';', pos)
if pos < 0:
break # no more parameters
startpos = pos
pos = pos + 1
while pos < len(s) and s[pos] in LWS:
pos = pos + 1
if pos < len(s) and s[pos] == 'q':
pos = pos + 1
while pos < len(s) and s[pos] in LWS:
pos = pos + 1
if pos < len(s) and s[pos] == '=':
pos = pos + 1
while pos < len(s) and s[pos] in LWS: