#!/usr/bin/env python
'''
$Id: KeqV.py,v 1.2 2003/11/27 07:30:16 zen Exp $
'''
__rcs_id__ = '$Id: KeqV.py,v 1.2 2003/11/27 07:30:16 zen Exp $'
__version__ = '$Revision: 1.2 $'[11:-2]

import re
import types
from shlex import shlex

# Unique sentinel so that None can be passed as a legitimate default.
_marker = []


class ParseError(ValueError):
    ''' Raised when a digest message string cannot be parsed. '''
    pass


class _DigestLexer(shlex):
    '''
    shlex tokenizer configured for key=value digest messages.

    Comments are disabled, only double quotes are recognised as quotes,
    and most punctuation is treated as part of a word. '=', ',' and ';'
    are deliberately left out of wordchars so they come back as
    separate single-character tokens.
    '''

    def __init__(self, msg):
        shlex.__init__(self, msg, 'digest', posix=True)
        self.commenters = ''
        self.quotes = '"'
        self.escapedquotes = '"'
        self.wordchars = self.wordchars + '~!@#$%^&*()-+[{]}|:<>./?'

    def get_token(self):
        ''' Return the next token, converting shlex errors to ParseError. '''
        try:
            return shlex.get_token(self)
        except ValueError as x:
            raise ParseError(x)


class KeqV:
    '''
    Class to parse and assemble foo="bar",a="b" style digest messages.
    Not quite generic, as we are encoding our character set in there.

    Values are kept in self._map, a mapping of key -> list of values.
    '''

    def __init__(self, message_string=None):
        ''' Parse message_string if given, otherwise start empty. '''
        if message_string:
            self._map = self.parseKeqV(message_string)
        else:
            self._map = {}

    def parseKeqV(self, s):
        '''
        Split a digest message string (eg. 'foo=bar,a="b"') into a
        mapping of key -> [value, ...]. All values are Unicode. All keys
        are ASCII.

        A key with no '=' is stored with the value None; a key followed
        by '=' and nothing else is stored with the value ''.

        Raises ParseError if the message is malformed, names a charset
        other than utf-8, has a non-ASCII key, or has a value that
        cannot be decoded using the selected charset.
        '''
        charset = 'iso-8859-1'  # default. Can be overridden by charset=utf-8

        lexer = _DigestLexer(s)
        pairs = []
        # NOTE(review): the lexer runs in posix mode, which appears to
        # strip quotes from tokens, so a quoted value that is exactly
        # ',', ';' or '=' looks like a separator below - confirm.
        while 1:
            # Get our key
            key = lexer.get_token()
            if key in ('=', ',', ';'):
                raise ParseError(
                    'Invalid digest message %r - '
                    'expecting key, found %r' % (s, key))
            if key == lexer.eof:
                break

            # Then our separator
            sep = lexer.get_token()
            if sep == lexer.eof:
                pairs.append((key, None))
                break
            elif sep in (',', ';'):
                pairs.append((key, None))
                continue
            elif sep == '=':
                # Get our value
                value = lexer.get_token()
                if value == '=':
                    raise ParseError(
                        "Invalid digest message %r - double '='" % (s,))
                elif value in (',', ';'):
                    pairs.append((key, ''))
                    continue
                elif value == lexer.eof:
                    pairs.append((key, ''))
                    break
                else:
                    pairs.append((key, value))
                    if key == 'charset':
                        if value != 'utf-8':
                            raise ParseError("Invalid charset %r" % (value,))
                        charset = 'utf-8'
                    sep = lexer.get_token()
                    if sep in (',', ';'):
                        continue
                    elif sep == lexer.eof:
                        break
                    else:
                        # Not a separator: it is the next key.
                        lexer.push_token(sep)
            else:
                # Key directly followed by another word: treat the key as
                # valueless and re-read the word as the next key.
                lexer.push_token(sep)
                pairs.append((key, None))

        # Build map while converting to Unicode. Decoding is deferred to
        # here so a charset directive applies to every value, including
        # those that appeared before it in the message.
        digest_map = {}
        for k, v in pairs:
            # The two conversions are guarded separately, and catch the
            # common UnicodeError base class, so that the implicit
            # conversions Python 2 performs cannot cause a bad key to be
            # reported as a bad value or vice versa.
            try:
                k = k.encode('ascii')
            except UnicodeError:
                raise ParseError('Non-ASCII key %r' % (k,))
            if v:
                try:
                    v = v.decode(charset)
                except UnicodeError:
                    raise ParseError(
                        'Value %r for key %r is not valid %s' % (
                            v, k, charset))
            digest_map.setdefault(k, []).append(v)
        return digest_map

    def setValue(self, key, value):
        '''
        Set the value for the given key, overwriting any existing values.
        To add additional values, use getList(key).append(value)
        '''
        self._map[key] = [value]

    def getValue(self, key, default=_marker):
        '''
        Return the single value for the given key.

        If the key has no values, return default if one was supplied,
        otherwise raise KeyError. Raise ValueError if there are
        multiple values.
        '''
        values = self._map.get(key, [])
        if not values:
            if default is _marker:
                raise KeyError(key)
            return default
        if len(values) == 1:
            return values[0]
        raise ValueError('Multiple values for %r' % (key,))

    def getList(self, key):
        '''
        Return all values for the given key as a list. This list may be
        mutated. An empty list is stored for keys not yet present.
        '''
        return self._map.setdefault(key, [])

    def __repr__(self):
        return repr(str(self))

    # Values made up solely of these characters are emitted unquoted.
    # Anchored with \Z rather than $, because $ also matches just before
    # a trailing newline and would let 'abc\n' through unquoted.
    _quote_unnecessary = re.compile(r'^[\w_\-]+\Z')

    def __str__(self):
        '''
        Return this KeqV as a valid, encoded, string.

        Keys with no values, and None values (as produced by parseKeqV
        for a key without '='), are emitted as the bare key. Other
        values are emitted as key=value, quoted and backslash-escaped
        when they contain anything other than word characters and '-'.

        Raises ValueError if a value is neither None nor a string, or
        if the stored charset is not iso-8859-1 or utf-8.
        '''
        out = []
        for k, vals in self._map.items():
            if len(vals) == 0:
                out.append(k)
                continue
            for v in vals:
                if v is None:
                    # Bare key; keeps parsed messages round-trippable.
                    out.append(k)
                    continue
                if not isinstance(v, types.StringTypes):
                    raise ValueError(
                        'Invalid value %r for key %r' % (v, k))
                if self._quote_unnecessary.search(v):
                    out.append('%s=%s' % (k, v))
                else:
                    # Escape backslashes first so the backslashes added
                    # for the quotes are not themselves doubled.
                    v = v.replace('\\', '\\\\')
                    v = v.replace('"', '\\"')
                    out.append('%s="%s"' % (k, v))
        charset = self.getValue('charset', default='iso-8859-1')
        if charset not in ('iso-8859-1', 'utf-8'):
            raise ValueError('Invalid charset %r' % (charset,))
        return ','.join(out).encode(charset)

    def __len__(self):
        ''' Return the total number of values stored across all keys. '''
        return sum(len(vals) for vals in self._map.values())

# vim: set filetype=python ts=4 sw=4 et si