#!/usr/bin/env python
'''
$Id: KeqV.py,v 1.2 2003/11/27 07:30:16 zen Exp $
'''

# RCS-style $Id$ keyword string identifying this revision of the file.
__rcs_id__  = '$Id: KeqV.py,v 1.2 2003/11/27 07:30:16 zen Exp $'
# Slice off the leading '$Revision: ' (11 characters) and the trailing ' $'
# so that only the bare revision number ('1.2') remains.
__version__ = '$Revision: 1.2 $'[11:-2]

import re, types
from shlex import shlex

# Unique sentinel object, compared by identity. KeqV.getValue() uses it as
# the default for its `default` argument so that it can tell "no default
# supplied" apart from an explicit default of None.
_marker = []

class ParseError(ValueError):
    ''' Raised when a digest message string cannot be parsed. '''


class KeqV:
    ''' Class to parse and assemble foo="bar",a="b" style digest messages.
        Not quite generic, as we are encoding our character set in there.

        Internally the message is held in self._map, a mapping of
        key -> [value, ...]. A value of None means the key appeared
        without any '=value' part.

        This class relies on Python 2 string semantics (str.decode and
        types.StringTypes): messages are parsed from byte strings into
        Unicode values and assembled back into encoded byte strings.
    '''

    # Values consisting only of these characters are emitted without
    # quotes. \Z (rather than $) is used so that a value ending in a
    # newline is not mistaken for a bare word.
    _quote_unnecessary = re.compile(r'^[\w_\-]+\Z')

    # The character sets a message may be encoded in when assembled.
    _charsets = ('iso-8859-1', 'utf-8')

    class _Lexer(shlex):
        ''' shlex tokenizer configured for digest messages: no comments,
            double quotes only, and most punctuation treated as part of
            a word. '=', ',' and ';' are deliberately left out of
            wordchars so that they come back as separate tokens.
        '''
        def __init__(self, msg):
            shlex.__init__(self, msg, 'digest', posix=True)
            self.commenters = ''
            self.quotes = '"'
            self.escapedquotes = '"'
            self.wordchars = self.wordchars + '~!@#$%^&*()-+[{]}|:<>./?'

        def get_token(self):
            ''' Return the next token, reporting lexer failures (raised
                by shlex as ValueError) as ParseError.
            '''
            try:
                return shlex.get_token(self)
            except ValueError as x:
                raise ParseError(x)

    def __init__(self, message_string=None):
        ''' Create a KeqV, optionally parsing an initial message string.
            An empty or missing message_string gives an empty mapping.
        '''
        if message_string:
            self._map = self.parseKeqV(message_string)
        else:
            self._map = {}

    def parseKeqV(self, s):
        ''' Split a digest message string (eg. 'foo=bar,a="b"') into a mapping
            of key -> [value, ...].
            All non-empty values are Unicode. All keys are ASCII.

            Pairs are separated by ',' or ';'. A key with no '=' gets the
            value None; a key followed by '=' and nothing else gets ''.
            Values are decoded as iso-8859-1 unless the message contains
            charset=utf-8, in which case every value (including those
            appearing before the charset pair) is decoded as UTF-8.

            Raises ParseError if the message is malformed, names a
            charset other than utf-8, has a non-ASCII key, or has a
            value that is not valid in the selected charset.

            NOTE(review): the lexer runs in posix mode and hands back
            quoted tokens without their quotes, so a quoted value that
            is exactly '=', ',' or ';' looks the same as the bare
            separator here - confirm whether such values need support.
        '''
        charset = 'iso-8859-1' # default. Can be overridden by charset=utf-8
        separators = (',', ';')

        lexer = self._Lexer(s)
        pairs = []
        while True:
            # Get our key
            key = lexer.get_token()
            if key in ('=', ',', ';'):
                raise ParseError(
                    'Invalid digest message %r - '
                    'expecting key, found %r' % (s, key))
            if key == lexer.eof:
                break

            # Then our separator
            sep = lexer.get_token()
            if sep != '=':
                # Key without a value.
                pairs.append((key, None))
                if sep == lexer.eof:
                    break
                if sep not in separators:
                    # Not a separator, so it is the start of the next
                    # pair; hand it back to the lexer.
                    lexer.push_token(sep)
                continue

            # Get our value
            value = lexer.get_token()
            if value == '=':
                raise ParseError(
                    "Invalid digest message %r - double '='" % (s,))
            if value == lexer.eof:
                pairs.append((key, ''))
                break
            if value in separators:
                # 'key=,' - an empty value; the separator is consumed.
                pairs.append((key, ''))
                continue

            pairs.append((key, value))
            if key == 'charset':
                if value != 'utf-8':
                    raise ParseError("Invalid charset %r" % (value,))
                charset = 'utf-8'

            # Consume the separator after the value, if there is one.
            sep = lexer.get_token()
            if sep == lexer.eof:
                break
            if sep not in separators:
                lexer.push_token(sep)

        # Build map while converting to Unicode. The key and value
        # conversions are guarded separately so that a failure is always
        # reported against the right one; UnicodeError is caught because
        # either direction can fail when Python 2 converts implicitly.
        digest_map = {}
        for k, v in pairs:
            try:
                k = k.encode('ascii')
            except UnicodeError:
                raise ParseError('Non-ASCII key %r' % (k,))
            if v:
                try:
                    v = v.decode(charset)
                except UnicodeError:
                    raise ParseError(
                        'Value %r for key %r is not valid %s' % (
                            v, k, charset))
            digest_map.setdefault(k, []).append(v)
        return digest_map

    def setValue(self, key, value):
        ''' Set the value for the given key, overwriting any existing
            values. To add additional values, use getList(key).append(value)
        '''
        self._map[key] = [value]

    def getValue(self, key, default=_marker):
        ''' Return the single value for the given key.

            If the key has no values, return default when one was given
            and raise KeyError otherwise. Raise ValueError if the key
            has multiple values.
        '''
        rv = self._map.get(key, [])
        if not rv:
            if default is _marker:
                raise KeyError(key)
            return default
        if len(rv) > 1:
            raise ValueError('Multiple values for %r' % (key,))
        return rv[0]

    def getList(self, key):
        ''' Return all values for the given key as a list. This list
            may be mutated. An empty list is stored for (and returned
            from) a key that was not yet present.
        '''
        return self._map.setdefault(key, [])

    def __repr__(self):
        return repr(str(self))

    def __str__(self):
        ''' Return this KeqV as a valid, encoded, string.

            Keys with no values, and values of None, are emitted as a
            bare key. Other values are emitted as key=value, quoted and
            backslash-escaped unless they match _quote_unnecessary. The
            result is encoded using the 'charset' value if one is set,
            otherwise iso-8859-1.

            Raises ValueError if a value is neither None nor a string,
            or if the charset is not iso-8859-1 or utf-8.
        '''
        out = []
        for k, vals in self._map.items():
            if not vals:
                out.append(k)
                continue
            for v in vals:
                if v is None:
                    # parseKeqV stores None for a key without '=value'.
                    out.append(k)
                    continue
                if not isinstance(v, types.StringTypes):
                    raise ValueError(
                        'Invalid value %r for key %r' % (v, k))
                if self._quote_unnecessary.search(v):
                    out.append('%s=%s' % (k, v))
                else:
                    # Escape backslashes first so the backslashes added
                    # for the quotes are not themselves doubled.
                    v = v.replace('\\', '\\\\')
                    v = v.replace('"', '\\"')
                    out.append('%s="%s"' % (k, v))
        charset = self.getValue('charset', default='iso-8859-1')
        if charset not in self._charsets:
            raise ValueError('Invalid charset %r' % (charset,))
        return ','.join(out).encode(charset)

    def __len__(self):
        ''' Return the total number of values held, across all keys. '''
        return sum([len(vals) for vals in self._map.values()])



# vim: set filetype=python ts=4 sw=4 et si


