djc
diff --git a/‎rnc2rng/parser.py
+20-5 b/‎rnc2rng/parser.py
+20-5
diff --git a/‎rnc2rng/serializer.py
+68-32 b/‎rnc2rng/serializer.py
+68-32
diff --git a/‎tests/annotations.rnc
+15-1 b/‎tests/annotations.rnc
+15-1
diff --git a/‎tests/annotations.rng
+18-1 b/‎tests/annotations.rng
+18-1
diff --git a/‎tests/datatypes.rnc
+12-1 b/‎tests/datatypes.rnc
+12-1
diff --git a/‎tests/datatypes.rng
+37-1 b/‎tests/datatypes.rng
+37-1
@@ -91,7 +91,7 @@ def pprint(n, level=0):
     'DATATYPES', 'DEFAULT_NS', 'DEFINE', 'DIV', 'DOCUMENTATION', 'ELEM',
     'EMPTY', 'EXCEPT', 'GRAMMAR', 'GROUP', 'INTERLEAVE', 'LIST', 'LITERAL',
     'MAYBE', 'MIXED', 'NAME', 'NOT_ALLOWED', 'NS', 'PARAM', 'PARENT', 'REF',
-    'ROOT', 'SEQ', 'SOME', 'TEXT',
+    'ROOT', 'SEQ', 'SOME', 'TEXT', 'LITERAL_TYPE'
 ]
 
 for _node_type in NODE_TYPES:
@@ -392,7 +392,7 @@ def particle_some(s, p):
 def particle_primary(s, p):
     return p[0]
 
-@pg.production('annotated-primary : LPAREN pattern RPAREN')
+@pg.production('primary : LPAREN pattern RPAREN')
 def annotated_primary_group(s, p):
     return Node('GROUP', None, p[1])
 
@@ -427,7 +427,11 @@ def primary_literal(s, p): # from datatypeValue
 
 @pg.production('primary : CNAME')
 def primary_cname(s, p):
-    return Node('DATATAG', p[0].value.split(':', 1)[1])
+    return Node('DATATAG', p[0].value) # keep the "prefix:" part; the serializer maps it to a datatypeLibrary
+
+@pg.production('primary : CNAME strlit')
+def primary_ctyped_string(s, p):
+    return Node('LITERAL', p[1].value, [Node('LITERAL_TYPE', p[0].value)]) # literal text + its prefixed datatype name
 
 @pg.production('primary : CNAME LBRACE params RBRACE')
 def primary_type_params(s, p):
@@ -439,12 +443,20 @@ def primary_string(s, p):
 
 @pg.production('primary : STRING strlit')
 def primary_typed_string(s, p):
-    return Node('DATATAG', 'string', [p[1].value])
+    return Node('LITERAL', p[1].value, [Node('LITERAL_TYPE', 'string')]) # serialized as a typed <value>, not <data>
 
 @pg.production('primary : STRING LBRACE params RBRACE')
 def primary_string_parametrized(s, p):
     return Node('DATATAG', 'string', p[2])
 
+@pg.production('primary : TOKEN')
+def primary_token(s, p): # bare `token` keyword -> the built-in token datatype
+    return Node('DATATAG', 'token')
+
+@pg.production('primary : TOKEN strlit')
+def primary_typed_token(s, p): # `token "..."` -> a literal value
+    return Node('LITERAL', p[1].value) # the default type is token, so no LITERAL_TYPE
+
 @pg.production('primary : TEXT')
 def primary_text(s, p):
     return Node('TEXT', None)
@@ -532,7 +544,10 @@ def name_class_group(s, p):
 @pg.production('documentations : DOCUMENTATION documentations')
 def documentations_multi(s, p):
     cur = Node('DOCUMENTATION', None, []) if not p[1] else p[1][0]
-    cur.value.insert(0, p[0].value.lstrip('# ').rstrip('\r'))
+    content = p[0].value.lstrip('#').rstrip('\r') # strip all leading "#" (left-recursion in documentationLineContent)
+    if content.startswith(' '):
+        content = content[1:] # strip *one* " ", but no more (now the production is restOfLine)
+    cur.value.insert(0, content)
     return [cur]
 
 @pg.production('documentations : ')
 
@@ -5,13 +5,15 @@
     ANNO_ATTR, ANNOTATION, ANY, ASSIGN, ATTR, CHOICE, DATATAG, DATATYPES,
     DEFAULT_NS, DEFINE, DIV, DOCUMENTATION, ELEM, EMPTY, EXCEPT, GRAMMAR,
     GROUP, INTERLEAVE, LIST, LITERAL, MAYBE, MIXED, NAME, NOT_ALLOWED, NS,
-    PARAM, PARENT, REF, ROOT, SEQ, SOME, TEXT,
+    PARAM, PARENT, REF, ROOT, SEQ, SOME, TEXT, LITERAL_TYPE
 )
 
 import html
 
 QUANTS = {SOME: 'oneOrMore', MAYBE: 'optional', ANY: 'zeroOrMore'}
-TYPELIB_NS = 'http://www.w3.org/2001/XMLSchema-datatypes'
+TYPELIBS = { # fallback library URIs for prefixes used without a `datatypes` declaration
+    'xsd': 'http://www.w3.org/2001/XMLSchema-datatypes'
+}
 NAMESPACES = {
     'a': 'http://relaxng.org/ns/compatibility/annotations/1.0',
     'xml': 'http://www.w3.org/XML/1998/namespace',
@@ -25,15 +27,20 @@ def __init__(self, indent=None):
 
     def reset(self):
         self.buf = []
-        self.needs = {}
-        self.types = None
         self.ns = {}
+        self.typelibs = {} # datatype prefix -> library URI (DATATYPES declarations, TYPELIBS fallbacks)
         self.default = ''
         self.level = 0
 
     def write(self, s):
         self.buf.append(self.indent * self.level + s)
 
+    def datatype_library(self, prefix):
+        '''Return the library URI bound to `prefix`, registering the TYPELIBS fallback on first use.'''
+        known = self.typelibs
+        assert prefix in known or prefix in TYPELIBS, prefix
+        return known[prefix] if prefix in known else known.setdefault(prefix, TYPELIBS[prefix])
+
     def namespace(self, ns):
         assert ns in self.ns or ns in NAMESPACES, ns
         if ns not in self.ns:
@@ -43,11 +50,9 @@ def namespace(self, ns):
     def toxml(self, node):
 
         self.reset()
-        types = None
         for n in node.value:
             if n.type == DATATYPES:
-                types = n.value[0]
-                self.types = types
+                self.typelibs[n.name] = n.value[0]
             elif n.type == DEFAULT_NS:
                 self.default = n.value[0]
                 if n.name is not None:
@@ -63,9 +68,10 @@ def toxml(self, node):
         self.visit(node.value)
         for ns, url in sorted(self.ns.items()):
             prelude.append('         xmlns:%s="%s"' % (ns, url))
-        if types is not None or self.needs.get('types'):
-            url = types if types is not None else TYPELIB_NS
-            prelude.append('         datatypeLibrary="%s"' % url)
+
+        # if xsd:* ever referenced, print it at the grammar level
+        if 'xsd' in self.typelibs:
+            prelude.append('         datatypeLibrary="%s"' % self.typelibs['xsd'])
 
         prelude[-1] = prelude[-1] + '>'
         self.write('</grammar>')
@@ -78,6 +84,19 @@ def anno_attrs(self, nodes):
             return ''
         return ' ' + ' '.join('%s="%s"' % attr for attr in pairs)
 
+    def type_attrs(self, name):
+        '''Return ' type="..."' for datatype `name`, plus ' datatypeLibrary="..."' unless <grammar> already implies it.'''
+        if ':' in name:
+            prefix, name = name.split(':', 1)
+            ns = self.datatype_library(prefix)
+        else:
+            assert name in ('string', 'token') # these are the only "built-in" datatypes
+            ns = ""
+        attrs = ' type="%s"' % name
+        if ns != TYPELIBS['xsd'] or self.typelibs.get('xsd') != ns: # <grammar> only declares the "xsd" binding
+            attrs += ' datatypeLibrary="%s"' % ns # write all exceptions explicitly
+        return attrs
+
     def visit(self, nodes, ctx=None, indent=True):
         '''Visiting a list of nodes, writes out the XML content to the internal
         line-based buffer. By default, adds one level of indentation to the
@@ -89,22 +108,22 @@ def visit(self, nodes, ctx=None, indent=True):
 
             if not isinstance(x, parser.Node):
                 raise TypeError("Not a Node: " + repr(x))
-            elif x.type in set([ANNO_ATTR, DATATYPES, DEFAULT_NS, NS]):
+            elif x.type in set([ANNO_ATTR, LITERAL_TYPE, DATATYPES, DEFAULT_NS, NS]):
                 continue
 
             attribs = self.anno_attrs(x.value)
             if x.type == DEFINE:
-
-                op, attrib = x.value[0].name, ''
-                if op in set(['|=', '&=']):
-                    modes = {'|': 'choice', '&': 'interleave'}
-                    attrib = ' combine="%s"' % modes[op[0]]
+                for op in (x.name for x in x.value if x.type == 'ASSIGN'):
+                    modes = {'|=': 'choice', '&=': 'interleave'}
+                    if op in modes:
+                        attribs = (' combine="%s"' % modes[op]) + attribs
+                    break;
 
                 if x.name == 'start':
-                    self.write('<start%s%s>' % (attrib, attribs))
+                    self.write('<start%s>' % attribs)
                 else:
-                    bits = x.name, attrib, attribs
-                    self.write('<define name="%s"%s%s>' % bits)
+                    bits = x.name, attribs
+                    self.write('<define name="%s"%s>' % bits)
 
                 self.visit(x.value)
                 if x.name == 'start':
@@ -158,9 +177,20 @@ def visit(self, nodes, ctx=None, indent=True):
                     self.write('<name ns="%s">%s</name>' % (ns, name))
             elif x.type in set([REF, PARENT]):
                 bits = x.type.lower(), x.name, attribs
-                self.write('<%s name="%s"%s/>' % bits)
+                if not x.value: # no parameters
+                    self.write('<%s name="%s"%s/>' % bits)
+                else:
+                    self.write('<%s name="%s"%s>' % bits)
+                    self.visit(x.value)
+                    self.write('</%s>' % x.type.lower())
             elif x.type == LITERAL:
+                types = [n.name for n in x.value if isinstance(n, parser.Node) and n.type == LITERAL_TYPE]
+                if types:
+                    assert len(types) == 1
+                    attribs += self.type_attrs(types[0])
+
                 bits = attribs, html.escape(x.name)
+
                 self.write('<value%s>%s</value>' % bits)
                 self.visit(x.value, indent=False)
             elif x.type == ANNOTATION:
@@ -178,6 +208,11 @@ def visit(self, nodes, ctx=None, indent=True):
                     tail = html.escape(''.join(literals)) + '</%s>' % x.name
 
                 bits = x.name, attribs, end, tail
+
+                if ':' in x.name:
+                    parts = x.name.split(':', 1)
+                    ns = self.namespace(parts[0])
+
                 self.write('<%s%s%s>%s' % bits)
                 if not rest:
                     continue
@@ -195,9 +230,12 @@ def visit(self, nodes, ctx=None, indent=True):
                 self.write('</%s>' % x.name)
 
             elif x.type == DOCUMENTATION:
-                self.namespace('a')
-                fmt = '<a:documentation>%s</a:documentation>'
-                self.write(fmt % html.escape('\n'.join(x.value)))
+                xmlns_attr = ''
+                if self.namespace('a') != NAMESPACES['a']:
+                    xmlns_attr = ' xmlns:a="%s"' % NAMESPACES['a'] # the user is already using namespace a: for something else
+
+                fmt = '<a:documentation%s>%s</a:documentation>'
+                self.write(fmt % (xmlns_attr, html.escape('\n'.join(x.value))))
             elif x.type == GROUP:
                 if len(x.value) == 1 and x.value[0].type != SEQ:
                     self.visit(x.value, indent=False)
@@ -212,14 +250,10 @@ def visit(self, nodes, ctx=None, indent=True):
             elif x.type == SEQ:
                 self.visit(x.value, indent=False)
             elif x.type == DATATAG:
-                self.needs['types'] = True
                 if not x.value: # no parameters
-                    self.write('<data type="%s"/>' % x.name)
+                    self.write('<data%s/>' % self.type_attrs(x.name))
                 else:
-                    name = x.name
-                    if name not in ('string', 'token'):
-                        name = x.name.split(':', 1)[1]
-                    self.write('<data type="%s">' % name)
+                    self.write('<data%s>' % self.type_attrs(x.name))
                     self.visit(x.value)
                     self.write('</data>')
             elif x.type == PARAM:
@@ -234,11 +268,13 @@ def visit(self, nodes, ctx=None, indent=True):
                 self.visit(x.value, ctx=x.type)
                 self.write('</attribute>')
             elif x.type == ROOT:
-                # Verify the included document has the same metadata
                 for n in x.value:
+                    # Record included document's custom datatypes
                     if n.type == DATATYPES:
-                        assert self.types == n.value[0]
-                    elif n.type == DEFAULT_NS:
+                        self.typelibs[n.name] = n.value[0]
+                    
+                    # Verify the included document has the same metadata
+                    if n.type == DEFAULT_NS:
                         assert self.default == n.value[0]
                     elif n.type == NS:
                         assert n.name in self.ns
 
@@ -1,6 +1,7 @@
 namespace x = "http://www.example.com"
 namespace dc = "http://purl.org/dc/elements/1.1/"
 namespace sch = "http://www.ascc.net/xml/schematron"
+namespace a = "http://relaxng.org/ns/compatibility/annotations/1.0"
 
 x:entity [ name="picture" systemId="picture.jpg" notation="jpg" ]
 dc:title [ "Foo without contents & escaped" ]
@@ -39,5 +40,18 @@ div {
 start = foo
 
 ## documentation for definition
-## continues on the next line
+##     indented continuation on the next line
+## # subheading with leading # (perhaps markdown-style head)
 bar = element bar { empty }
+
+baz = element baz {
+  ## documentation for a group
+  (
+    foo,
+    ## documentation for a ref
+    bar
+  )
+}
+
+## combining definition
+baz |= empty
@@ -36,10 +36,27 @@
   </start>
   <define name="bar">
     <a:documentation>documentation for definition
-continues on the next line</a:documentation>
+    indented continuation on the next line
+# subheading with leading # (perhaps markdown-style head)</a:documentation>
     <element>
       <name ns="">bar</name>
       <empty/>
     </element>
   </define>
+  <define name="baz">
+    <element>
+      <name ns="">baz</name>
+      <group>
+        <a:documentation>documentation for a group</a:documentation>
+        <ref name="foo"/>
+        <ref name="bar">
+          <a:documentation>documentation for a ref</a:documentation>
+        </ref>
+      </group>
+    </element>
+  </define>
+  <define name="baz" combine="choice">
+    <a:documentation>combining definition</a:documentation>
+    <empty/>
+  </define>
 </grammar>
@@ -1,2 +1,13 @@
 datatypes xsd = "http://www.w3.org/2001/XMLSchema-datatypes"
-element height { xsd:double }
+datatypes custom="uri:custom-datatype-library"
+
+start = element token { token },
+	element token_abc { token "abc" },
+	element string { string },
+	element string_abc { string "abc" },
+	element xsd_string { xsd:string },
+	element xsd_string_abc { xsd:string "abc" },
+	element xsd_double { xsd:double },
+	element xsd_double_42 { xsd:double "42" },
+	element custom_foo { custom:foo },
+	element custom_foo_abc { custom:foo "abc" }
@@ -3,8 +3,44 @@
          datatypeLibrary="http://www.w3.org/2001/XMLSchema-datatypes">
   <start>
     <element>
-      <name ns="">height</name>
+      <name ns="">token</name>
+      <data type="token" datatypeLibrary=""/>
+    </element>
+    <element>
+      <name ns="">token_abc</name>
+      <value>abc</value>
+    </element>
+    <element>
+      <name ns="">string</name>
+      <data type="string" datatypeLibrary=""/>
+    </element>
+    <element>
+      <name ns="">string_abc</name>
+      <value type="string" datatypeLibrary="">abc</value>
+    </element>
+    <element>
+      <name ns="">xsd_string</name>
+      <data type="string"/>
+    </element>
+    <element>
+      <name ns="">xsd_string_abc</name>
+      <value type="string">abc</value>
+    </element>
+    <element>
+      <name ns="">xsd_double</name>
       <data type="double"/>
     </element>
+    <element>
+      <name ns="">xsd_double_42</name>
+      <value type="double">42</value>
+    </element>
+    <element>
+      <name ns="">custom_foo</name>
+      <data type="foo" datatypeLibrary="uri:custom-datatype-library"/>
+    </element>
+    <element>
+      <name ns="">custom_foo_abc</name>
+      <value type="foo" datatypeLibrary="uri:custom-datatype-library">abc</value>
+    </element>
   </start>
 </grammar>