Skip to content

Commit 03dff10

Browse files
authored
Merge pull request #44 from josephbredahl/annotations
Fix annotation and namespace issue (continued)
2 parents 813bc86 + 12073d9 commit 03dff10

10 files changed

+212
-49
lines changed

rnc2rng/parser.py

+20-5
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@ def pprint(n, level=0):
9191
'DATATYPES', 'DEFAULT_NS', 'DEFINE', 'DIV', 'DOCUMENTATION', 'ELEM',
9292
'EMPTY', 'EXCEPT', 'GRAMMAR', 'GROUP', 'INTERLEAVE', 'LIST', 'LITERAL',
9393
'MAYBE', 'MIXED', 'NAME', 'NOT_ALLOWED', 'NS', 'PARAM', 'PARENT', 'REF',
94-
'ROOT', 'SEQ', 'SOME', 'TEXT',
94+
'ROOT', 'SEQ', 'SOME', 'TEXT', 'LITERAL_TYPE'
9595
]
9696

9797
for _node_type in NODE_TYPES:
@@ -392,7 +392,7 @@ def particle_some(s, p):
392392
def particle_primary(s, p):
393393
return p[0]
394394

395-
@pg.production('annotated-primary : LPAREN pattern RPAREN')
395+
@pg.production('primary : LPAREN pattern RPAREN')
396396
def annotated_primary_group(s, p):
397397
return Node('GROUP', None, p[1])
398398

@@ -427,7 +427,11 @@ def primary_literal(s, p): # from datatypeValue
427427

428428
@pg.production('primary : CNAME')
429429
def primary_cname(s, p):
430-
return Node('DATATAG', p[0].value.split(':', 1)[1])
430+
return Node('DATATAG', p[0].value)
431+
432+
@pg.production('primary : CNAME strlit')
433+
def primary_ctyped_string(s, p):
434+
return Node('LITERAL', p[1].value, [Node('LITERAL_TYPE', p[0].value)])
431435

432436
@pg.production('primary : CNAME LBRACE params RBRACE')
433437
def primary_type_params(s, p):
@@ -439,12 +443,20 @@ def primary_string(s, p):
439443

440444
@pg.production('primary : STRING strlit')
441445
def primary_typed_string(s, p):
442-
return Node('DATATAG', 'string', [p[1].value])
446+
return Node('LITERAL', p[1].value, [Node('LITERAL_TYPE', 'string')])
443447

444448
@pg.production('primary : STRING LBRACE params RBRACE')
445449
def primary_string_parametrized(s, p):
446450
return Node('DATATAG', 'string', p[2])
447451

452+
@pg.production('primary : TOKEN')
453+
def primary_text(s, p):
454+
return Node('DATATAG', 'token')
455+
456+
@pg.production('primary : TOKEN strlit')
457+
def primary_text(s, p):
458+
return Node('LITERAL', p[1].value) # the default type is token, so no LITERAL_TYPE
459+
448460
@pg.production('primary : TEXT')
449461
def primary_text(s, p):
450462
return Node('TEXT', None)
@@ -532,7 +544,10 @@ def name_class_group(s, p):
532544
@pg.production('documentations : DOCUMENTATION documentations')
533545
def documentations_multi(s, p):
534546
cur = Node('DOCUMENTATION', None, []) if not p[1] else p[1][0]
535-
cur.value.insert(0, p[0].value.lstrip('# ').rstrip('\r'))
547+
content = p[0].value.lstrip('#').rstrip('\r') # strip all leading "#" (left-recursion in documentationLineContent)
548+
if content.startswith(' '):
549+
content = content[1:] # strip *one* " ", but no more (now the production is restOfLine)
550+
cur.value.insert(0, content)
536551
return [cur]
537552

538553
@pg.production('documentations : ')

rnc2rng/serializer.py

+68-32
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,15 @@
55
ANNO_ATTR, ANNOTATION, ANY, ASSIGN, ATTR, CHOICE, DATATAG, DATATYPES,
66
DEFAULT_NS, DEFINE, DIV, DOCUMENTATION, ELEM, EMPTY, EXCEPT, GRAMMAR,
77
GROUP, INTERLEAVE, LIST, LITERAL, MAYBE, MIXED, NAME, NOT_ALLOWED, NS,
8-
PARAM, PARENT, REF, ROOT, SEQ, SOME, TEXT,
8+
PARAM, PARENT, REF, ROOT, SEQ, SOME, TEXT, LITERAL_TYPE
99
)
1010

1111
import html
1212

1313
QUANTS = {SOME: 'oneOrMore', MAYBE: 'optional', ANY: 'zeroOrMore'}
14-
TYPELIB_NS = 'http://www.w3.org/2001/XMLSchema-datatypes'
14+
TYPELIBS = {
15+
'xsd': 'http://www.w3.org/2001/XMLSchema-datatypes'
16+
}
1517
NAMESPACES = {
1618
'a': 'http://relaxng.org/ns/compatibility/annotations/1.0',
1719
'xml': 'http://www.w3.org/XML/1998/namespace',
@@ -25,15 +27,20 @@ def __init__(self, indent=None):
2527

2628
def reset(self):
2729
self.buf = []
28-
self.needs = {}
29-
self.types = None
3030
self.ns = {}
31+
self.typelibs = {}
3132
self.default = ''
3233
self.level = 0
3334

3435
def write(self, s):
3536
self.buf.append(self.indent * self.level + s)
3637

38+
def datatype_library(self, prefix):
39+
assert prefix in self.typelibs or prefix in TYPELIBS, prefix
40+
if prefix not in self.typelibs:
41+
self.typelibs[prefix] = TYPELIBS[prefix]
42+
return self.typelibs[prefix]
43+
3744
def namespace(self, ns):
3845
assert ns in self.ns or ns in NAMESPACES, ns
3946
if ns not in self.ns:
@@ -43,11 +50,9 @@ def namespace(self, ns):
4350
def toxml(self, node):
4451

4552
self.reset()
46-
types = None
4753
for n in node.value:
4854
if n.type == DATATYPES:
49-
types = n.value[0]
50-
self.types = types
55+
self.typelibs[n.name] = n.value[0]
5156
elif n.type == DEFAULT_NS:
5257
self.default = n.value[0]
5358
if n.name is not None:
@@ -63,9 +68,10 @@ def toxml(self, node):
6368
self.visit(node.value)
6469
for ns, url in sorted(self.ns.items()):
6570
prelude.append(' xmlns:%s="%s"' % (ns, url))
66-
if types is not None or self.needs.get('types'):
67-
url = types if types is not None else TYPELIB_NS
68-
prelude.append(' datatypeLibrary="%s"' % url)
71+
72+
# if xsd:* ever referenced, print it at the grammar level
73+
if 'xsd' in self.typelibs:
74+
prelude.append(' datatypeLibrary="%s"' % self.typelibs['xsd'])
6975

7076
prelude[-1] = prelude[-1] + '>'
7177
self.write('</grammar>')
@@ -78,6 +84,19 @@ def anno_attrs(self, nodes):
7884
return ''
7985
return ' ' + ' '.join('%s="%s"' % attr for attr in pairs)
8086

87+
def type_attrs(self, name):
88+
if ':' in name:
89+
prefix, name = name.split(':', 1)
90+
ns = self.datatype_library(prefix)
91+
else:
92+
assert name in ('string', 'token') # these are the only "built-in" datatypes
93+
ns = ""
94+
95+
attrs = ' type="%s"' % name
96+
if ns != TYPELIBS['xsd']:
97+
attrs += ' datatypeLibrary="%s"' % ns # write all exceptions explicitly
98+
return attrs
99+
81100
def visit(self, nodes, ctx=None, indent=True):
82101
'''Visiting a list of nodes, writes out the XML content to the internal
83102
line-based buffer. By default, adds one level of indentation to the
@@ -89,22 +108,22 @@ def visit(self, nodes, ctx=None, indent=True):
89108

90109
if not isinstance(x, parser.Node):
91110
raise TypeError("Not a Node: " + repr(x))
92-
elif x.type in set([ANNO_ATTR, DATATYPES, DEFAULT_NS, NS]):
111+
elif x.type in set([ANNO_ATTR, LITERAL_TYPE, DATATYPES, DEFAULT_NS, NS]):
93112
continue
94113

95114
attribs = self.anno_attrs(x.value)
96115
if x.type == DEFINE:
97-
98-
op, attrib = x.value[0].name, ''
99-
if op in set(['|=', '&=']):
100-
modes = {'|': 'choice', '&': 'interleave'}
101-
attrib = ' combine="%s"' % modes[op[0]]
116+
for op in (x.name for x in x.value if x.type == 'ASSIGN'):
117+
modes = {'|=': 'choice', '&=': 'interleave'}
118+
if op in modes:
119+
attribs = (' combine="%s"' % modes[op]) + attribs
120+
break;
102121

103122
if x.name == 'start':
104-
self.write('<start%s%s>' % (attrib, attribs))
123+
self.write('<start%s>' % attribs)
105124
else:
106-
bits = x.name, attrib, attribs
107-
self.write('<define name="%s"%s%s>' % bits)
125+
bits = x.name, attribs
126+
self.write('<define name="%s"%s>' % bits)
108127

109128
self.visit(x.value)
110129
if x.name == 'start':
@@ -158,9 +177,20 @@ def visit(self, nodes, ctx=None, indent=True):
158177
self.write('<name ns="%s">%s</name>' % (ns, name))
159178
elif x.type in set([REF, PARENT]):
160179
bits = x.type.lower(), x.name, attribs
161-
self.write('<%s name="%s"%s/>' % bits)
180+
if not x.value: # no parameters
181+
self.write('<%s name="%s"%s/>' % bits)
182+
else:
183+
self.write('<%s name="%s"%s>' % bits)
184+
self.visit(x.value)
185+
self.write('</%s>' % x.type.lower())
162186
elif x.type == LITERAL:
187+
types = [n.name for n in x.value if isinstance(n, parser.Node) and n.type == LITERAL_TYPE]
188+
if types:
189+
assert len(types) == 1
190+
attribs += self.type_attrs(types[0])
191+
163192
bits = attribs, html.escape(x.name)
193+
164194
self.write('<value%s>%s</value>' % bits)
165195
self.visit(x.value, indent=False)
166196
elif x.type == ANNOTATION:
@@ -178,6 +208,11 @@ def visit(self, nodes, ctx=None, indent=True):
178208
tail = html.escape(''.join(literals)) + '</%s>' % x.name
179209

180210
bits = x.name, attribs, end, tail
211+
212+
if ':' in x.name:
213+
parts = x.name.split(':', 1)
214+
ns = self.namespace(parts[0])
215+
181216
self.write('<%s%s%s>%s' % bits)
182217
if not rest:
183218
continue
@@ -195,9 +230,12 @@ def visit(self, nodes, ctx=None, indent=True):
195230
self.write('</%s>' % x.name)
196231

197232
elif x.type == DOCUMENTATION:
198-
self.namespace('a')
199-
fmt = '<a:documentation>%s</a:documentation>'
200-
self.write(fmt % html.escape('\n'.join(x.value)))
233+
xmlns_attr = ''
234+
if self.namespace('a') != NAMESPACES['a']:
235+
xmlns_attr = ' xmlns:a="%s"' % NAMESPACES['a'] # the user is already using namespace a: for something else
236+
237+
fmt = '<a:documentation%s>%s</a:documentation>'
238+
self.write(fmt % (xmlns_attr, html.escape('\n'.join(x.value))))
201239
elif x.type == GROUP:
202240
if len(x.value) == 1 and x.value[0].type != SEQ:
203241
self.visit(x.value, indent=False)
@@ -212,14 +250,10 @@ def visit(self, nodes, ctx=None, indent=True):
212250
elif x.type == SEQ:
213251
self.visit(x.value, indent=False)
214252
elif x.type == DATATAG:
215-
self.needs['types'] = True
216253
if not x.value: # no parameters
217-
self.write('<data type="%s"/>' % x.name)
254+
self.write('<data%s/>' % self.type_attrs(x.name))
218255
else:
219-
name = x.name
220-
if name not in ('string', 'token'):
221-
name = x.name.split(':', 1)[1]
222-
self.write('<data type="%s">' % name)
256+
self.write('<data%s>' % self.type_attrs(x.name))
223257
self.visit(x.value)
224258
self.write('</data>')
225259
elif x.type == PARAM:
@@ -234,11 +268,13 @@ def visit(self, nodes, ctx=None, indent=True):
234268
self.visit(x.value, ctx=x.type)
235269
self.write('</attribute>')
236270
elif x.type == ROOT:
237-
# Verify the included document has the same metadata
238271
for n in x.value:
272+
# Record included document's custom datatypes
239273
if n.type == DATATYPES:
240-
assert self.types == n.value[0]
241-
elif n.type == DEFAULT_NS:
274+
self.typelibs[n.name] = n.value[0]
275+
276+
# Verify the included document has the same metadata
277+
if n.type == DEFAULT_NS:
242278
assert self.default == n.value[0]
243279
elif n.type == NS:
244280
assert n.name in self.ns

tests/annotations.rnc

+15-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
namespace x = "http://www.example.com"
22
namespace dc = "http://purl.org/dc/elements/1.1/"
33
namespace sch = "http://www.ascc.net/xml/schematron"
4+
namespace a = "http://relaxng.org/ns/compatibility/annotations/1.0"
45

56
x:entity [ name="picture" systemId="picture.jpg" notation="jpg" ]
67
dc:title [ "Foo without contents & escaped" ]
@@ -39,5 +40,18 @@ div {
3940
start = foo
4041

4142
## documentation for definition
42-
## continues on the next line
43+
## indented continuation on the next line
44+
## # subheading with leading # (perhaps markdown-style head)
4345
bar = element bar { empty }
46+
47+
baz = element baz {
48+
## documentation for a group
49+
(
50+
foo,
51+
## documentation for a ref
52+
bar
53+
)
54+
}
55+
56+
## combining definition
57+
baz |= empty

tests/annotations.rng

+18-1
Original file line numberDiff line numberDiff line change
@@ -36,10 +36,27 @@
3636
</start>
3737
<define name="bar">
3838
<a:documentation>documentation for definition
39-
continues on the next line</a:documentation>
39+
indented continuation on the next line
40+
# subheading with leading # (perhaps markdown-style head)</a:documentation>
4041
<element>
4142
<name ns="">bar</name>
4243
<empty/>
4344
</element>
4445
</define>
46+
<define name="baz">
47+
<element>
48+
<name ns="">baz</name>
49+
<group>
50+
<a:documentation>documentation for a group</a:documentation>
51+
<ref name="foo"/>
52+
<ref name="bar">
53+
<a:documentation>documentation for a ref</a:documentation>
54+
</ref>
55+
</group>
56+
</element>
57+
</define>
58+
<define name="baz" combine="choice">
59+
<a:documentation>combining definition</a:documentation>
60+
<empty/>
61+
</define>
4562
</grammar>

tests/datatypes.rnc

+12-1
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,13 @@
11
datatypes xsd = "http://www.w3.org/2001/XMLSchema-datatypes"
2-
element height { xsd:double }
2+
datatypes custom="uri:custom-datatype-library"
3+
4+
start = element token { token },
5+
element token_abc { token "abc" },
6+
element string { string },
7+
element string_abc { string "abc" },
8+
element xsd_string { xsd:string },
9+
element xsd_string_abc { xsd:string "abc" },
10+
element xsd_double { xsd:double },
11+
element xsd_double_42 { xsd:double "42" },
12+
element custom_foo { custom:foo },
13+
element custom_foo_abc { custom:foo "abc" }

tests/datatypes.rng

+37-1
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,44 @@
33
datatypeLibrary="http://www.w3.org/2001/XMLSchema-datatypes">
44
<start>
55
<element>
6-
<name ns="">height</name>
6+
<name ns="">token</name>
7+
<data type="token" datatypeLibrary=""/>
8+
</element>
9+
<element>
10+
<name ns="">token_abc</name>
11+
<value>abc</value>
12+
</element>
13+
<element>
14+
<name ns="">string</name>
15+
<data type="string" datatypeLibrary=""/>
16+
</element>
17+
<element>
18+
<name ns="">string_abc</name>
19+
<value type="string" datatypeLibrary="">abc</value>
20+
</element>
21+
<element>
22+
<name ns="">xsd_string</name>
23+
<data type="string"/>
24+
</element>
25+
<element>
26+
<name ns="">xsd_string_abc</name>
27+
<value type="string">abc</value>
28+
</element>
29+
<element>
30+
<name ns="">xsd_double</name>
731
<data type="double"/>
832
</element>
33+
<element>
34+
<name ns="">xsd_double_42</name>
35+
<value type="double">42</value>
36+
</element>
37+
<element>
38+
<name ns="">custom_foo</name>
39+
<data type="foo" datatypeLibrary="uri:custom-datatype-library"/>
40+
</element>
41+
<element>
42+
<name ns="">custom_foo_abc</name>
43+
<value type="foo" datatypeLibrary="uri:custom-datatype-library">abc</value>
44+
</element>
945
</start>
1046
</grammar>

0 commit comments

Comments
 (0)