source: trunk/libs/newlib/src/newlib/doc/makedocbook.py @ 444

Last change on this file since 444 was 444, checked in by satin@…, 6 years ago

add newlib, libalmos-mkh; restructure shared_syscalls.h and mini-libc

File size: 22.4 KB
Line 
1#!/usr/bin/env python
2#
3# python script to process makedoc instructions in a source file and produce
4# DocBook XML output
5#
6
7#
# This performs 3 stages of processing on its input, in a similar fashion
9# to makedoc:
10#
11# 1. Discard everything outside of /*  */ comments
# 2. Identify lines which contain commands (a single uppercase word)
13# 3. Apply each command to the text of the following lines (up to the next
14#    command or the end of the comment block), to produce some output
15#
16# The resulting output contains one or more DocBook XML refentry elements.
17#
18# To make the output a valid XML document which can be xincluded, those refentry
19# elements are contained by a refcontainer element.  refcontainer is not part of
20# the DocBook DTD and should be removed by a suitable XSLT.
21#
22
23from __future__ import print_function
24
25import sys
26import re
27from optparse import OptionParser
28import lxml.etree
29import ply.lex as lex
30import ply.yacc as yacc
31
rootelement = None # root element of the XML tree
refentry = None # the current refentry
verbose = 0 # verbosity level; overwritten from the -v command line option
35
def dump(s, stage, threshold = 1):
    """Print s to stderr, framed by '*' separator rules and labelled with
    the processing stage, when verbosity exceeds threshold."""
    if verbose > threshold:
        rule = '*' * 40
        for line in (rule, stage, rule, '%s' % s, rule):
            print(line, file=sys.stderr)
43
44#
45# Stage 1
46#
47
def skip_whitespace_and_stars(i, src):
    """Return the index of the first character at or after i in src that is
    neither whitespace nor a decorative '*'.

    A '*' which begins a '*/' comment terminator is not skipped.  The
    lookahead is now bounds-checked: the original read src[i+1]
    unconditionally and raised IndexError when src ended with a '*'.
    """
    while i < len(src) and (src[i].isspace() or
                            (src[i] == '*' and i + 1 < len(src) and src[i+1] != '/')):
        i += 1

    return i
54
55# Discard everything not inside '/*  */' style-comments which start at column 0
56# Discard any leading blank space or '*'
57# Discard a single leading '.'
58# Discard blank lines after a blank line
def comment_contents_generator(src):
    """Yield, character by character, the contents of the column-0 '/* */'
    comments in src, with leading decoration stripped as described above;
    each comment block is terminated by a synthetic '\\nEND\\n' marker."""
    i = 0

    while i < len(src) - 2:
        # a comment must open at column 0, i.e. immediately after a newline
        # (the caller prepends '\n' so a comment on the first line is found)
        if src[i] == '\n' and src[i+1] == '/' and src[i+2] == '*':
            i = i + 3

            i = skip_whitespace_and_stars(i, src)

            # discard a single leading '.'
            # NOTE(review): assumes there is at least one character left after
            # the opener — an unterminated '/*' at EOF would read past the end
            if src[i] == '.':
                i += 1

            # copy out the body of this comment
            while i < len(src):
                if src[i] == '\n':
                    yield '\n'
                    i += 1

                    # allow a single blank line
                    if i < len(src) and src[i] == '\n':
                        yield '\n'
                        i += 1

                    i = skip_whitespace_and_stars(i, src)

                elif src[i] == '*' and src[i+1] == '/':
                    i = i + 2
                    # If we have just output \n\n, this adds another blank line.
                    # This is the only way a double blank line can occur.
                    yield '\nEND\n'
                    break
                else:
                    yield src[i]
                    i += 1
        else:
            # not at a comment opener; advance one character
            i += 1
94
def remove_noncomments(src):
    """Stage 1: return only the comment contents of src, as extracted by
    comment_contents_generator."""
    extracted = ''.join(comment_contents_generator('\n' + src))
    dump(extracted, 'extracted from comments')

    return extracted
101
102#
103# Stage 2
104#
105
106# A command is a single word of at least 3 characters, all uppercase, and alone on a line
def iscommand(l):
    """Return True if line l is a command: a single word of at least three
    characters, all uppercase (underscores allowed), alone on the line."""
    # raw string: '\s' in a plain literal is an invalid escape sequence,
    # which newer Pythons warn about
    return bool(re.match(r'^[A-Z_]{3,}\s*$', l))
112
def command_block_generator(content):
    """Yield (command, text) tuples: each command line found in content,
    paired with the text accumulated since the previous command.  Text
    before the first command is attributed to a synthetic START command."""
    cmd = 'START'
    pending = []

    for line in content.splitlines():
        if iscommand(line):
            yield (cmd, ''.join(pending))
            cmd = line.rstrip()
            pending = []
        else:
            pending.append(line + '\n')
    yield (cmd, ''.join(pending))
125
126# Look for commands, which give instructions how to process the following input
# Look for commands, which give instructions how to process the following input
def process(content):
    """Stage 2: split content into an ordered list of (command, text)
    tuples, one per command found."""
    content = content.lstrip()

    dump(content, 'about to process for commands')

    # the order in which the commands generate sections must be preserved,
    # so collect the generator's output into a list
    return list(command_block_generator(content))
137
138#
139# Stage 3
140#
141
#  invoke each command on its text
def perform(processed):
    """Stage 3: dispatch each (command, text) tuple to its handler, in
    order.  Text following an unrecognized command is discarded."""
    for command, text in processed:
        c = command.rstrip()
        t = text.strip() + '\n'

        if verbose:
            print("performing command '%s'" % c, file=sys.stderr)

        handler = command_dispatch_dict.get(c)
        if handler is not None:
            handler(c, t)
        else:
            # the text following an unrecognized command is discarded
            print("command '%s' is not recognized" % c, file=sys.stderr)
156
157# FUNCTION (aka TYPEDEF)
158#
def function(c, l):
    """Handle the FUNCTION (or TYPEDEF) command: start a new refentry for
    the 'name --- description' list in l, creating the refmeta and
    refnamediv elements.  Exits if a refentry is already open (a NEWPAGE
    is required between FUNCTIONs)."""
    global refentry
    global rootelement

    l = l.strip()
    if verbose:
        print('FUNCTION %s' % l, file=sys.stderr)

    separator = '---'

    if ';' in l:
        # fpclassify has an unusual format we also need to handle
        spliton = ';'
        l = l.splitlines()[0]
    elif len(l.splitlines()) > 1:
        # a few pages like mktemp have two '---' lines
        spliton = ';'
        o = ''
        for i in l.splitlines():
            if separator in i:
                o += i + ';'
            else:
                o += i
        l = o[:-1]
    else:
        spliton = '\n'

    namelist = []
    descrlist = []
    for a in l.split(spliton):
        (n, d) = a.split(separator, 1)
        namelist = namelist + n.split(',')
        descrlist = descrlist + [d]

    # only copysign and log1p use <[ ]> markup in descr,
    # only gets() uses << >> markup
    # but we should handle it correctly
    descr = line_markup_convert(', '.join(descrlist))

    # fpclassify includes an 'and' we need to discard
    # (list comprehensions, not map(): namelist is subscripted below, and
    # map() returns a non-subscriptable one-shot iterator on Python 3)
    namelist = [re.sub('^and ', '', v.strip(), count=1) for v in namelist]
    # strip off << >> surrounding name
    namelist = [v.strip().lstrip('<').rstrip('>') for v in namelist]

    if verbose:
        print(namelist, file=sys.stderr)
    # additional alternate names may also appear in INDEX commands

    # create the root element if needed
    if rootelement is None:
        rootelement = lxml.etree.Element('refentrycontainer')

    # FUNCTION implies starting a new refentry
    if refentry is not None:
        print("multiple FUNCTIONs without NEWPAGE", file=sys.stderr)
        exit(1)

    # create the refentry
    refentry = lxml.etree.SubElement(rootelement, 'refentry')
    refentry.append(lxml.etree.Comment(' Generated by makedocbook.py '))
    refentry.set('id', namelist[0].lstrip('_'))

    refmeta = lxml.etree.SubElement(refentry, 'refmeta')
    # refentrytitle will be same as refdescriptor, the primary name
    refentrytitle = lxml.etree.SubElement(refmeta, 'refentrytitle')
    refentrytitle.text = namelist[0]
    manvolnum = lxml.etree.SubElement(refmeta, 'manvolnum')
    manvolnum.text = '3'

    refnamediv = lxml.etree.SubElement(refentry, 'refnamediv')
    # refdescriptor is the primary name, assume we should use the one which
    # appears first in the list
    refdescriptor = lxml.etree.SubElement(refnamediv, 'refdescriptor')
    refdescriptor.text = namelist[0]
    # refname elements exist for all alternate names
    for n in namelist:
        refname = lxml.etree.SubElement(refnamediv, 'refname')
        refname.text = n
    refpurpose = lxml.etree.SubElement(refnamediv, 'refpurpose')
    refnamediv.replace(refpurpose, lxml.etree.fromstring('<refpurpose>' + descr + '</refpurpose>'))

    # Only FUNCTION currently exists, which implies that the SYNOPSIS should be
    # a funcsynopsis.  If TYPEDEF was to be added, SYNOPSIS should be processed
    # in a different way, probably producing a refsynopsis.
243
244# INDEX
245# may occur more than once for each FUNCTION giving alternate names this
246# function should be indexed under
247#
def index(c, l):
    """Handle the INDEX command: add an indexterm for the first word of l
    to the current refentry, and also record it as an alternate refname if
    it is not already one."""
    l = l.strip()

    if verbose:
        print('INDEX %s' % l, file=sys.stderr)

    # discard anything after the first word
    l = l.split()[0]

    # add indexterm
    # (we could just index under all the refnames, but we control the indexing
    # separately as that is what makedoc does)
    indexterm = lxml.etree.SubElement(refentry, 'indexterm')
    primary = lxml.etree.SubElement(indexterm, 'primary')
    primary.text = l

    # to validate, it seems we need to maintain refentry elements in a certain order
    refentry[:] = sorted(refentry, key = lambda x: x.tag)

    # adds another alternate refname
    refnamediv = refentry.find('refnamediv')

    # as long as it doesn't already exist
    if not refnamediv.xpath(('refname[.="%s"]') % l):
        refname = lxml.etree.SubElement(refnamediv, 'refname')
        refname.text = l
        if verbose > 1:
            print('added refname %s' % l, file=sys.stderr)
    else:
        if verbose > 1:
            print('duplicate refname %s discarded' % l, file=sys.stderr)

    # to validate, it seems we need to maintain refnamediv elements in a certain order
    refnamediv[:] = sorted(refnamediv, key = lambda x: x.tag)
282
283
284# SYNOPSIS aka ANSI_SYNOPSIS
285# ANSI-style synopsis
286#
287# Note that makedoc would also process <<code>> markup here, but there are no
288# such uses.
289#
def synopsis(c, t):
    """Handle SYNOPSIS/ANSI_SYNOPSIS: build a funcsynopsis element in the
    current refentry from the prototype lines in t.  Preprocessor
    directives, structs and bracketed comments become funcsynopsisinfo;
    everything else is accumulated until a ';' completes a prototype.

    Note that makedoc would also process <<code>> markup here, but there
    are no such uses.
    """
    refsynopsisdiv = lxml.etree.SubElement(refentry, 'refsynopsisdiv')
    funcsynopsis = lxml.etree.SubElement(refsynopsisdiv, 'funcsynopsis')

    s = ''
    for l in t.splitlines():
        # raw string: '\s' in a plain literal is an invalid escape sequence
        if re.match(r'\s*(#|\[|struct)', l):
            # preprocessor # directives, structs, comments in square brackets
            funcsynopsisinfo = lxml.etree.SubElement(funcsynopsis, 'funcsynopsisinfo')
            funcsynopsisinfo.text = l.strip() + '\n'
        else:
            s = s + l

            # a prototype without a terminating ';' is an error
            if s.endswith(')'):
                print("'%s' missing terminating semicolon" % l, file=sys.stderr)
                s = s + ';'
                exit(1)

            if ';' in s:
                synopsis_for_prototype(funcsynopsis, s)
                s = ''

    if s.strip():
        # leftover text that never formed a complete prototype is fatal.
        # (This was a bare 'raise' with no active exception, which itself
        # errors out obscurely; fail consistently with the other error
        # paths instead.)
        print("surplus synopsis '%s'" % s, file=sys.stderr)
        exit(1)
316
def synopsis_for_prototype(funcsynopsis, s):
    """Massage the bare C prototype(s) in s into funcsynopsis's detailed
    content model.  The <[ ]> markup around parameter names gives us
    enough information to do this."""
    s = s.strip()

    # funcsynopsis has a very detailed content model, so we need to massage the
    # bare prototype into it.  Fortunately, since the parameter names are marked
    # up, we have enough information to do this.
    for fp in s.split(';'):
        fp = fp.strip()
        if fp:

            if verbose:
                print("'%s'" % fp, file=sys.stderr)

            # split into return type, function name and parameter list
            match = re.match(r'(.*?)([\w\d]*) ?\((.*)\)', fp)

            if verbose:
                print(match.groups(), file=sys.stderr)

            funcprototype = lxml.etree.SubElement(funcsynopsis, 'funcprototype')
            funcdef = lxml.etree.SubElement(funcprototype, 'funcdef')
            funcdef.text = match.group(1)
            function = lxml.etree.SubElement(funcdef, 'function')
            function.text = match.group(2)

            if match.group(3).strip() == 'void':
                void = lxml.etree.SubElement(funcprototype, 'void')
            else:
                # Split parameters on ',' except if it is inside ()
                # (raw string: '\)' in a plain literal is an invalid escape)
                for p in re.split(r',(?![^()]*\))', match.group(3)):
                    p = p.strip()

                    if verbose:
                        print(p, file=sys.stderr)

                    if p == '...':
                        varargs = lxml.etree.SubElement(funcprototype, 'varargs')
                    else:
                        paramdef = lxml.etree.SubElement(funcprototype, 'paramdef')
                        parameter = lxml.etree.SubElement(paramdef, 'parameter')

                        # <[ ]> enclose the parameter name
                        # (raw string: '\[' is an invalid escape otherwise)
                        match2 = re.match(r'(.*)<\[(.*)\]>(.*)', p)

                        if verbose:
                            print(match2.groups(), file=sys.stderr)

                        paramdef.text = match2.group(1)
                        parameter.text = match2.group(2)
                        parameter.tail = match2.group(3)
366
367
368# DESCRIPTION
369# (RETURNS, ERRORS, PORTABILITY, BUGS, WARNINGS, SEEALSO, NOTES  are handled the same)
370#
371# Create a refsect with a title corresponding to the command
372#
# Nearly all of the existing DESCRIPTION contents could be transformed into
374# DocBook with a few regex substitutions.  Unfortunately, pages like sprintf and
375# sscanf, have very complex layout using nested tables and itemized lists, which
376# it is best to parse in order to transform correctly.
377#
378
def refsect(t, s):
    """Create a refsect1 element in the current refentry, titled after
    command t, by parsing the section text s for makedoc markup and the
    few pieces of texinfo markup we understand."""
    # local renamed from 'refsect' so it no longer shadows this function
    section = lxml.etree.SubElement(refentry, 'refsect1')
    title = lxml.etree.SubElement(section, 'title')
    title.text = t.title()

    if verbose:
        print('%s has %d paragraphs' % (t, len(s.split('\n\n'))) , file=sys.stderr)

    if verbose > 1:
        dump(s, 'before lexing')

        # dump out lexer token sequence
        lex.input(s)
        for tok in lexer:
            print(tok, file=sys.stderr)

    # parse the section text for makedoc markup and the few pieces of texinfo
    # markup we understand, and output an XML marked-up string
    xml = parser.parse(s, tracking=True, debug=(verbose > 2))

    dump(xml, 'after parsing')

    section.extend(lxml.etree.fromstring('<refsect1>' + xml + '</refsect1>'))
404
def seealso(c, t):
    """SEEALSO is rendered as an ordinary refsect titled 'See Also'."""
    refsect('SEE ALSO', t)
407
408# NEWPAGE
409#
410# start a new refentry
411
def newpage(c, t):
    """Handle NEWPAGE: close the current refentry so that the next
    FUNCTION command starts a fresh one."""
    global refentry
    refentry = None
415
416# command dispatch table
417
def discarded(c, t):
    """Handler for commands whose following text is deliberately ignored."""
    pass
420
# map each recognized command name to its handler function; perform() looks
# commands up here and reports anything not listed
command_dispatch_dict = {
    'FUNCTION'          : function,
    'TYPEDEF'           : function,     # TYPEDEF is not currently used, but described in doc.str
    'INDEX'             : index,
    'TRAD_SYNOPSIS'     : discarded,    # K&R-style synopsis, obsolete and discarded
    'ANSI_SYNOPSIS'     : synopsis,
    'SYNOPSIS'          : synopsis,
    'DESCRIPTION'       : refsect,
    'RETURNS'           : refsect,
    'ERRORS'            : refsect,
    'PORTABILITY'       : refsect,
    'BUGS'              : refsect,
    'WARNINGS'          : refsect,
    'SEEALSO'           : seealso,
    'NOTES'             : refsect,      # NOTES is not described in doc.str, so is currently discarded by makedoc, but that doesn't seem right
    'QUICKREF'          : discarded,    # The intent of QUICKREF and MATHREF is not obvious, but they don't generate any output currently
    'MATHREF'           : discarded,
    'START'             : discarded,    # a START command is inserted to contain the text before the first command
    'END'               : discarded,    # an END command is inserted merely to terminate the text for the last command in a comment block
    'NEWPAGE'           : newpage,
}
442
443#
444# Utility functions
445#
446
447# apply transformations which are easy to do in-place
def line_markup_convert(p):
    """Apply in-place markup transformations to the line p: escape XML
    metacharacters, convert makedoc <<code>> / <[var]> markup, and
    translate the few texinfo commands that occur inline.  Returns the
    converted string."""
    s = p

    # process the texinfo escape for an @
    s = s.replace('@@', '@')

    # escape characters not allowed in XML
    # ('&' must be escaped first, or it would corrupt the other escapes)
    s = s.replace('&', '&amp;')
    s = s.replace('<', '&lt;')
    s = s.replace('>', '&gt;')

    # convert <<somecode>> to <code>somecode</code> and <[var]> to
    # <varname>var</varname>
    # also handle nested << <[ ]> >> correctly
    s = s.replace('&lt;&lt;', '<code>')
    s = s.replace('&lt;[', '<varname>')
    s = s.replace(']&gt;', '</varname>')
    s = s.replace('&gt;&gt;', '</code>')

    # also convert some simple texinfo markup
    # convert @emph{foo} to <emphasis>foo</emphasis>
    s = re.sub('@emph{(.*?)}', '<emphasis>\\1</emphasis>', s)
    # convert @strong{foo} to <emphasis role=strong>foo</emphasis>
    s = re.sub('@strong{(.*?)}', '<emphasis role="strong">\\1</emphasis>', s)
    # convert @minus{} to U+2212 MINUS SIGN
    s = s.replace('@minus{}', '&#x2212;')
    # convert @dots{} to U+2026 HORIZONTAL ELLIPSIS
    s = s.replace('@dots{}', '&#x2026;')

    # convert xref and pxref
    s = re.sub('@xref{(.*?)}', "See <xref linkend='\\1'/>", s)

    # very hacky way of dealing with @* to force a newline
    s = s.replace('@*', '</para><para>')

    if (verbose > 3) and (s != p):
        print('%s-> line_markup_convert ->\n%s' % (p, s), file=sys.stderr)

    return s
487
488#
489# lexer
490#
491
# texinfo commands which may start a line, mapped to the lexer token type
# emitted for them; t_TEXINFO consults this by prefix match, so 'c ' (the
# abbreviated form of 'comment') includes its trailing space
texinfo_commands = {
    'ifnottex' : 'IFNOTTEX',
    'end ifnottex' : 'ENDIFNOTTEX',
    'tex' : 'IFTEX',
    'end tex' : 'ENDIFTEX',
    'comment' : 'COMMENT',
    'c ' : 'COMMENT',
    'multitable' : 'MULTICOLUMNTABLE',
    'end multitable' : 'ENDMULTICOLUMNTABLE',
    'headitem' : 'MCT_HEADITEM',
    'tab' : 'MCT_COLUMN_SEPARATOR',
    'item' : 'MCT_ITEM',
    }
505
506# token names
# token names
# (the makedoc tokens, plus one token type per distinct texinfo command;
# PLY requires this list to be named 'tokens')
tokens = [
    'BLANKLINE',
    'BULLETEND',
    'BULLETSTART',
    'COURIER',
    'EOF',
    'ITEM',
    'TABLEEND',
    'TABLESTART',
    'TEXINFO',
    'TEXT',
] + list(set(texinfo_commands.values()))
519
520# regular expression rules for tokens, in priority order
521# (all these expressions should match a whole line)
def t_TEXINFO(t):
    # this matches any @command. but not @command{} which just happens to be at
    # the start of a line
    # (the raw string below is the PLY token pattern, not a docstring)
    r'@\w+[^{]*?\n'

    # if the line starts with a known texinfo command, change t.type to the
    # token for that command
    for k in texinfo_commands.keys():
        if t.value[1:].startswith(k):
            t.type = texinfo_commands[k]
            break

    # otherwise it remains a generic TEXINFO token
    return t
535
def t_COURIER(t):
    r'[.|].*\n'
    # drop the leading '.' or '|' marker and convert any inline markup
    t.value = line_markup_convert(t.value[1:])
    return t
540
def t_BULLETSTART(t):
    r'O\+\n'
    # 'O+' alone on a line opens an itemized (bullet) list
    return t
544
def t_BULLETEND(t):
    r'O-\n'
    # 'O-' alone on a line closes an itemized (bullet) list
    return t
548
def t_TABLESTART(t):
    r'o\+\n'
    # 'o+' alone on a line opens a two-column table
    return t
552
def t_TABLEEND(t):
    r'o-\n'
    # 'o-' alone on a line closes a two-column table
    return t
556
def t_ITEM(t):
    r'o\s.*\n'
    # strip the leading 'o ' bullet/row marker, then convert inline markup
    # (raw string for the pattern: '\s' in a plain literal is an invalid
    # escape sequence)
    t.value = re.sub(r'o\s', '', lexer.lexmatch.group(0), count=1)
    t.value = line_markup_convert(t.value)
    return t
562
def t_TEXT(t):
    r'.+\n'
    # any other non-empty line is plain text; convert inline markup
    t.value = line_markup_convert(t.value)
    t.lexer.lineno += 1
    return t
568
def t_BLANKLINE(t):
    r'\n'
    # an empty line separates paragraphs
    t.lexer.lineno += 1
    return t
573
# Emit a synthetic EOF token exactly once per input, so the grammar can
# anchor on it; the second call (flagged via at_eof) really ends lexing and
# resets the line counter for the next input.
def t_eof(t):
    if hasattr(t.lexer,'at_eof'):
        # remove eof flag ready for lexing next input
        delattr(t.lexer,'at_eof')
        t.lexer.lineno = 0
        return None

    t.type = 'EOF'
    t.lexer.at_eof = True

    return t
585
586# Error handling rule
def t_error(t):
    # any text the rules above cannot tokenize is a fatal error
    print("tokenization error, remaining text '%s'" % t.value, file=sys.stderr)
    exit(1)
590
# build the module-level lexer from the t_* rules above
lexer = lex.lex()
592
593#
594# parser
595#
596
def parser_verbose(p):
    """At verbosity above 2, trace the value of each reduced production."""
    if verbose > 2:
        print(p[0], file=sys.stderr)
600
def p_input(p):
    '''input : paragraph
             | input paragraph'''
    # join successive paragraphs with a newline
    if len(p) == 3:
        p[0] = p[1] + '\n' + p[2]
    else:
        p[0] = p[1]
    parser_verbose(p)
609
610# Strictly, text at top level should be paragraphs (i.e terminated by a
611# BLANKLINE), while text contained in rows or bullets may not be, but this
612# grammar doesn't enforce that for simplicity's sake.
def p_paragraph(p):
    '''paragraph : paragraph_content maybe_eof_or_blankline'''
    # wrap the accumulated content in a DocBook para element
    p[0] = '<para>\n' + p[1] + '</para>'
    parser_verbose(p)
617
def p_paragraph_content(p):
    '''paragraph_content : paragraph_line
                         | paragraph_line paragraph_content'''
    # concatenate the lines making up a paragraph
    if len(p) == 3:
        p[0] = p[1] + p[2]
    else:
        p[0] = p[1]
    parser_verbose(p)
626
def p_paragraph_line(p):
    '''paragraph_line : TEXT
                      | texinfocmd
                      | courierblock
                      | table
                      | bulletlist'''
    # a paragraph line's value passes through unchanged
    p[0] = p[1]
634
def p_empty(p):
    'empty :'
    # the empty production produces an empty string
    p[0] = ''
638
def p_maybe_eof_or_blankline(p):
    '''maybe_eof_or_blankline : empty
                              | EOF
                              | BLANKLINE
                              | BLANKLINE EOF'''
    # paragraph terminators produce no output of their own
    p[0] = ''
645
def p_maybe_lines(p):
    '''maybe_lines : empty
                   | paragraph maybe_lines'''
    # zero or more paragraphs, concatenated
    if len(p) == 3:
        p[0] = p[1] + p[2]
    else:
        p[0] = p[1]
    parser_verbose(p)
654
def p_maybe_blankline(p):
    '''maybe_blankline : empty
                       | BLANKLINE'''
    # an optional blank line produces no output
    p[0] = ''
659
def p_courierblock(p):
    '''courierblock : courier'''
    # monospaced (courier) lines become a literallayout block
    p[0] = '<literallayout class="monospaced">' + p[1] + '</literallayout>'
    parser_verbose(p)
664
def p_courier(p):
    '''courier : COURIER
               | COURIER courier'''
    # concatenate successive courier lines
    if len(p) == 3:
        p[0] = p[1] + p[2]
    else:
        p[0] = p[1]
    parser_verbose(p)
673
def p_bullet(p):
    '''bullet : ITEM maybe_lines
              | ITEM BLANKLINE maybe_lines'''
    if len(p) == 3:
        # Glue any text in ITEM into the first para of maybe_lines
        # (This is an unfortunate consequence of the line-based tokenization we do)
        if p[2].startswith('<para>'):
            p[0] = '<listitem><para>' + p[1] + p[2][len('<para>'):] + '</listitem>'
        else:
            p[0] = '<listitem><para>' + p[1] + '</para>' + p[2] + '</listitem>'
    else:
        # ITEM followed by a blank line: the item text stays its own para
        p[0] = '<listitem><para>' + p[1] + '</para>' + p[3] + '</listitem>'
    parser_verbose(p)
687
def p_bullets(p):
    '''bullets : bullet
               | bullet bullets'''
    # join successive bullets with a newline
    if len(p) == 3:
        p[0] = p[1] + '\n' + p[2]
    else:
        p[0] = p[1]
    parser_verbose(p)
696
def p_bulletlist(p):
    '''bulletlist : BULLETSTART bullets BULLETEND maybe_blankline'''
    # wrap the bullets in an itemizedlist
    p[0] = '<itemizedlist>' + p[2] + '</itemizedlist>'
    parser_verbose(p)
701
def p_row(p):
    '''row : ITEM maybe_lines
           | ITEM BLANKLINE maybe_lines'''
    # first column is the item text (as code), second the following lines
    if len(p) == 3:
        p[0] = '<row><entry><code>' + p[1] + '</code></entry><entry>' + p[2] + '</entry></row>'
    else:
        p[0] = '<row><entry><code>' + p[1] + '</code></entry><entry>' + p[3] + '</entry></row>'
    parser_verbose(p)
710
def p_rows(p):
    '''rows : row
            | row rows'''
    # join successive rows with a newline
    if len(p) == 3:
        p[0] = p[1] + '\n' + p[2]
    else:
        p[0] = p[1]
    parser_verbose(p)
719
def p_table(p):
    '''table : TABLESTART rows TABLEEND maybe_blankline'''
    # tables from 'o+'/'o-' markup are always two-column
    p[0] = '<informaltable><tgroup cols="2"><tbody>' + p[2] + '</tbody></tgroup></informaltable>'
    parser_verbose(p)
724
def p_texinfocmd(p):
    '''texinfocmd : unknown_texinfocmd
                  | comment
                  | multitable
                  | nottex
                  | tex'''
    # a texinfo command's value passes through unchanged
    p[0] = p[1]
732
def p_unknown_texinfocmd(p):
    '''unknown_texinfocmd : TEXINFO'''
    # warn, but pass the unrecognized command through verbatim
    print("unknown texinfo command '%s'" % p[1].strip(), file=sys.stderr)
    p[0] = p[1]
    parser_verbose(p)
738
def p_nottex(p):
    '''nottex : IFNOTTEX paragraph_content ENDIFNOTTEX'''
    # keep the contents of @ifnottex, discarding the delimiters
    p[0] = p[2]
742
def p_tex(p):
    '''tex : IFTEX paragraph_content ENDIFTEX'''
    # text for TeX formatter inside @iftex is discarded
    p[0] = ''
747
def p_comment(p):
    '''comment : COMMENT'''
    # comment text is discarded
    p[0] = ''
752
def p_mct_columns(p):
    '''mct_columns : maybe_lines
                   | maybe_lines MCT_COLUMN_SEPARATOR mct_columns'''
    # each @tab-separated column becomes an entry element
    if len(p) == 4:
        p[0] = '<entry>' + p[1] + '</entry>' + p[3]
    else:
        p[0] = '<entry>' + p[1] + '</entry>'
    parser_verbose(p)
761
def p_mct_row(p):
    '''mct_row : MCT_ITEM mct_columns'''
    # an @item line starts a multitable body row
    p[0] = '<row>' + p[2] + '</row>'
    parser_verbose(p)
766
def p_mct_rows(p):
    '''mct_rows : mct_row
                | mct_row mct_rows'''
    # join successive multitable rows with a newline
    if len(p) == 3:
        p[0] = p[1] + '\n' + p[2]
    else:
        p[0] = p[1]
    parser_verbose(p)
775
def p_mct_header(p):
    '''mct_header : MCT_HEADITEM mct_columns'''
    # an @headitem line provides the multitable's header row
    p[0] = '<row>' + p[2] + '</row>'
    parser_verbose(p)
780
def p_multitable(p):
    '''multitable : MULTICOLUMNTABLE mct_header mct_rows ENDMULTICOLUMNTABLE'''
    # this doesn't handle the prototype row form of @multitable, only the @columnfractions form
    colfrac = p[1].replace('@multitable @columnfractions', '').split()
    # one proportional colspec per column fraction
    colspec = '\n'.join(['<colspec colwidth="%s*"/>' % (c) for c in colfrac])
    header = '<thead>' + p[2] + '</thead>\n'
    body = '<tbody>' + p[3] + '</tbody>\n'
    p[0] = '<informaltable><tgroup cols="' + str(len(colfrac)) +'">' + colspec + header + body  + '</tgroup></informaltable>'
    parser_verbose(p)
790
def p_error(t):
    # report location, offending token and lookahead, then abort
    # (parser.token() consumes the next token purely for this diagnostic)
    print('parse error at line %d, token %s, next token %s' % (t.lineno, t, parser.token()), file=sys.stderr)
    exit(1)
794
# build the module-level parser from the p_* grammar rules above
parser = yacc.yacc(start='input')
796
797#
798#
799#
800
def main(file):
    """Read makedoc source from the open file object, extract and process
    its comment markup, and print the resulting DocBook XML document to
    stdout.  Exits with an error if no output was produced."""
    content = file.read()
    # tolerate a file opened in binary mode (the command-line path opens
    # with 'rb'): on Python 3 that yields bytes, but the string processing
    # below needs str
    if isinstance(content, bytes):
        content = content.decode('utf-8')
    content = remove_noncomments(content)
    processed = process(content)
    perform(processed)

    # output the XML tree
    s = lxml.etree.tostring(rootelement, pretty_print=True)
    # lxml returns bytes on Python 3; decode so the print and the regex
    # search below operate on text rather than on a bytes object
    if isinstance(s, bytes):
        s = s.decode('utf-8')

    if not s:
        print('No output produced (perhaps the input has no makedoc markup?)', file=sys.stderr)
        exit(1)

    print(s)

    # warn about texinfo commands which didn't get processed
    match = re.search('@[a-z*]+', s)
    if match:
        print('texinfo command %s remains in output' % match.group(), file=sys.stderr)
820
821#
822#
823#
824
if __name__ == '__main__' :
    options = OptionParser()
    # default verbose to 0: 'count' options otherwise default to None, and
    # the 'verbose > threshold' comparisons elsewhere fail on Python 3 when
    # -v is not given
    options.add_option('-v', '--verbose', action='count', dest = 'verbose', default = 0)
    options.add_option('-c', '--cache', action='store_true', dest = 'cache', help="just ensure PLY cache is up to date")
    (opts, args) = options.parse_args()

    if opts.cache:
        sys.exit()

    verbose = opts.verbose

    # read from the named file if given, otherwise from stdin
    if len(args) > 0:
        main(open(args[0], 'rb'))
    else:
        main(sys.stdin)
Note: See TracBrowser for help on using the repository browser.