#! /usr/bin/env python
"""
this file converts simple html text into a docbook xml variant.
The mapping of markups and links is far from perfect. But all we
want is the docbook-to-pdf converter and similar technology being
present in the world of docbook-to-anything converters. """
from datetime import date
import match
import sys
# Short alias for match.Match, keeping the substitution rules compact.
# In this file it is used as: m()(pattern[, flags]) >> replacement
# NOTE(review): the meaning of the call and of `>>` is defined in the
# project-local `match` module, which is not visible here — confirm there.
m = match.Match
class htm2dbk_conversion_base:
regexlist = [
m()("[hH]2>(.*)", "m") >> "\n\\1",
m()("<[hH]2>") >> "",
m()("<[Pp]([> ])","m") >> "") >> "",
m()("<(pre|PRE)>") >> "",
m()("(pre|PRE)>") >> "",
m()("<[hH]3>") >> "",
m()("[hH]3>((?:.(?!))*.?)", "s") >> "\\1",
m()("]*>","s") >> "",
m()("]*>","s") >> "",
m()("(<\w+\b[^<>]*\swidth=)(\d+\%)","s") >> "\\1\"\\2\"",
m()("(<\w+\b[^<>]*\s\w+=)(\d+)","s") >> "\\1\"\\2\"",
m()("&&") >> "\&\;\&\;",
m()("\$\<") >> "\$\<\;",
m()("&(\w+[\),])") >> "\&\;\\1",
m()("(?)span(\s[^<>]*)?>","s") >> "\\1phrase\\2>",
m()("(?)small(\s[^<>]*)?>","s") >> "\\1note\\2>",
m()("(?)(b|em|i)>")>> "\\1emphasis>",
m()("(?)(li)>") >> "\\1listitem>",
m()("(?)(ul)>") >> "\\1itemizedlist>",
m()("(?)(ol)>") >> "\\1orderedlist>",
m()("(?)(dl)>") >> "\\1variablelist>",
m()("