# Python script to translate HTML output from the FrameMaker RPL Functions document
# into HTML usable within RiverWare.
# Phil Weinstein, CADSWES, edit: 3-30-2014
# Note: Non-UTF-8 characters (e.g. three types of fancy quotes) need to be replaced in the source file first.
from xml.dom import minidom
import datetime
# Input: the FrameMaker HTML export, read and parsed as XML below.
srcPath = "RPLPredefinedFunctions_FM12_Fix.htm"
# Outputs: the generated document is written to both of these paths.
htmlOutPath = "GenPredefinedFunctions.html"
xmlOutPath = "GenPredefinedFunctions.xml"
# Output: text file receiving the "cp" command lines built by translateImageFile().
imgMovePath = "imageMove.txt"
# Directory prefixes for the source and target side of those "cp" commands;
# OutImageDir is also the prefix of the rewritten image "src" attributes.
InImageDir = "fmFuncImages/"
OutImageDir = "rplFuncImages/"
# When True, startFunction() puts "<number>. " in front of each function header.
ShowFunctionNumInHeader = False
# Load the FrameMaker HTML export and parse it as XML.
# The encoding is stated explicitly: the text is re-encoded as UTF-8 below, so
# it must not be decoded with whatever the platform default happens to be.
with open(srcPath, 'r', encoding='utf-8') as content_file:
    xmlStr = content_file.read()

# it looks like minidom doesn't support character entities?
# Non-breaking spaces are therefore turned into plain spaces before parsing,
# whether they appear as the "&nbsp;" entity or as the raw U+00A0 character.
# NOTE(review): it is unclear which of the two forms the FrameMaker output
# really contains; both are handled here -- confirm against a real export.
xmlStr = xmlStr.replace ("&nbsp;", " ").replace ("\u00a0", " ")
xmlStr = xmlStr.encode ('utf-8')

impl = minidom.getDOMImplementation()
xmldoc = minidom.parseString(xmlStr)
# Parsing State Globals
# Shared, mutable state updated by the functions below while the source
# document is walked.
functionCounter = 0 # number of functions started so far; incremented by startFunction()
currentFuncNode = None # output "div" of the function currently being filled (None before the first one)
currentFuncName = "" # name of that function; also used to name its image files and ids
currentRowNode = None # cloned "tr" whose children are being processed, else None
currentCellNode = None # cloned "td" whose children are being processed, else None
currentRowType = None # row type taken from the row's heading cell text ("Arg" for numbered rows)
currentParamIndex = None # argument number parsed from a heading cell whose text is an integer
currentColNumber = 0 # 1-based number of the current "td" within its row (0 before the first cell)
currentCellHasColorBg = False # set when the current cell is given the "OrangeCell" class
currentParagraphClass = "" # class attribute of the most recently seen "p" element
priorParagraphClass = "" # class attribute of the "p" element seen before that one
indexList = None # "ol" element of the function index; created further below
imageMoveCommands = "" # accumulated "cp" command lines, one per translated image
# Utility: Return text from node and all its children
def allText (node):
    """Return the concatenated text of ``node`` and all of its descendants.

    A text node contributes its ``data``.  Any other node contributes the
    text of its children, visited in document order; a node without text
    below it yields the empty string.
    """
    if node.nodeType == node.TEXT_NODE:
        return node.data
    # join() assembles the pieces in one pass instead of repeated "+=".
    return "".join (allText (child) for child in node.childNodes)
# Debug Utility: print a node and its content
def printNode(node, level=0):
    """Print ``node`` with its nesting level and text, then do the same for each child.

    Each line starts with "pn-<level>:", followed by the node object and the
    text returned by allText() for it.  Children are printed at ``level + 1``.
    """
    print ("pn-%s:" % (level,), node, allText (node))
    for childNode in node.childNodes:
        printNode (childNode, level + 1)
# Utility: Function Anchor name from Function Name
def funcAnchorName (funcName):
    """Return the anchor name (element id / link target) for ``funcName``.

    The function name is used unchanged.
    """
    # Disabled alternative: "RplFunc_" + funcName
    anchorName = funcName
    return anchorName
# Utility
def classOfRowType (currentRowType, col):
    """Return the "RplFunc_..." class name for a body cell of the given row type.

    "Type" and "MathematicalExpression" rows map to fixed names.  In an "Arg"
    row, column 2 is the argument type and column 3 the argument name; any
    other column gets "RplFunc_Arg".  Every other row type is appended to the
    "RplFunc_" prefix unchanged.
    """
    if currentRowType == "Arg":
        if col == 2:
            suffix = "ArgType"
        elif col == 3:
            suffix = "ArgName"
        else:
            suffix = "Arg"
    elif currentRowType == "Type":
        suffix = "RetType"
    elif currentRowType == "MathematicalExpression":
        suffix = "MathExpr"
    else:
        suffix = currentRowType
    return "RplFunc_" + suffix
# Utility
def translateImageFile (imageNode):
    """Point an "img" element at its renamed file and record the copy command.

    The new file name is the current function name plus the extension of the
    original "src" value (the text after its last "."), placed under
    OutImageDir.  The element's "alt", "id" and "src" attributes are set
    accordingly, and a "cp <InImageDir + original file name> <new path>" line
    is appended to the global imageMoveCommands.
    """
    global imageMoveCommands

    sourceRef = imageNode.getAttribute ("src")
    baseName = sourceRef.rsplit ('/', 1)[-1]    # text after the last "/"
    extension = sourceRef.rsplit ('.', 1)[-1]   # text after the last "."

    newFileName = currentFuncName + "." + extension
    newFilePath = OutImageDir + newFileName

    imageNode.setAttribute ("alt", newFileName)
    imageNode.setAttribute ("id", "image" + currentFuncName)
    imageNode.setAttribute ("src", newFilePath)

    imageMoveCommands += "cp %s %s\n" % (InImageDir + baseName, newFilePath)
# Function: Start a new Function element
def startFunction (srcNode,funcName):
    """Open a new function section in the generated document.

    Updates the current function name, node and counter, prints a progress
    line, appends a "div" (with an "h3" header holding the function name) to
    the document body and, when the index list exists, adds a linked entry
    for the function to it.  ``srcNode`` is accepted but not used.
    """
    global functionCounter
    global currentFuncNode
    global currentFuncName

    currentFuncName = funcName
    functionCounter += 1
    print ("Function", functionCounter, funcName)

    funcNumStr = str (functionCounter)
    anchorId = funcAnchorName (funcName)

    # Container for everything belonging to this function.  The anchor is the
    # div's "id": HTML5 uses ids instead of an "a" tag with a "name" attribute.
    funcDiv = newDoc.createElement ("div")
    divAttributes = (
        ("class", "RplFunc_Div"),
        ("id", anchorId),
        ("funcName", funcName),
        ("funcNum", funcNumStr),
    )
    for attrName, attrValue in divAttributes:
        funcDiv.setAttribute (attrName, attrValue)
    currentFuncNode = funcDiv

    # Header: optional "<number>. " text followed by the name in its own span.
    nameSpan = newDoc.createElement ("span")
    nameSpan.setAttribute ("class", "RplFunc_Name")
    nameSpan.appendChild (newDoc.createTextNode (funcName))

    header = newDoc.createElement ("h3")
    if ShowFunctionNumInHeader:
        header.appendChild (newDoc.createTextNode (funcNumStr + ". "))
    header.appendChild (nameSpan)

    funcDiv.appendChild (header)
    bodyNode.appendChild (funcDiv)

    if not indexList:
        return

    # Index entry: an HTML list item holding a link to the function's anchor.
    indexLink = newDoc.createElement ("a")
    indexLink.setAttribute ("href", "#" + anchorId)
    indexLink.appendChild (newDoc.createTextNode (funcName))

    indexItem = newDoc.createElement ("li")
    indexItem.appendChild (indexLink)
    indexList.appendChild (indexItem)
# Node Processing, parameters: (cloned node, original source node)
def conditionNode (node, srcNode):
    """Adjust a freshly cloned node in place for the generated document and return it.

    ``node`` is the (shallow) clone that will go into the output; ``srcNode``
    is the source node it was cloned from, used only to read the text below it.

    Text nodes: the strings "CLICK HERE" and "HERE" are removed.

    Element nodes:
      * the "style" attribute is dropped;
      * "table" gets border="1"; "tr" resets the per-row state and gets
        valign="top" and class "CellText"; "td" advances the column counter;
        "p" updates the current/prior paragraph class; "img" is passed to
        translateImageFile();
      * FrameMaker "FM_..." classes are translated to "RplFunc_..." classes
        or removed;
      * a heading-cell element marks the enclosing cell "OrangeCell" and
        determines the row type from its text;
      * a body-cell element gives the enclosing cell its semantic class.

    The parsing-state globals declared below are read and updated as a side
    effect.
    """
    global currentRowNode
    global currentCellNode
    global currentRowType
    global currentParamIndex
    global currentColNumber
    global currentCellHasColorBg
    global currentParagraphClass
    global priorParagraphClass
    if node.nodeType == node.TEXT_NODE:
        # NOTE(review): str.replace also removes "HERE" inside longer words
        # (e.g. "WHERE" becomes "W") -- confirm that cannot occur in the source.
        if "HERE" in node.data:
            node.data = node.data.replace ("CLICK HERE", "")
            node.data = node.data.replace ("HERE", "")
    if node.nodeType == node.ELEMENT_NODE:
        # Original class, captured before any of the rewriting below.
        classAttrib = node.getAttribute ("class") if node.hasAttribute ("class") else ""
        if node.hasAttribute ("style"):
            node.removeAttribute ("style")
        if (node.tagName == "table"):
            node.setAttribute ("border", "1")
        if node.tagName == "tr":
            # New row: forget the previous row's type, argument index and column.
            currentRowType = None # set in cell heading cell
            currentParamIndex = None
            currentColNumber = 0 # 1...
            node.setAttribute ("valign", "top")
            node.setAttribute ("class", "CellText")
        elif node.tagName == "td":
            currentColNumber += 1 # 1...
            currentCellHasColorBg = False
        elif node.tagName == "p":
            # Remember the previous paragraph's class; it decides how a
            # following FM_RuleCode paragraph is classified below.
            priorParagraphClass = currentParagraphClass
            currentParagraphClass = classAttrib
        elif node.tagName == "img":
            translateImageFile (node)
        # Translate FrameMaker HTML classes to RplFunc classes
        if (classAttrib == "FM_StepNumber"):
            node.removeAttribute ("class")
        elif (classAttrib == "FM_GUItext"):
            node.setAttribute ("class", "RplFunc_GUItext")
        elif (classAttrib == "FM_EmphasisBlue"):
            node.setAttribute ("class", "RplFunc_BlueLink")
        elif (classAttrib == "FM_RulesCodeSyntaxExample") or (classAttrib == "FM_RulesCodeReturnExample"):
            node.setAttribute ("class", "RplFunc_ExampleTitle")
        elif (classAttrib == "FM_RuleCode"):
            # Code following an example title is tagged by the kind of example.
            if priorParagraphClass == "FM_RulesCodeSyntaxExample":
                node.setAttribute ("class", "RplFunc_Syntax_Examp")
            elif priorParagraphClass == "FM_RulesCodeReturnExample":
                node.setAttribute ("class", "RplFunc_Return_Examp")
            else:
                node.setAttribute ("class", "RplFunc_Code_Examp")
        elif (classAttrib.startswith ("FM_")):
            # Any other FrameMaker class is dropped (this includes the
            # FM_CellHeading / FM_CellBody classes tested below).
            node.removeAttribute ("class")
        isCellHeading = (classAttrib == "FM_CellHeading")
        isCellBody = (classAttrib == "FM_CellBody")
        if currentCellNode and isCellHeading:
            currentCellNode.setAttribute ("class", "OrangeCell")
            currentCellHasColorBg = True
            nodeText = allText (srcNode).replace (" ","") # remove spaces
            currentParamIndex = None # default
            # Heading text that parses as an integer marks an argument row;
            # any other heading text is itself taken as the row type.
            try:
                paramIndex = int (nodeText)
                currentRowType = "Arg"
                currentParamIndex = paramIndex
                # print ("currentRowNode:", currentRowNode, "Arg:", paramIndex)
                if currentRowNode:
                    currentRowNode.setAttribute ("rplArg", str (paramIndex))
            except ValueError:
                currentRowType = nodeText
            # print ("... SET ROW TYPE:", currentRowType, "col:", currentColNumber)
        # Semantic Tags
        if currentCellNode and currentRowType and isCellBody:
            className = classOfRowType (currentRowType, currentColNumber)
            currentCellNode.setAttribute ("class", className)
            if currentRowType == "Arg":
                currentCellNode.setAttribute ("argInx", str (currentParamIndex))
            elif currentRowType == "Arguments":
                # Overrides the class set just above for cells of an "Arguments" row.
                currentCellNode.setAttribute ("class", "OrangeCell")
                currentCellHasColorBg = True
    return node
# Function: Clone source node, append to target node, and recursively process all child nodes
def appendCloneChildren (targNode, srcNode):
    """Copy ``srcNode`` and its subtree under ``targNode``, conditioning every node.

    The node is cloned shallowly and passed through conditionNode().  A "span"
    left without attributes is not emitted; its children are attached
    directly to ``targNode`` instead.  "script" child elements are skipped.
    While the children of a "tr" / "td" are processed, the globals
    currentRowNode / currentCellNode refer to the corresponding clone; they
    are reset to None afterwards.
    """
    global currentRowNode
    global currentCellNode

    shallowCopy = srcNode.cloneNode (False) # not deep
    conditioned = conditionNode (shallowCopy, srcNode)

    # Tag name of the source node, or None when it is not an element.
    srcTag = srcNode.tagName if srcNode.nodeType == srcNode.ELEMENT_NODE else None

    if srcTag == "span" and not conditioned.attributes.keys():
        # Empty span: drop the wrapper, keep its content.
        childTarget = targNode
    else:
        targNode.appendChild (conditioned)
        childTarget = shallowCopy

    inRow = (srcTag == "tr")
    inCell = (srcTag == "td")
    if inRow:
        currentRowNode = childTarget
    if inCell:
        currentCellNode = childTarget

    for srcChild in srcNode.childNodes:
        isScript = (srcChild.nodeType == srcChild.ELEMENT_NODE) and (srcChild.tagName == "script")
        if not isScript:
            appendCloneChildren (childTarget, srcChild)

    if inRow:
        currentRowNode = None
    if inCell:
        currentCellNode = None
# Primary node recursion
def parseNode (node):
    """Walk the source document and route each node.

    * A "p" element of class "FM_Heading1" starts a new function, named by
      the last whitespace-separated word of its text (an empty heading is
      only reported).
    * "script" elements are ignored.
    * Once a function has been started, any other node is cloned, with its
      subtree, into the current function's node.
    * Before that, the node's children are visited recursively.
    """
    tagName = node.tagName if (node.nodeType == node.ELEMENT_NODE) else ""

    if tagName == "script":
        return

    isFuncHeading = (tagName == "p") and node.hasAttribute ("class") and \
        (node.getAttribute ("class") == "FM_Heading1")
    if isFuncHeading:
        headWords = allText (node).split()
        if headWords:
            startFunction (node, headWords[-1])
        else:
            print ("Empty FM_Heading1 after Function", currentFuncName)
        return

    if currentFuncNode:
        appendCloneChildren (currentFuncNode, node)
        return

    for childNode in node.childNodes:
        parseNode (childNode)
# Style sheet text for the generated document; addCssStyles() places it inside
# the "style" element of the document head.
# Part (1) lists classes that only carry meaning (empty rules); part (2)
# defines the display styles.
# NOTE(review): ".h3" is a class selector, but the "h3" elements created in
# this file are given no class attribute -- confirm whether "h3" was intended.
# NOTE(review): ".RplFunc_RuleCodeIndent" is styled here, while the code visible
# in this file assigns "RplFunc_Code_Examp" -- confirm which name is correct.
CssStyleText = """
/*** (1) Semantics-Only Classes ***/
.RplFunc_Div {} /* div element for a RplFunction */
/* attrib: id (function name) */
/* attrib: funcName */
/* attrib: funcNum [1..]) */
.RplFunc_Name {} /* function name */
.RplFunc_RetType {} /* return type */
.RplFunc_ArgType {} /* argument type (attrib: argInx [1..]) */
.RplFunc_ArgName {} /* argument name (attrib: argInx [1..]) */
/* xpath "tr[@rplArg]" selects 'tr' elements with an arg type, name */
.RplFunc_Evaluation {} /* evalution description */
.RplFunc_Comments {} /* other comments */
/* .RplFunc_Syntax_Examp - syntax example; defined below. */
/* .RplFunc_Return_Examp - return example; defined below. */
/*** (2) Display Style Classes ***/
.h3 {
font-family: Arial, Helvetica, sans-serif;
font-size: larger;
margin-top: 0px;
margin-bottom: 8px;
}
.OrangeCell {
font-family: Verdana, Geneva, sans-serif;
color: #FFF;
background-color: #cd7345;
vertical-align: top;
font-size: small;
font-weight: bold;
}
.OrangeCell:first-child {
text-align: center;
}
.CellText {
font-family: Verdana, Geneva, sans-serif;
vertical-align: top;
font-size: small;
margin-left: 4px;
}
.CellText p {
margin-top: 12px;
margin-right: 5px;
margin-bottom: 2px;
margin-left: 5px;
}
.CellText p:first-child {
margin-top: 2px;
}
.RplFunc_GUItext {
font-weight: bold;
font-family: Verdana, Geneva, sans-serif;
font-size: small;
}
.RplFunc_ExampleTitle {
font-family: Verdana, Geneva, sans-serif;
font-size: small;
margin-top: 6px;
margin-bottom: 6px;
}
.RplFunc_Syntax_Examp, .RplFunc_Return_Examp, .RplFunc_RuleCodeIndent {
font-family: "Courier New", Courier, monospace;
margin-left: 20px;
font-size: small;
margin-top: 6px;
margin-bottom: 6px;
}
.RplFunc_BlueLink {
color: #03F;
text-decoration: none;
}
"""
def addCssStyles (headNode):
    """Append a "style" element (type "text/css") holding CssStyleText to ``headNode``.

    The style sheet text is stored as a comment node inside the element.
    """
    cssComment = newDoc.createComment (CssStyleText)

    styleNode = newDoc.createElement ("style")
    styleNode.setAttribute ("type", "text/css")
    styleNode.appendChild (cssComment)

    headNode.appendChild (styleNode)
# Title used for both the head "title" element and the first body paragraph.
titleText = None

def computeTitleText():
    """Set the global ``titleText`` to the fixed title followed by the current local time."""
    global titleText
    timeStamp = datetime.datetime.now().strftime ("%b %d, %Y [%H:%M:%S]")
    titleText = "RPL Functions, processed from FrameMaker HTML Output -- " + timeStamp

# Computed once, so every use of the title carries the same time stamp.
computeTitleText()
def addHeaderTitle (headNode):
    """Append a "title" element containing the global ``titleText`` to ``headNode``."""
    titleNode = newDoc.createElement ("title")
    titleNode.appendChild (newDoc.createTextNode (titleText))
    headNode.appendChild (titleNode)
def addBodyTitle (bodyNode):
    """Append a "p" element containing the global ``titleText`` to ``bodyNode``.

    ``bodyNode`` is the element the paragraph is appended to (the parameter
    shadows the module-level name of the same spelling).
    """
    # The former "global titleTextInst" declaration named a variable that is
    # neither defined nor used anywhere in this function, so it is gone.
    titleParagraph = newDoc.createElement ("p")
    titleTextElem = newDoc.createTextNode (titleText)
    titleParagraph.appendChild (titleTextElem)
    bodyNode.appendChild (titleParagraph)
# Generated Document / HTML template with CSS
# Create the output document: an "html" root holding "head" and "body".
newDoc = impl.createDocument(None, "html", None)
newDocTop = newDoc.documentElement
headNode = newDoc.createElement ("head")
bodyNode = newDoc.createElement ("body")
bodyNode.setAttribute ("bgcolor", "#FFFFFF")
bodyNode.setAttribute ("text", "#000000")
newDocTop.appendChild (headNode)
newDocTop.appendChild (bodyNode)
# The head receives the title and the style sheet; the body starts with the
# title paragraph.
addHeaderTitle (headNode)
addCssStyles (headNode)
addBodyTitle (bodyNode)
# Setup Function Index
# A "div" with an "h3" caption and an ordered list; startFunction() appends
# one linked list item per function to indexList.
indexDiv = newDoc.createElement ("div")
indexDiv.setAttribute ("id", "FunctionIndex")
indexTitle = newDoc.createElement ("h3")
indexTitleText = newDoc.createTextNode ("RPL Functions:")
indexTitle.appendChild (indexTitleText)
indexDiv.appendChild (indexTitle)
indexList = newDoc.createElement ("ol") # HTML ordered list
indexDiv.appendChild (indexList)
# The index is appended before parsing, so it precedes all function sections.
bodyNode.appendChild (indexDiv)
# print ("--- PRINT ORIGINAL DOCUMENT ---")
# printNode(xmldoc.documentElement)

# print ("--- PARSE DOCUMENT ---")
# Walk the source document; this fills the body of the generated document,
# the function index and imageMoveCommands.
parseNode(xmldoc.documentElement)

# print ("--- PRINT NEW DOCUMENT ---")
# printNode(newDocTop)

# write new document
# Note: toprettyxml() followed by a regular-expression clean-up was tried here
# and did not work well, so the plain serialization is used.
xml = newDocTop.toxml()

# The same serialization goes to both the XML and the HTML output path; the
# image copy commands go to their own file.  "with" closes each file even when
# a write fails.
with open (xmlOutPath, "wb") as outHandle:
    outHandle.write (bytes(xml, 'UTF-8'))

with open (htmlOutPath, "wb") as outHandle:
    outHandle.write (bytes(xml, 'UTF-8'))

with open (imgMovePath, "wb") as outHandle:
    outHandle.write (bytes(imageMoveCommands, 'UTF-8'))

print ("Success. Input:", srcPath + ";", "Output:", htmlOutPath + ",", \
    xmlOutPath + ",", imgMovePath)
print (titleText)
# --- (end) ---