# Python script to translate HTML output from the FrameMaker RPL Functions document
# into HTML usable within RiverWare.
# Phil Weinstein, CADSWES, edit: 3-30-2014
# Note: Non-UTF-8 characters needed to be replaced first. Fancy quotes (three types).

from xml.dom import minidom
import datetime

# --- Configuration ---------------------------------------------------------

# srcPath = "RPLPredefinedFunctions_FM12.htm"
srcPath = "test3.html"          # FrameMaker HTML export to read
htmlOutPath = "out3.html"
xmlOutPath = "out3.xml"
imgMovePath = "imageMove.txt"

InImageDir = "fmFuncImages/"    # directory prefix of the source images
OutImageDir = "rplFuncImages/"  # directory prefix of the renamed images
ShowFunctionNumInHeader = False

# --- Load and parse the source document -----------------------------------

# Decode explicitly as UTF-8 (the input is expected to be UTF-8, see the note
# at the top of this file) instead of relying on the platform default
# encoding; the text is re-encoded as UTF-8 for the parser below.
with open(srcPath, 'r', encoding='utf-8') as content_file:
    xmlStr = content_file.read()

# it looks like minidom doesn't support character entities?
# XML itself predefines only five named entities, so the HTML-only "&nbsp;"
# reference is turned into a plain space before parsing.  A literal no-break
# space character is normalized the same way.
xmlStr = xmlStr.replace("&nbsp;", " ").replace("\u00a0", " ")
xmlStr = xmlStr.encode('utf-8')

impl = minidom.getDOMImplementation()
xmldoc = minidom.parseString(xmlStr)

# --- Parsing State Globals -------------------------------------------------

functionCounter = 0       # number of function sections started so far
currentFuncNode = None    # output "div" of the function being filled
currentFuncName = ""
currentRowNode = None     # output "tr" currently being filled, if any
currentCellNode = None    # output "td" currently being filled, if any
currentRowType = None     # row kind, taken from the row's heading cell
currentParamIndex = None  # argument number when the row is an "Arg" row
currentColNumber = 0      # 1...
currentCellHasColorBg = False
currentParagraphClass = ""
priorParagraphClass = ""

indexList = None
imageMoveCommands = ""


# Utility: Return text from node and all its children
def allText(node):
    """Return the text of ``node`` and of all its descendants, concatenated.

    Text nodes contribute their ``data``; every other node contributes the
    text of its children, visited in document order.
    """
    if node.nodeType == node.TEXT_NODE:
        return node.data
    # join() avoids re-copying the accumulated string for every child.
    return "".join(allText(child) for child in node.childNodes)


# Debug Utility: print a node and its content
def printNode(node, level=0):
    """Print ``node`` and, recursively, its children, tagged with the depth."""
    title = "pn-" + str(level) + ":"
    print(title, node, allText(node))
    for child in node.childNodes:
        printNode(child, level + 1)


# Utility: Function Anchor name from Function Name
def funcAnchorName(funcName):
    """Return the anchor id used for ``funcName`` (currently the name itself)."""
    # return ("RplFunc_" + funcName)
    return funcName


# Utility
def classOfRowType(currentRowType, col):
    """Return the semantic CSS class for a body cell.

    ``currentRowType`` is the row kind and ``col`` the 1-based column number
    of the cell.  Row kinds without a dedicated class map to
    ``"RplFunc_" + currentRowType``; that includes "Arg" cells outside
    columns 2 and 3.
    """
    if currentRowType == "Type":
        return "RplFunc_RetType"
    if currentRowType == "MathematicalExpression":
        return "RplFunc_MathExpr"
    if currentRowType == "Arg":
        if col == 2:
            return "RplFunc_ArgType"
        elif col == 3:
            return "RplFunc_ArgName"
    return "RplFunc_" + currentRowType


# Utility
def translateImageFile(imageNode):
    """Point an ``img`` element at its renamed copy and record the copy command.

    The image is renamed after the current function (keeping the original
    extension); ``alt``, ``id`` and ``src`` are rewritten on ``imageNode`` and
    a ``cp`` line is appended to the global ``imageMoveCommands``.
    """
    global imageMoveCommands

    origFilePath = imageNode.getAttribute("src")
    origFileName = origFilePath.split('/')[-1]
    srcFilePath = InImageDir + origFileName

    # Take the extension from the file name only: splitting the whole path on
    # "." would pick up a dot in a directory name and yield an "extension"
    # that contains a "/".
    if "." in origFileName:
        imageFileName = currentFuncName + "." + origFileName.rsplit(".", 1)[1]
    else:
        imageFileName = currentFuncName
    imageFilePath = OutImageDir + imageFileName

    # NOTE(review): name and id depend only on the function name, so two
    # images inside one function would share both -- confirm that each
    # function has at most one image.
    imageNode.setAttribute("alt", imageFileName)
    imageNode.setAttribute("id", "image" + currentFuncName)
    imageNode.setAttribute("src", imageFilePath)

    imageMoveCommands += "cp " + srcFilePath + " " + imageFilePath + "\n"


# Function: Start a new Function element
def startFunction(srcNode, funcName):
    """Open the output section for function ``funcName``.

    Increments the function counter, creates the function's ``div`` (with an
    ``h3`` header) under the output body, makes it the current function node
    and, when the index list exists, adds a link to it there.  ``srcNode`` is
    accepted but not used.
    """
    global functionCounter
    global currentFuncNode
    global currentFuncName
    global indexList

    currentFuncName = funcName
    functionCounter += 1

    # Note: HTML5 doesn't use the "a" (anchor tag) with a "name" attribute.
    # It uses "Id"s instead.
    #-- funcAnchor = newDoc.createElement ("a")
    #-- funcAnchor.setAttribute ("name", funcAnchorName (funcName))
    #-- bodyNode.appendChild (funcAnchor)

    currentFuncNode = newDoc.createElement("div")
    currentFuncNode.setAttribute("class", "RplFunc_Div")
    currentFuncNode.setAttribute("id", funcAnchorName(funcName))
    currentFuncNode.setAttribute("funcName", funcName)
    currentFuncNode.setAttribute("funcNum", str(functionCounter))

    funcHdr = newDoc.createElement("h3")
    funcNameSpan = newDoc.createElement("span")
    funcNameSpan.setAttribute("class", "RplFunc_Name")
    funcNameText = newDoc.createTextNode(funcName)
    funcNameSpan.appendChild(funcNameText)

    if ShowFunctionNumInHeader:
        funcNumText = newDoc.createTextNode(str(functionCounter) + ". ")
        funcHdr.appendChild(funcNumText)

    funcHdr.appendChild(funcNameSpan)
    currentFuncNode.appendChild(funcHdr)
    bodyNode.appendChild(currentFuncNode)

    if indexList:
        indexFuncItem = newDoc.createElement("li")  # HTML list item
        indexFuncLink = newDoc.createElement("a")
        indexFuncLink.setAttribute("href", "#" + funcAnchorName(funcName))
        indexFuncText = newDoc.createTextNode(funcName)
        indexFuncLink.appendChild(indexFuncText)
        indexFuncItem.appendChild(indexFuncLink)
        indexList.appendChild(indexFuncItem)


# Node Processing, parameters: (cloned node, original source node)
def conditionNode(node, srcNode):
    """Rewrite a freshly cloned node in place for the output document.

    ``node`` is the shallow clone that will be inserted into the output;
    ``srcNode`` is the original, used to read the full text of heading cells.
    Text nodes have "CLICK HERE" / "HERE" removed.  Elements lose their
    ``style`` attribute, have FrameMaker ``FM_*`` classes translated or
    removed, and update the row / cell / paragraph parsing state.  Heading
    cells set the row type (or the argument number) and body cells receive
    the matching semantic class.  Returns ``node``.
    """
    global currentRowNode
    global currentCellNode
    global currentRowType
    global currentParamIndex
    global currentColNumber
    global currentCellHasColorBg
    global currentParagraphClass
    global priorParagraphClass

    if node.nodeType == node.TEXT_NODE:
        if "HERE" in node.data:
            node.data = node.data.replace("CLICK HERE", "")
            node.data = node.data.replace("HERE", "")

    if node.nodeType == node.ELEMENT_NODE:
        # Remember the source class before it is rewritten below.
        classAttrib = node.getAttribute("class") if node.hasAttribute("class") else ""

        if node.hasAttribute("style"):
            node.removeAttribute("style")

        if node.tagName == "table":
            node.setAttribute("border", "1")

        if node.tagName == "tr":
            currentRowType = None  # set in cell heading cell
            currentParamIndex = None
            currentColNumber = 0  # 1...
            node.setAttribute("valign", "top")
            node.setAttribute("class", "RplFunc_CellText")
        elif node.tagName == "td":
            currentColNumber += 1  # 1...
            currentCellHasColorBg = False
        elif node.tagName == "p":
            priorParagraphClass = currentParagraphClass
            currentParagraphClass = classAttrib
        elif node.tagName == "img":
            translateImageFile(node)

        # Translate FrameMaker HTML classes to RplFunc classes
        if classAttrib == "FM_StepNumber":
            node.removeAttribute("class")
        elif classAttrib == "FM_GUItext":
            node.setAttribute("class", "RplFunc_GUItext")
        elif classAttrib == "FM_EmphasisBlue":
            node.setAttribute("class", "RplFunc_BlueLink")
        elif classAttrib in ("FM_RulesCodeSyntaxExample", "FM_RulesCodeReturnExample"):
            node.setAttribute("class", "RplFunc_ExampleTitle")
        elif classAttrib == "FM_RuleCode":
            # The kind of code example follows from the preceding paragraph.
            if priorParagraphClass == "FM_RulesCodeSyntaxExample":
                node.setAttribute("class", "RplFunc_Syntax_Examp")
            elif priorParagraphClass == "FM_RulesCodeReturnExample":
                node.setAttribute("class", "RplFunc_Return_Examp")
            else:
                node.setAttribute("class", "RplFunc_Code_Examp")
        elif classAttrib.startswith("FM_"):
            node.removeAttribute("class")

        isCellHeading = (classAttrib == "FM_CellHeading")
        isCellBody = (classAttrib == "FM_CellBody")

        if currentCellNode and isCellHeading:
            currentCellNode.setAttribute("class", "RplFunc_OrangeCell")
            currentCellHasColorBg = True

            nodeText = allText(srcNode).replace(" ", "")  # remove spaces
            currentParamIndex = None  # default
            try:
                # A numeric heading is an argument number ...
                paramIndex = int(nodeText)
                currentRowType = "Arg"
                currentParamIndex = paramIndex
                # print ("currentRowNode:", currentRowNode, "Arg:", paramIndex)
                if currentRowNode:
                    currentRowNode.setAttribute("rplArg", str(paramIndex))
            except ValueError:
                # ... any other heading text is used as the row type itself.
                currentRowType = nodeText
                # print ("... SET ROW TYPE:", currentRowType, "col:", currentColNumber)

        # Semantic Tags
        if currentCellNode and currentRowType and isCellBody:
            className = classOfRowType(currentRowType, currentColNumber)
            currentCellNode.setAttribute("class", className)
            if currentRowType == "Arg":
                currentCellNode.setAttribute("argInx", str(currentParamIndex))
            elif currentRowType == "Arguments":
                # Body cells of the "Arguments" row are shown like headings.
                currentCellNode.setAttribute("class", "RplFunc_OrangeCell")
                currentCellHasColorBg = True

    return node


# Function: Clone source node, append to target node, and recursively process all child nodes
def appendCloneChildren(targNode, srcNode):
    """Copy ``srcNode`` and its subtree under ``targNode``, conditioning each node.

    Each node is cloned shallowly and passed through ``conditionNode``.  A
    ``span`` left without attributes is dropped and its children are attached
    directly to ``targNode``.  ``script`` children are skipped.  While a
    ``tr`` / ``td`` subtree is being copied, its clone is published as the
    current row / cell node.
    """
    global currentRowNode
    global currentCellNode

    nodeClone = srcNode.cloneNode(False)  # not deep
    condNode = conditionNode(nodeClone, srcNode)

    isElement = (srcNode.nodeType == srcNode.ELEMENT_NODE)
    isSpan = isElement and (srcNode.tagName == "span")
    dropEmptySpan = isSpan and not condNode.attributes.keys()

    if dropEmptySpan:
        # Unwrap: the span's children go straight into the target node.
        nodeClone = targNode
    else:
        targNode.appendChild(condNode)

    isTableRow = isElement and (srcNode.tagName == "tr")
    isTableCell = isElement and (srcNode.tagName == "td")

    if isTableRow:
        currentRowNode = nodeClone
        # print ("ROW START >>>>>>>")
    if isTableCell:
        currentCellNode = nodeClone
        # print ("CELL START >>>>>>>")

    for srcChild in srcNode.childNodes:
        tagName = srcChild.tagName if (srcChild.nodeType == srcChild.ELEMENT_NODE) else ""
        if tagName != "script":
            appendCloneChildren(nodeClone, srcChild)

    if isTableRow:
        currentRowNode = None
        # print ("ROW END <<<<<<<")
    if isTableCell:
        currentCellNode = None
        # print ("CELL END <<<<<<<")


# Primary node recursion
def parseNode(node):
    """Walk the source tree and distribute its content over function sections.

    A ``p`` of class ``FM_Heading1`` starts a new function named after the
    last whitespace-separated word of the heading.  ``script`` elements are
    ignored.  Once a function is open, any other node is copied into it with
    its whole subtree; before the first function, the walk just descends.
    """
    tagName = node.tagName if (node.nodeType == node.ELEMENT_NODE) else ""

    if (tagName == "p") and node.hasAttribute("class") and \
       (node.getAttribute("class") == "FM_Heading1"):
        headWords = allText(node).split()
        # A heading without any text names no function; it is dropped rather
        # than failing on the lookup of its last word.
        if headWords:
            startFunction(node, headWords[-1])
    elif tagName == "script":
        pass
    elif currentFuncNode:
        appendCloneChildren(currentFuncNode, node)
    else:
        for child in node.childNodes:
            parseNode(child)


# NOTE(review): ".h3" below is a class selector, while the headers generated
# by this script are plain "h3" elements with no such class -- confirm whether
# the element selector "h3" was intended.
CssStyleText = """
/*** (1) Semantics-Only Classes ***/
.RplFunc_Div {}         /* div element for a RplFunction */
                        /* attrib: id (function name) */
                        /* attrib: funcName */
                        /* attrib: funcNum [1..]) */
.RplFunc_Name {}        /* function name */
.RplFunc_RetType {}     /* return type */
.RplFunc_ArgType {}     /* argument type (attrib: argInx [1..]) */
.RplFunc_ArgName {}     /* argument name (attrib: argInx [1..]) */
                        /* xpath "tr[@rplArg]" selects 'tr' elements with an arg type, name */
.RplFunc_Evaluation {}  /* evalution description */
.RplFunc_Comments {}    /* other comments */
/* .RplFunc_Syntax_Examp - syntax example; defined below. */
/* .RplFunc_Return_Examp - return example; defined below. */

/*** (2) Display Style Classes ***/
.h3 {
  font-family: Arial, Helvetica, sans-serif;
  font-size: larger;
  margin-top: 0px;
  margin-bottom: 8px;
}
.RplFunc_OrangeCell {
  font-family: Verdana, Geneva, sans-serif;
  color: #FFF;
  background-color: #cd7345;
  vertical-align: top;
  font-size: small;
  font-weight: bold;
}
.RplFunc_OrangeCell:first-child {
  text-align: center;
}
.RplFunc_CellText {
  font-family: Verdana, Geneva, sans-serif;
  vertical-align: top;
  font-size: small;
  margin-left: 4px;
}
.RplFunc_CellText p {
  margin-top: 12px;
  margin-right: 5px;
  margin-bottom: 2px;
  margin-left: 5px;
}
.RplFunc_CellText p:first-child {
  margin-top: 2px;
}
.RplFunc_GUItext {
  font-weight: bold;
  font-family: Verdana, Geneva, sans-serif;
  font-size: small;
}
.RplFunc_ExampleTitle {
  font-family: Verdana, Geneva, sans-serif;
  font-size: small;
  margin-top: 6px;
  margin-bottom: 6px;
}
.RplFunc_Syntax_Examp, .RplFunc_Return_Examp, .RplFunc_RuleCodeIndent {
  font-family: "Courier New", Courier, monospace;
  margin-left: 20px;
  font-size: small;
  margin-top: 6px;
  margin-bottom: 6px;
}
.RplFunc_BlueLink {
  color: #03F;
  text-decoration: none;
}
"""


def addCssStyles(headNode):
    """Append a ``style`` element holding ``CssStyleText`` to ``headNode``.

    The style sheet is stored as a comment node inside the ``style`` element.
    """
    styleElem = newDoc.createElement("style")
    styleElem.setAttribute("type", "text/css")
    styleText = newDoc.createComment(CssStyleText)
    styleElem.appendChild(styleText)
    headNode.appendChild(styleElem)


titleText = None


def computeTitleText():
    """Set the global ``titleText``, stamped with the current local time."""
    global titleText
    nowTime = datetime.datetime.now()
    nowTimeStr = nowTime.strftime("%b %d, %Y [%H:%M:%S]")
    titleText = "RPL Functions, processed from FrameMaker HTML Output -- " + nowTimeStr


computeTitleText()


def addHeaderTitle(headNode):
    """Append a ``title`` element containing ``titleText`` to ``headNode``."""
    titleElem = newDoc.createElement("title")
    titleTextElem = newDoc.createTextNode(titleText)
    titleElem.appendChild(titleTextElem)
    headNode.appendChild(titleElem)


def addBodyTitle(bodyNode):
    """Append a paragraph containing ``titleText`` to ``bodyNode``."""
    titleParagraph = newDoc.createElement("p")
    titleTextElem = newDoc.createTextNode(titleText)
    titleParagraph.appendChild(titleTextElem)
    bodyNode.appendChild(titleParagraph)


# Generated Document / HTML template with CSS
newDoc = impl.createDocument(None, "html", None)
newDocTop = newDoc.documentElement
headNode = newDoc.createElement("head")
bodyNode = newDoc.createElement("body")
bodyNode.setAttribute("bgcolor", "#FFFFFF")
bodyNode.setAttribute("text", "#000000")
newDocTop.appendChild(headNode)
newDocTop.appendChild(bodyNode)

addHeaderTitle(headNode)
addCssStyles(headNode)
addBodyTitle(bodyNode)

# Setup Function Index
indexDiv = newDoc.createElement("div")
indexDiv.setAttribute("id", "FunctionIndex")
indexTitle = newDoc.createElement("h3")
indexTitleText = newDoc.createTextNode("RPL Functions:")
indexTitle.appendChild(indexTitleText)
indexDiv.appendChild(indexTitle)
indexList = newDoc.createElement("ol")  # HTML ordered list
indexDiv.appendChild(indexList)
bodyNode.appendChild(indexDiv)

# print ("--- PRINT ORIGINAL DOCUMENT ---")
# printNode(xmldoc.documentElement)

# print ("--- PARSE DOCUMENT ---")
parseNode(xmldoc.documentElement)

# print ("--- PRINT NEW DOCUMENT ---")
# printNode(newDocTop)

# write new document
xml = newDocTop.toxml()

# this trick doesn't work well
#-- import re
#-- almostPrettyXml = newDocTop.toprettyxml(); # not great
#-- text_re = re.compile('>\n\s+([^<>\s].*?)\n\s+\g<1>