# Python script to translate HTML output from the FrameMaker RPL Functions document
#   into HTML usable within RiverWare.
# Phil Weinstein, CADSWES, edit: 3-30-2014
# Note: Non-UTF-8 characters in the source HTML (three types of "fancy" quotes)
#   must be replaced before running this script.

from xml.dom import minidom
import datetime

# Input / output configuration.
# srcPath = "RPLPredefinedFunctions_FM12.htm"
srcPath = "test3.html"           # input HTML file, read and parsed below
htmlOutPath = "out3.html"        # output file receiving the generated document
xmlOutPath = "out3.xml"          # second output file; receives the same markup
imgMovePath = "imageMove.txt"    # output text file of "cp" commands for the images
InImageDir = "fmFuncImages/"     # prefix for source image paths in the "cp" commands
OutImageDir = "rplFuncImages/"   # prefix for renamed image paths (also used in img "src")
ShowFunctionNumInHeader = False  # True: put "<number>. " before each function name heading

# Read the source HTML. The encoding is stated explicitly so that decoding
# does not depend on the platform's locale default; the text is re-encoded
# as UTF-8 for the parser a few lines below, so both steps now agree.
with open(srcPath, 'r', encoding='utf-8') as content_file:
    xmlStr = content_file.read()

# minidom parses strict XML, where the HTML entity "&nbsp;" is not predefined,
# so it is replaced by a plain space before parsing.
xmlStr = xmlStr.replace ("&nbsp;", " ")
xmlStr = xmlStr.encode ('utf-8')

# impl is used further down to create the output document;
# xmldoc is the parsed source document.
impl = minidom.getDOMImplementation()
xmldoc = minidom.parseString(xmlStr)

# Parsing State Globals
# Module-level state shared (via "global") by the node-processing functions.
functionCounter = 0            # number of functions started so far (startFunction)
currentFuncNode = None         # output "div" of the function currently being filled
currentFuncName = ""           # name of that function; also used to name its images
currentRowNode = None          # output "tr" clone while its children are being processed
currentCellNode = None         # output "td" clone while its children are being processed
currentRowType = None          # row kind from the row's heading cell ("Arg" for numbered rows)
currentParamIndex = None       # integer argument number of the current "Arg" row
currentColNumber = 0 # 1-based column of the current "td" in its row; 0 before the first cell
currentCellHasColorBg = False  # set by conditionNode; not read anywhere in this file
currentParagraphClass = ""     # class attribute of the most recently seen "p" element
priorParagraphClass = ""       # class attribute of the "p" element seen before that one
indexList = None               # "ol" element of the function index (created further down)
imageMoveCommands = ""         # accumulated "cp" command lines, written out at the end

# Utility: Return text from node and all its children
def allText (node):
   """Return the concatenated text of node and all of its descendants.

   Only TEXT_NODE nodes contribute their data; any other node yields the
   text of its children joined in document order ("" if it has none).
   """
   if node.nodeType != node.TEXT_NODE:
      return "".join (allText (kid) for kid in node.childNodes)
   return node.data

# Debug Utility: print a node and its content
def printNode(node, level=0):
   """Debug helper: print node and its whole subtree, one line per node.

   Each line shows a "pn-<level>:" tag, the node object and the node's full
   text (via allText); level is the depth of node below the starting node.
   """
   print ("pn-%s:" % (level,), node, allText (node))
   childLevel = level + 1
   for kid in node.childNodes:
      printNode (kid, childLevel)
         
# Utility: Function Anchor name from Function Name
def funcAnchorName (funcName):
   """Return the anchor id for a function; currently the name itself, unchanged."""
   # disabled alternative: return ("RplFunc_" + funcName)
   anchorName = funcName
   return anchorName
   
# Utility
def classOfRowType (currentRowType, col):
   """Return the RplFunc class name for a table cell.

   currentRowType is the row's type string and col its 1-based column.
   "Type" and "MathematicalExpression" rows have fixed names; in an "Arg"
   row column 2 is the argument type and column 3 the argument name. Any
   other case (including other columns of an "Arg" row) gives
   "RplFunc_" followed by the row type.
   """
   fixedNames = {
      "Type": "RplFunc_RetType",
      "MathematicalExpression": "RplFunc_MathExpr",
   }
   fixed = fixedNames.get (currentRowType)
   if fixed is not None:
      return fixed
   if currentRowType == "Arg" and col in (2, 3):
      return "RplFunc_ArgType" if col == 2 else "RplFunc_ArgName"
   return "RplFunc_" + currentRowType
         
# Utility
def translateImageFile (imageNode):
   """Point an "img" element at a file named after the current function.

   Reads the element's "src", derives the new file name
   "<currentFuncName>.<ext>", sets the element's "alt", "id" and "src"
   attributes accordingly, and appends a "cp <old> <new>" line to the
   global imageMoveCommands so the image files can be copied afterwards.

   NOTE(review): the new name depends only on the current function name and
   the extension, so two images with the same extension inside one function
   get the same target path and the same "id" -- confirm that each function
   has at most one image.
   """
   global imageMoveCommands
   origFilePath = imageNode.getAttribute ("src")
   origFileName = origFilePath.split('/')[-1]
   # Take the extension from the file name alone. Splitting the whole path on
   # "." returned directory parts whenever the file name had no extension
   # (e.g. "a.b/foo" gave "b/foo"); such a file now simply gets no extension.
   _, dot, ext = origFileName.rpartition ('.')
   suffix = ("." + ext) if dot else ""
   srcFilePath = InImageDir + origFileName
   imageFileName = currentFuncName + suffix
   imageFilePath = OutImageDir + imageFileName
   imageNode.setAttribute ("alt", imageFileName)
   imageNode.setAttribute ("id", "image" + currentFuncName)
   imageNode.setAttribute ("src", imageFilePath)
   imageMoveCommands += "cp " + srcFilePath + " " + imageFilePath + "\n"
         
# Function: Start a new Function element
def startFunction (srcNode,funcName):
   """Begin the output section for a new function named funcName.

   Increments the function counter, creates the function's "div" (with
   class, id, funcName and funcNum attributes) containing an "h3" heading,
   appends it to the document body and makes it the current function node.
   If the index list exists, a linked "li" entry for the function is added
   to it. srcNode is accepted but not used.
   """
   global functionCounter
   global currentFuncNode
   global currentFuncName
   global indexList
   functionCounter += 1
   currentFuncName = funcName
   anchorId = funcAnchorName (funcName)

   # The div's "id" serves as the link target (HTML5 uses ids rather than
   # <a name="..."> anchors). Attributes are set in a fixed order.
   funcDiv = newDoc.createElement ("div")
   divAttribs = (
      ("class", "RplFunc_Div"),
      ("id", anchorId),
      ("funcName", funcName),
      ("funcNum", str (functionCounter)),
   )
   for attribName, attribValue in divAttribs:
      funcDiv.setAttribute (attribName, attribValue)
   currentFuncNode = funcDiv

   # Heading: optional "<number>. " text followed by the name in a span.
   heading = newDoc.createElement ("h3")
   if ShowFunctionNumInHeader:
      heading.appendChild (newDoc.createTextNode (str (functionCounter) + ". "))
   nameSpan = newDoc.createElement ("span")
   nameSpan.setAttribute ("class", "RplFunc_Name")
   nameSpan.appendChild (newDoc.createTextNode (funcName))
   heading.appendChild (nameSpan)
   funcDiv.appendChild (heading)
   bodyNode.appendChild (funcDiv)

   if not indexList:
      return

   # Index entry: <li><a href="#anchor">name</a></li>
   entryLink = newDoc.createElement ("a")
   entryLink.setAttribute ("href", "#" + anchorId)
   entryLink.appendChild (newDoc.createTextNode (funcName))
   entryItem = newDoc.createElement ("li")  # HTML list item
   entryItem.appendChild (entryLink)
   indexList.appendChild (entryItem)
   
# Node Processing, parameters: (cloned node, original source node)
def conditionNode (node, srcNode):
   """Adjust a freshly cloned node in place for the output document; return it.

   node is the shallow clone destined for the new document; srcNode is the
   original it was cloned from (used here only to read a heading's text).
   Besides editing node, this updates the row/cell/paragraph parsing globals
   and may set attributes on the current output row and cell.

   Text nodes: "CLICK HERE" and "HERE" are deleted from the text.
   Element nodes:
     - "style" is removed; a "table" that had one gets border="1".
     - "tr" resets the per-row state, "td" advances the column counter,
       "p" records its class, "img" goes through translateImageFile.
     - FrameMaker "FM_*" classes are mapped to "RplFunc_*" classes or dropped.
     - An FM_CellHeading element colors the current cell and determines the
       row type; an FM_CellBody element tags the current cell with the
       class for that row type.
   """
   global currentRowNode
   global currentCellNode
   global currentRowType
   global currentParamIndex
   global currentColNumber
   global currentCellHasColorBg
   global currentParagraphClass
   global priorParagraphClass
   if node.nodeType == node.TEXT_NODE:
      # NOTE(review): plain substring replacement -- it also alters any
      # upper-case word that contains "HERE" (e.g. "WHERE"); confirm no such
      # text occurs in the source document.
      if "HERE" in node.data:
         node.data = node.data.replace ("CLICK HERE", "")
         node.data = node.data.replace ("HERE", "")
   if node.nodeType == node.ELEMENT_NODE:
      # Class as it was in the source; the translations below test this
      # value, not whatever class is set on node in the meantime.
      classAttrib = node.getAttribute ("class") if node.hasAttribute ("class") else ""

      # Drop inline styles; the border is added only to tables that had one.
      if node.hasAttribute ("style"):
         node.removeAttribute ("style")
         if (node.tagName == "table"):
            node.setAttribute ("border", "1")
            
      if node.tagName == "tr":
         currentRowType = None # determined later by the row's heading cell
         currentParamIndex = None
         currentColNumber = 0 # 1-based; incremented at each "td"
         node.setAttribute ("valign", "top")
         # NOTE(review): if the source "tr" carried an "FM_*" class, the class
         # translation below removes or replaces this class again.
         node.setAttribute ("class", "RplFunc_CellText")
      elif node.tagName == "td":
         currentColNumber += 1 # 1-based column number of this cell
         currentCellHasColorBg = False
      elif node.tagName == "p":
         # Remember the previous paragraph's class: an FM_RuleCode element is
         # classified below by the example title that preceded it.
         priorParagraphClass = currentParagraphClass
         currentParagraphClass = classAttrib 
      elif node.tagName == "img":
         translateImageFile (node)

      # Translate FrameMaker HTML classes to RplFunc classes
      if (classAttrib == "FM_StepNumber"):
         node.removeAttribute ("class")
      elif (classAttrib == "FM_GUItext"):
         node.setAttribute ("class", "RplFunc_GUItext")
      elif (classAttrib == "FM_EmphasisBlue"):
         node.setAttribute ("class", "RplFunc_BlueLink")
      elif (classAttrib == "FM_RulesCodeSyntaxExample") or (classAttrib == "FM_RulesCodeReturnExample"):
         node.setAttribute ("class", "RplFunc_ExampleTitle")
      elif (classAttrib == "FM_RuleCode"):
         if priorParagraphClass == "FM_RulesCodeSyntaxExample":
            node.setAttribute ("class", "RplFunc_Syntax_Examp")
         elif priorParagraphClass == "FM_RulesCodeReturnExample":
            node.setAttribute ("class", "RplFunc_Return_Examp")
         else:
            node.setAttribute ("class", "RplFunc_Code_Examp")
      elif (classAttrib.startswith ("FM_")):
         # Every other FrameMaker class (including FM_CellHeading and
         # FM_CellBody, handled below) is dropped from the node itself.
         node.removeAttribute ("class")  
      
      isCellHeading = (classAttrib == "FM_CellHeading")
      isCellBody = (classAttrib == "FM_CellBody")
         
      # Heading cell: color the enclosing cell and derive the row type from
      # the heading text with its spaces removed.
      if currentCellNode and isCellHeading:
         currentCellNode.setAttribute ("class", "RplFunc_OrangeCell")
         currentCellHasColorBg = True
         nodeText = allText (srcNode).replace (" ","")  # remove spaces
         currentParamIndex = None # default
         try:
            # Text that parses as an integer marks an argument row; the
            # number is also recorded on the enclosing output row.
            paramIndex = int (nodeText)
            currentRowType = "Arg"
            currentParamIndex = paramIndex
            # print ("currentRowNode:", currentRowNode, "Arg:", paramIndex)
            if currentRowNode:
               currentRowNode.setAttribute ("rplArg", str (paramIndex))
         except ValueError:
           # Not a number: the text itself is the row type.
           currentRowType = nodeText
         # print ("... SET ROW TYPE:", currentRowType, "col:", currentColNumber)
         
      # Semantic Tags: body cells get a class derived from the row type and column.
      if currentCellNode and currentRowType and isCellBody:
         className = classOfRowType (currentRowType, currentColNumber)
         currentCellNode.setAttribute ("class", className)
         if currentRowType == "Arg":
            currentCellNode.setAttribute ("argInx", str (currentParamIndex))
         elif currentRowType == "Arguments":
            # Body cells of an "Arguments" row are colored like heading cells.
            currentCellNode.setAttribute ("class", "RplFunc_OrangeCell")
            currentCellHasColorBg = True
   
   return node

# Function: Clone source node, append to target node, and recursively process all child nodes
def appendCloneChildren (targNode, srcNode):
   """Copy srcNode and its subtree under targNode, conditioning every node.

   A shallow clone of srcNode is passed through conditionNode and appended
   to targNode, then the children of srcNode are processed recursively
   ("script" elements are skipped). A "span" left without attributes after
   conditioning is not appended; its children go directly under targNode.
   While the children of a "tr" / "td" are processed, the clone is exposed
   as the global current row / cell node, which is reset to None afterwards.
   """
   global currentRowNode
   global currentCellNode
   srcIsElement = (srcNode.nodeType == srcNode.ELEMENT_NODE)
   srcTag = srcNode.tagName if srcIsElement else ""

   shallowCopy = srcNode.cloneNode (False)  # children are added by the recursion
   conditioned = conditionNode (shallowCopy, srcNode)

   # Decide where this node's children will be attached.
   if srcTag == "span" and not conditioned.attributes.keys():
      childParent = targNode
   else:
      targNode.appendChild (conditioned)
      childParent = shallowCopy

   if srcTag == "tr":
      currentRowNode = childParent
   elif srcTag == "td":
      currentCellNode = childParent

   for kid in srcNode.childNodes:
      kidIsScript = (kid.nodeType == kid.ELEMENT_NODE) and (kid.tagName == "script")
      if not kidIsScript:
         appendCloneChildren (childParent, kid)

   if srcTag == "tr":
      currentRowNode = None
   elif srcTag == "td":
      currentCellNode = None

# Primary node recursion
def parseNode (node):
   """Walk the source tree, starting a function at each function heading.

   A "p" element with class "FM_Heading1" starts a new function named by
   the last whitespace-separated word of its text. "script" elements are
   ignored. Once a function has been started, any other node reached by
   this walk is cloned, with its whole subtree, into the current function
   (this walk does not descend into it). Before the first function, the
   walk simply descends into the children.
   """
   tagName = node.tagName if (node.nodeType == node.ELEMENT_NODE) else ""
   isFuncHeading = (tagName == "p") and node.hasAttribute ("class") and \
      (node.getAttribute ("class") == "FM_Heading1")
   if isFuncHeading:
      headWords = allText (node).split()
      # A heading with no text names no function; skip it instead of
      # failing with IndexError on the empty word list.
      if headWords:
         startFunction (node, headWords[-1])
   elif tagName == "script":
      pass
   elif currentFuncNode:
      appendCloneChildren (currentFuncNode, node)
   else:
      for child in node.childNodes:
         parseNode (child)
	   
# Stylesheet text embedded in the generated document by addCssStyles.
# - The heading rule uses the element selector "h3": the headings created by
#   this script are plain <h3> elements without a class, so ".h3" never matched.
# - "RplFunc_Code_Examp" (assigned by conditionNode to code paragraphs that do
#   not follow an example title) shares the code-example rule.
CssStyleText = """ 

/*** (1) Semantics-Only Classes ***/

.RplFunc_Div        {}  /* div element for a RplFunction */
   /* attrib: id (function name) */
   /* attrib: funcName */
   /* attrib: funcNum [1..]) */
   
.RplFunc_Name       {}  /* function name */
.RplFunc_RetType    {}  /* return type */
.RplFunc_ArgType    {}  /* argument type (attrib: argInx [1..]) */
.RplFunc_ArgName    {}  /* argument name (attrib: argInx [1..]) */
   /* xpath "tr[@rplArg]" selects 'tr' elements with an arg type, name */ 
   
.RplFunc_Evaluation {}  /* evalution description */
.RplFunc_Comments   {}  /* other comments */

/* .RplFunc_Syntax_Examp  - syntax example; defined below. */
/* .RplFunc_Return_Examp  - return example; defined below. */

/*** (2) Display Style Classes ***/

h3 {
	font-family: Arial, Helvetica, sans-serif;
	font-size: larger;
	margin-top: 0px;
	margin-bottom: 8px;
}
.RplFunc_OrangeCell {
	font-family: Verdana, Geneva, sans-serif;
	color: #FFF;
	background-color: #cd7345;
	vertical-align: top;
	font-size: small;
	font-weight: bold;
}
.RplFunc_OrangeCell:first-child {
	text-align: center;
}
.RplFunc_CellText {
	font-family: Verdana, Geneva, sans-serif;
	vertical-align: top;
	font-size: small;
	margin-left: 4px;
}
.RplFunc_CellText p {
	margin-top: 12px;
	margin-right: 5px;
	margin-bottom: 2px;
	margin-left: 5px;
}
.RplFunc_CellText p:first-child {
	margin-top: 2px;
}
.RplFunc_GUItext {
	font-weight: bold;
	font-family: Verdana, Geneva, sans-serif;
	font-size: small;
}
.RplFunc_ExampleTitle {
	font-family: Verdana, Geneva, sans-serif;
	font-size: small;
	margin-top: 6px;
	margin-bottom: 6px;
}
.RplFunc_Syntax_Examp, .RplFunc_Return_Examp, .RplFunc_Code_Examp, .RplFunc_RuleCodeIndent {
	font-family: "Courier New", Courier, monospace;
	margin-left: 20px;
	font-size: small;
	margin-top: 6px;
	margin-bottom: 6px;
}
.RplFunc_BlueLink {
	color: #03F;
	text-decoration: none;
}
"""
       
def addCssStyles (headNode):
   """Append a <style type="text/css"> element holding CssStyleText to headNode.

   The stylesheet text is stored as a comment node inside the style element.
   """
   cssHolder = newDoc.createComment (CssStyleText)
   styleTag = newDoc.createElement ("style")
   styleTag.setAttribute ("type", "text/css")
   styleTag.appendChild (cssHolder)
   headNode.appendChild (styleTag)

# Document title; filled in once, at import time, by computeTitleText().
titleText = None
def computeTitleText():
   """Set the global titleText to the fixed title followed by the current local time."""
   global titleText
   stamp = datetime.datetime.now().strftime ("%b %d, %Y [%H:%M:%S]")
   titleText = "RPL Functions, processed from FrameMaker HTML Output -- " + stamp
computeTitleText()
   
def addHeaderTitle (headNode):
   """Append a <title> element containing the global titleText to headNode."""
   titleTag = newDoc.createElement ("title")
   titleTag.appendChild (newDoc.createTextNode (titleText))
   headNode.appendChild (titleTag)
   
def addBodyTitle (bodyNode):
   """Append a "p" element containing the global titleText to bodyNode.

   The parameter shadows the module-level bodyNode; only the node passed in
   is modified. (The former "global titleTextInst" declaration named a
   variable that is never defined or used, and has been removed.)
   """
   titleParagraph = newDoc.createElement ("p")
   titleTextElem = newDoc.createTextNode (titleText)
   titleParagraph.appendChild (titleTextElem)
   bodyNode.appendChild (titleParagraph)
       
# Generated Document / HTML template with CSS
# newDoc, newDocTop, headNode and bodyNode are module-level names used by the
# functions above; the order of the calls below fixes the order of the output.
newDoc = impl.createDocument(None, "html", None)  # new document with an "html" root element
newDocTop = newDoc.documentElement                # that root element
headNode = newDoc.createElement ("head")
bodyNode = newDoc.createElement ("body")
bodyNode.setAttribute ("bgcolor", "#FFFFFF")
bodyNode.setAttribute ("text", "#000000")
newDocTop.appendChild (headNode)
newDocTop.appendChild (bodyNode)
addHeaderTitle (headNode)   # <title> carrying titleText
addCssStyles (headNode)     # embedded stylesheet (CssStyleText)
addBodyTitle (bodyNode)     # titleText again, as the first child of the body

# Setup Function Index
# The index div is appended to the body before the source document is parsed,
# so it precedes the function sections; startFunction adds one linked "li"
# entry to indexList for each function it starts.
indexDiv = newDoc.createElement ("div")
indexDiv.setAttribute ("id", "FunctionIndex")
indexTitle = newDoc.createElement ("h3")
indexTitleText = newDoc.createTextNode ("RPL Functions:")
indexTitle.appendChild (indexTitleText)
indexDiv.appendChild (indexTitle)
indexList = newDoc.createElement ("ol") # HTML ordered list
indexDiv.appendChild (indexList)
bodyNode.appendChild (indexDiv)
       
# print ("--- PRINT ORIGINAL DOCUMENT  ---")
# printNode(xmldoc.documentElement)

# print ("--- PARSE DOCUMENT  ---")
# Convert the parsed source document; this fills the body of the new document
# and accumulates the image copy commands.
parseNode(xmldoc.documentElement)

# print ("--- PRINT NEW DOCUMENT ---")
# printNode(newDocTop)

# write new document
# The new document is serialized once; the same markup goes to both the XML
# and the HTML output file.
xml = newDocTop.toxml()

# this trick doesn't work well
#-- import re
#-- almostPrettyXml = newDocTop.toprettyxml(); # not great
#-- text_re = re.compile('>\n\s+([^<>\s].*?)\n\s+</', re.DOTALL)    
#-- xml = text_re.sub('>\g<1></', almostPrettyXml)

# Each file is opened in a "with" block so that it is closed even if the
# write raises.
with open (xmlOutPath, "wb") as outHandle:
   outHandle.write (bytes(xml, 'UTF-8'))

with open (htmlOutPath, "wb") as outHandle:
   outHandle.write (bytes(xml, 'UTF-8'))

with open (imgMovePath, "wb") as outHandle:
   outHandle.write (bytes(imageMoveCommands, 'UTF-8'))

print ("Success. Input:", srcPath + ";", "Output:", htmlOutPath + ",",
       xmlOutPath + ",", imgMovePath)
print (titleText)

# --- (end) ---