使用Python将Office文档转换为HTML
MS Office文档批量转HTML工具
- 仅支持Windows操作系统。
- 运行环境需要安装Office软件。
- 支持Word和Excel格式。
- 缺省转换控制文件为trans_list.txt。
Python脚本(trans_office.py):
- snippet.python
#! /usr/bin/env python #coding: utf-8 import multiprocessing import codecs import string from win32com.client.gencache import EnsureDispatch from win32com.client import constants ######################################################################## # 说明: # 本工具可以批量转换Office文档格式。运行环境需要安装Office软件。 # 缺省转换控制文件为trans_list.txt。 # 常用Office格式定义: # constants.wdFormatHTML = 8 # constants.wdFormatFilteredHTML = 10 # constants.xlFormatHtml = 44 ######################################################################## ### Global param g_TRSOFAPPTitle = u"Transform Office Format Tool" g_TRSOFAPPVersion = u"1.0" g_TRSOFList = [] ### Transform Word def TRSOFWord(src, des, des_type): print "Transform", src, "to", des, "use type", des_type try: wdApp = EnsureDispatch('Word.Application') wdApp.Visible = False wdApp.DisplayAlerts = False except: print "Init Word.Application failed!" return try: wdDoc = wdApp.Documents.Open(src) wdDoc.SaveAs(des, des_type) wdDoc.Close() wdApp.Quit() except: wdDoc.Close() wdApp.Quit() ### Transform Excel def TRSOFExcel(src, des, des_type): print "Transform", src, "to", des, "use type", des_type try: xlApp = EnsureDispatch('Excel.Application') xlApp.Visible = False xlApp.EnableEvents = False xlApp.DisplayAlerts = False except: print "Init Excel.Application failed!" return try: xlDoc = xlApp.Workbooks.Open(src) xlDoc.SaveAs(des, des_type) xlDoc.Close() xlApp.Quit() except: xlDoc.Close() xlApp.Quit() ### Read List def TRSOFReadList(): global g_TRSOFList try: f = codecs.open("trans_list.txt", 'r', 'utf-8') szLines = f.readlines() f.close() for l in szLines[0:]: l = l.strip(u"\r\n") if len(l) < 1: continue if l[:len(u";")] == u";": continue try: l_item = l.split("|") l_type = l_item[0].strip(" ").lower() g_TRSOFList.append({"src_type" : l_type, \ "des_type" : string.atoi(l_item[1].strip(" ")), \ "src_path" : l_item[2].strip(" "), \ "des_path" : l_item[3].strip(" ")}) except: #print "Failed on read line", l continue except: print "Read trans_list.txt failed!" 
return False return True ### Transform List def TRSOFTransList(trList): tr_cnt = len(trList) ri = 0 while ri < tr_cnt: tr_res = trList[ri] l_type = tr_res["src_type"] try: if l_type == "word": TRSOFWord(tr_res["src_path"], tr_res["des_path"], tr_res["des_type"]) elif l_type == "excel": TRSOFExcel(tr_res["src_path"], tr_res["des_path"], tr_res["des_type"]) except: print "Failed on:", tr_res ri += 1 continue ri += 1 ### Main Funcation def TRSOFRun(): global g_TRSOFAPPTitle global g_TRSOFAPPVersion global g_TRSOFList print g_TRSOFAPPTitle + ", Version " + g_TRSOFAPPVersion + "." TRSOFReadList() TRSOFTransList(g_TRSOFList) ### main if __name__ == '__main__': multiprocessing.freeze_support TRSOFRun()
在同目录下准备一个配置文件(trans_list.txt):
- snippet.txt
; Default transform list for Transform Office Format Tool
; constants.wdFormatHTML = 8
; constants.wdFormatFilteredHTML = 10
; constants.xlFormatHtml = 44
; Example:
excel | 44 | C:\Document\工作周报_2016_Part2.xlsx | C:\www\report\weekly_2016.htm
;word | 10 | C:\Document\工作周报_2016_Part1.doc | C:\www\report\weekly_2016_part1.htm
打赏作者以资鼓励: