# MS Office文档批量转HTML工具
1. 仅支持Windows操作系统。
2. 运行环境需要安装Office软件。
3. 支持Word和Excel格式。
4. 缺省转换控制文件为trans_list.txt。
**Python脚本(trans_office.py):**
```python
#! /usr/bin/env python
#coding: utf-8
import multiprocessing
import codecs
import string
from win32com.client.gencache import EnsureDispatch
from win32com.client import constants
########################################################################
# 说明:
# 本工具可以批量转换Office文档格式。运行环境需要安装Office软件。
# 缺省转换控制文件为trans_list.txt。
# 常用Office格式定义:
# constants.wdFormatHTML = 8
# constants.wdFormatFilteredHTML = 10
# constants.xlHtml = 44
########################################################################
### Global parameters
# Application title and version; TRSOFRun prints both in the start-up banner.
g_TRSOFAPPTitle = u"Transform Office Format Tool"
g_TRSOFAPPVersion = u"1.0"
# Conversion jobs; TRSOFReadList appends one dict per valid line of trans_list.txt.
g_TRSOFList = []
### Transform Word
def TRSOFWord(src, des, des_type):
    """Convert one Word document by opening it in Word and saving it in another format.

    Args:
        src: Path of the document to open.
        des: Path the converted document is saved to.
        des_type: Numeric save-format code passed as the second argument
            of ``SaveAs`` (for example 8 or 10 for the HTML formats).

    Returns:
        None. A failure to start Word or to convert the document is
        reported on stdout instead of being raised. An error raised while
        closing the document or quitting Word propagates to the caller.
    """
    print("Transform %s to %s use type %s" % (src, des, des_type))
    try:
        wdApp = EnsureDispatch('Word.Application')
        wdApp.Visible = False
        wdApp.DisplayAlerts = False
    except Exception:
        print("Init Word.Application failed!")
        return
    # Stays None when Open() fails, so the cleanup below never touches an
    # unbound name (which previously skipped Quit() and leaked the process).
    wdDoc = None
    try:
        wdDoc = wdApp.Documents.Open(src)
        wdDoc.SaveAs(des, des_type)
    except Exception as err:
        # %r keeps the message printable even if the error text is not ASCII.
        print("Transform Word document failed: %r" % (err,))
    finally:
        # Always release the document and the Word instance, success or not.
        try:
            if wdDoc is not None:
                wdDoc.Close()
        finally:
            wdApp.Quit()
### Transform Excel
def TRSOFExcel(src, des, des_type):
    """Convert one Excel workbook by opening it in Excel and saving it in another format.

    Args:
        src: Path of the workbook to open.
        des: Path the converted workbook is saved to.
        des_type: Numeric file-format code passed as the second argument
            of ``SaveAs`` (for example 44 for HTML).

    Returns:
        None. A failure to start Excel or to convert the workbook is
        reported on stdout instead of being raised. An error raised while
        closing the workbook or quitting Excel propagates to the caller.
    """
    print("Transform %s to %s use type %s" % (src, des, des_type))
    try:
        xlApp = EnsureDispatch('Excel.Application')
        xlApp.Visible = False
        xlApp.EnableEvents = False
        xlApp.DisplayAlerts = False
    except Exception:
        print("Init Excel.Application failed!")
        return
    # Stays None when Open() fails, so the cleanup below never touches an
    # unbound name (which previously skipped Quit() and leaked the process).
    xlDoc = None
    try:
        xlDoc = xlApp.Workbooks.Open(src)
        xlDoc.SaveAs(des, des_type)
    except Exception as err:
        # %r keeps the message printable even if the error text is not ASCII.
        print("Transform Excel workbook failed: %r" % (err,))
    finally:
        # Always release the workbook and the Excel instance, success or not.
        try:
            if xlDoc is not None:
                xlDoc.Close()
        finally:
            xlApp.Quit()
### Read List
def TRSOFReadList():
    """Load the conversion jobs from trans_list.txt into g_TRSOFList.

    Blank lines and lines starting with ';' are ignored. Every other line
    must hold at least four '|'-separated fields: source type, numeric
    target format, source path and destination path. Each valid line is
    appended to g_TRSOFList as a dict with the keys "src_type" (lower-cased),
    "des_type" (int), "src_path" and "des_path". Malformed lines are
    reported by line number and skipped.

    Returns:
        True when the file was read, False when it could not be opened
        or decoded.
    """
    try:
        # 'utf-8-sig' drops the BOM that Windows editors often prepend;
        # otherwise it would stay glued to the first line and break parsing.
        with codecs.open("trans_list.txt", 'r', 'utf-8-sig') as f:
            szLines = f.readlines()
    except (IOError, OSError, UnicodeError):
        print("Read trans_list.txt failed!")
        return False
    for line_no, l in enumerate(szLines, 1):
        l = l.strip(u"\r\n")
        if not l.strip() or l.startswith(u";"):
            continue
        l_item = l.split("|")
        try:
            entry = {
                "src_type": l_item[0].strip(" ").lower(),
                "des_type": int(l_item[1].strip(" ")),
                "src_path": l_item[2].strip(" "),
                "des_path": l_item[3].strip(" "),
            }
        except (IndexError, ValueError):
            # Too few fields or a non-numeric format code.
            print("Skip malformed line %d in trans_list.txt" % line_no)
            continue
        g_TRSOFList.append(entry)
    return True
### Transform List
def TRSOFTransList(trList):
    """Run every conversion job in trList, one after another.

    Args:
        trList: Sequence of job dicts with the keys "src_type", "des_type",
            "src_path" and "des_path". A "src_type" of "word" is handled
            by TRSOFWord and "excel" by TRSOFExcel.

    Returns:
        None. A job that raises, or that lacks a required key, is reported
        on stdout and the remaining jobs still run. A job with any other
        source type is reported as unsupported instead of being ignored.
    """
    for tr_res in trList:
        try:
            l_type = tr_res["src_type"]
            if l_type == "word":
                TRSOFWord(tr_res["src_path"], tr_res["des_path"], tr_res["des_type"])
            elif l_type == "excel":
                TRSOFExcel(tr_res["src_path"], tr_res["des_path"], tr_res["des_type"])
            else:
                # %r keeps the message printable for non-ASCII type names.
                print("Unsupported source type: %r" % (l_type,))
        except Exception:
            print("Failed on: %s" % (tr_res,))
### Main Function
def TRSOFRun():
    """Print the banner, read trans_list.txt and run every job it lists."""
    # The module-level names are only read here, so no `global` is needed.
    banner = u"%s, Version %s." % (g_TRSOFAPPTitle, g_TRSOFAPPVersion)
    print(banner)
    TRSOFReadList()
    TRSOFTransList(g_TRSOFList)
### main
if __name__ == '__main__':
    # freeze_support has to be called; the bare attribute reference used
    # before did nothing. It is a no-op unless the script runs as a frozen
    # Windows executable.
    multiprocessing.freeze_support()
    TRSOFRun()
```
**在同目录下准备一个配置文件(trans_list.txt):**
```txt
; Default transform list for Transform Office Format Tool
; constants.wdFormatHTML = 8
; constants.wdFormatFilteredHTML = 10
; constants.xlHtml = 44
; Example:
excel | 44 | C:\Document\工作周报_2016_Part2.xlsx | C:\www\report\weekly_2016.htm
;word | 10 | C:\Document\工作周报_2016_Part1.doc | C:\www\report\weekly_2016_part1.htm
```