FAIRYFAR-INTERNAL
 
  FAIRYFAR-INTERNAL  |  SITEMAP  |  ABOUT-ME  |  HOME  
使用Python转Office文档为HTML

MS Office文档批量转HTML工具

  1. 仅支持Windows操作系统。
  2. 运行环境需要安装Office软件。
  3. 支持Word和Excel格式。
  4. 缺省转换控制文件为trans_list.txt。

Python脚本(trans_office.py):

snippet.python
#! /usr/bin/env python
#coding: utf-8
 
import multiprocessing
import codecs
import string
from win32com.client.gencache import EnsureDispatch
from win32com.client import constants
 
########################################################################
# 说明:
# 本工具可以批量转换Office文档格式。运行环境需要安装Office软件。
# 缺省转换控制文件为trans_list.txt。
# 常用Office格式定义:
# constants.wdFormatHTML = 8
# constants.wdFormatFilteredHTML = 10
# constants.xlFormatHtml = 44
########################################################################
 
### Global parameters
# Banner text printed when the tool starts.
g_TRSOFAPPTitle = u"Transform Office Format Tool"
g_TRSOFAPPVersion = u"1.0"
# Conversion jobs; TRSOFReadList() appends one dict per parsed line.
g_TRSOFList = list()
 
### Transform Word
def TRSOFWord(src, des, des_type):
	"""Convert one Word document to another format through COM automation.

	Args:
		src: Path of the document to open.
		des: Path the converted document is saved to.
		des_type: Numeric save-format constant handed to SaveAs as its
			second argument (the file header lists 8 for wdFormatHTML
			and 10 for wdFormatFilteredHTML).

	Returns:
		None. Progress and failures are reported on stdout.
	"""
	print("Transform %s to %s use type %s" % (src, des, des_type))
	try:
		wdApp = EnsureDispatch('Word.Application')
		wdApp.Visible = False
		wdApp.DisplayAlerts = False
	except Exception:
		print("Init Word.Application failed!")
		return

	# wdDoc stays None until Open() succeeds, so the cleanup below never
	# touches an unbound name when the document could not be opened.
	wdDoc = None
	try:
		wdDoc = wdApp.Documents.Open(src)
		wdDoc.SaveAs(des, des_type)
	except Exception:
		print("Transform %s failed!" % src)
	finally:
		# Always shut the application down, even if closing the document
		# fails; otherwise a hidden Word process is left running.
		try:
			if wdDoc is not None:
				wdDoc.Close()
		finally:
			wdApp.Quit()
 
### Transform Excel
def TRSOFExcel(src, des, des_type):
	"""Convert one Excel workbook to another format through COM automation.

	Args:
		src: Path of the workbook to open.
		des: Path the converted workbook is saved to.
		des_type: Numeric save-format constant handed to SaveAs as its
			second argument (the file header lists 44 for xlFormatHtml).

	Returns:
		None. Progress and failures are reported on stdout.
	"""
	print("Transform %s to %s use type %s" % (src, des, des_type))
	try:
		xlApp = EnsureDispatch('Excel.Application')
		xlApp.Visible = False
		xlApp.EnableEvents = False
		xlApp.DisplayAlerts = False
	except Exception:
		print("Init Excel.Application failed!")
		return

	# xlDoc stays None until Open() succeeds, so the cleanup below never
	# touches an unbound name when the workbook could not be opened.
	xlDoc = None
	try:
		xlDoc = xlApp.Workbooks.Open(src)
		xlDoc.SaveAs(des, des_type)
	except Exception:
		print("Transform %s failed!" % src)
	finally:
		# Always shut the application down, even if closing the workbook
		# fails; otherwise a hidden Excel process is left running.
		try:
			if xlDoc is not None:
				xlDoc.Close()
		finally:
			xlApp.Quit()
 
### Read List
def TRSOFReadList():
	"""Parse trans_list.txt and append its jobs to g_TRSOFList.

	Each non-empty line that does not start with ';' is expected to look
	like ``type | format | source path | destination path``. Every valid
	line is appended to g_TRSOFList as a dict with the keys "src_type"
	(lower-cased), "des_type" (int), "src_path" and "des_path".
	Lines that cannot be parsed are reported and skipped.

	Returns:
		True when the file was read, False when it could not be opened
		or decoded.
	"""
	global g_TRSOFList
	try:
		# 'utf-8-sig' drops a leading byte-order mark, which would
		# otherwise become part of the first line's type field.
		with codecs.open("trans_list.txt", 'r', 'utf-8-sig') as f:
			szLines = f.readlines()
	except (IOError, OSError, UnicodeError):
		print("Read trans_list.txt failed!")
		return False

	for line_no, l in enumerate(szLines, 1):
		l = l.strip(u"\r\n")
		# Skip blank lines and ';' comments.
		if not l or l.startswith(u";"):
			continue

		l_item = l.split("|")
		try:
			tr_item = {"src_type" : l_item[0].strip(" ").lower(),
				"des_type" : int(l_item[1].strip(" ")),
				"src_path" : l_item[2].strip(" "),
				"des_path" : l_item[3].strip(" ")}
		except (IndexError, ValueError):
			# Too few fields or a non-numeric format code.
			print("Skip malformed line %d in trans_list.txt" % line_no)
			continue
		g_TRSOFList.append(tr_item)
	return True
 
### Transform List
def TRSOFTransList(trList):
	"""Run every conversion job in trList, continuing past failures.

	Args:
		trList: Sequence of dicts with the keys "src_type", "des_type",
			"src_path" and "des_path". "src_type" selects the converter:
			"word" or "excel"; any other value is reported and skipped.

	Returns:
		None. Failures are reported on stdout.
	"""
	for tr_res in trList:
		l_type = tr_res["src_type"]
		try:
			if l_type == "word":
				TRSOFWord(tr_res["src_path"], tr_res["des_path"], tr_res["des_type"])
			elif l_type == "excel":
				TRSOFExcel(tr_res["src_path"], tr_res["des_path"], tr_res["des_type"])
			else:
				print("Unsupported source type: %s" % l_type)
		except Exception:
			# Catch Exception rather than everything so that Ctrl-C
			# (KeyboardInterrupt) can still abort a long batch.
			print("Failed on: %s" % (tr_res,))
 
### Main Function
def TRSOFRun():
	"""Print the tool banner, load the job list, then run every job.

	The result of TRSOFReadList() is not checked; TRSOFTransList() is
	always called with whatever g_TRSOFList holds afterwards.
	"""
	# The module globals are only read here, so no "global" statements
	# are required.
	banner = g_TRSOFAPPTitle + ", Version " + g_TRSOFAPPVersion + "."
	print(banner)

	TRSOFReadList()
	TRSOFTransList(g_TRSOFList)
 
### main
if __name__ == '__main__':
	# freeze_support must actually be called; the bare attribute
	# reference used previously was a no-op expression.
	multiprocessing.freeze_support()
	TRSOFRun()

在同目录下准备一个配置文件(trans_list.txt):

snippet.txt
; Default transform list for Transform Office Format Tool
; constants.wdFormatHTML = 8
; constants.wdFormatFilteredHTML = 10
; constants.xlFormatHtml = 44
 
; Example:
excel | 44 | C:\Document\工作周报_2016_Part2.xlsx | C:\www\report\weekly_2016.htm
;word | 10 | C:\Document\工作周报_2016_Part1.doc | C:\www\report\weekly_2016_part1.htm


打赏作者以资鼓励:
移动端扫码阅读: