-
Notifications
You must be signed in to change notification settings - Fork 0
/
newtran.py
87 lines (77 loc) · 2.64 KB
/
newtran.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
# -*- coding: utf8 -*-
import binascii
import io
import os
import sys
# Directory part of the script path as invoked (may be '' for a bare name).
_SCRIPT_DIR = os.path.dirname(sys.argv[0])

# Defaults used when no command-line argument is given.
SRCDIR = ".\\scriptCN"
TGTDIR = _SCRIPT_DIR + '\\scriptTarget'
TBLFILE = _SCRIPT_DIR + '\\tableData.tbl'
ERRPATH = _SCRIPT_DIR + '\\err.log'
# Suffix appended to a directory path to locate the error log.
LOG_FILE_NAME = '/err.log'

# An optional first argument replaces the source directory; the target
# directory then becomes that same path with 'Target' appended.
if sys.argv[1:]:
    arg1 = sys.argv[1]
    SRCDIR = arg1
    TGTDIR = arg1 + 'Target'
    # NOTE: the argument is always treated as a directory (no isdir check).
    # A branch that handled a single-file argument by inserting 'Target'
    # between the file's stem and extension is disabled.
def getFileList(path, fl):
    """Recursively collect every ``.txt`` file below *path* into *fl*.

    Each entry is built as ``path + '/' + name`` so that every collected
    path starts with the directory it was found through.

    Args:
        path: Directory to scan.
        fl: List that receives the matching file paths (extended in place).

    Returns:
        The same list object *fl*, for convenience.
    """
    try:
        names = os.listdir(path)
    except OSError:
        # Unreadable or missing directory (e.g. permission denied): skip it
        # and keep walking, but let every other kind of error surface.
        return fl
    for name in names:
        subPath = path + '/' + name
        if os.path.isdir(subPath):
            getFileList(subPath, fl)
        elif os.path.splitext(subPath)[1] == '.txt':
            fl.append(subPath)
    return fl
# create code table
def loadTbl(tableFile):
    """Load the code table that maps a character to its hex code.

    The table is a UTF-8 text file with one ``HEX=character`` entry per
    line.  Lines that contain no ``=`` (blank lines included) are skipped.

    Args:
        tableFile: Path of the table file.

    Returns:
        A dict mapping each character to its hex-code string.  The entry
        ``'=' -> '3D'`` is always present, even if the file omits it.
    """
    tbl = {'=': '3D'}
    # 'utf-8-sig' drops a leading BOM, which would otherwise end up glued to
    # the first hex code.  The with-block guarantees the handle is closed.
    with io.open(tableFile, 'r', encoding='utf-8-sig') as tableData:
        for tableLine in tableData:
            # Remove only the line terminator: whitespace can be a mapped
            # character, and the last line may have no terminator at all.
            entry = tableLine.rstrip('\n')
            # Split on the first '=' only, so '=' itself can be mapped.
            tableHex, separator, tableWord = entry.partition('=')
            if not separator:
                continue
            tbl[tableWord] = tableHex
    return tbl
# read file, convert file
def convertFile(sourceDir, targetDir, tblDir, tbl):
    """Convert every ``.txt`` file under *sourceDir* through the code table.

    Each source file is read as UTF-8 and translated character by character:
    a newline is kept as a line break, any other character is replaced by the
    bytes named by its hex code in *tbl*.  Characters that are missing from
    the table (or whose code is not valid hex) are written as ``.noWord.``
    and recorded in the error log together with the source path.

    Args:
        sourceDir: Root directory that is searched for ``.txt`` files.
        targetDir: Root directory that receives the converted files; the
            layout below *sourceDir* is mirrored.
        tblDir: The error log is written to
            ``os.path.dirname(tblDir) + LOG_FILE_NAME``.
            NOTE(review): the script's entry point passes the script
            directory here, so the log lands in that directory's parent --
            confirm whether that is intended.
        tbl: Mapping of character to hex-code string, as built by loadTbl.
    """
    fileList = []
    getFileList(sourceDir, fileList)
    # Files are written in binary mode so bytes produced by the table are
    # never altered.  Line breaks use the platform line ending, which is
    # what a text-mode handle would have written for '\n'.
    lineEnd = os.linesep.encode('ascii')
    errPath = os.path.dirname(tblDir) + LOG_FILE_NAME
    with io.open(errPath, 'wb') as errFile:
        for sourcePath in fileList:
            # Take the path relative to the source root.  str.lstrip() is
            # not usable for this: it strips a set of characters and can
            # eat the beginning of a file name.
            relPath = os.path.relpath(sourcePath, sourceDir)
            targetPath = os.path.join(targetDir, relPath)
            print('Converting : ' + targetPath)
            targetPathDir = os.path.dirname(targetPath)
            if targetPathDir and not os.path.exists(targetPathDir):
                os.makedirs(targetPathDir)
            chunks = []
            with io.open(sourcePath, 'r', encoding='utf-8') as sourceFile:
                for line in sourceFile:
                    lineStr = line.strip(u'\ufeff')  # drop a UTF-8 BOM
                    unknown = []
                    chunks.append(_encodeLine(lineStr, tbl, lineEnd, unknown))
                    for sourceWord in unknown:
                        errFile.write(_toBytes(sourcePath) + lineEnd +
                                      sourceWord.encode('utf-8') + lineEnd)
            with io.open(targetPath, 'wb') as targetFile:
                targetFile.write(b''.join(chunks))


def _encodeLine(lineStr, tbl, lineEnd, unknown):
    """Translate one decoded line to bytes; unmapped characters go to *unknown*."""
    out = []
    for sourceWord in lineStr:
        if sourceWord == u'\n':
            out.append(lineEnd)
            continue
        try:
            out.append(binascii.unhexlify(tbl[sourceWord]))
        except (KeyError, TypeError, ValueError, binascii.Error):
            # Not in the table, or the table entry is not valid hex.
            unknown.append(sourceWord)
            out.append(b'.noWord.')
    return b''.join(out)


def _toBytes(text):
    """Return *text* as UTF-8 bytes, passing byte strings through unchanged."""
    return text if isinstance(text, bytes) else text.encode('utf-8')
if __name__ == "__main__":
    # Open the code table.
    tbl = loadTbl(TBLFILE)
    # Read the source files, convert them, and write the target files.
    convertFile(
        SRCDIR,
        TGTDIR,
        os.path.dirname(sys.argv[0]),
        tbl,
    )