# [Web page residue, not part of the script] Code pull complete; the page will refresh automatically.
#-*- coding:utf-8 -*-
__author__ = 'luguolei'
import os
import sys
import re
import traceback
import codecs
# Regular expressions, matched against the full path, that select the files
# to convert (the duplicated '.cpp' entry has been dropped).
_convertExt = [r'.*\.h$', r'.*\.c$', r'.*\.cpp$', r'.*\.hpp$', r'.*\.tmpl$']

# Path separator in a form that can be embedded in a regular expression;
# re.escape takes care of the Windows backslash.
splt = re.escape(os.path.sep)

# Regular expressions for directories that are skipped together with
# everything below them.
_ignore = [r'.*' + splt + r'\.svn$',
           r'.*' + splt + r'\.git$']

# Result lists filled by ConvertFile and reported at the end of the run.
_converted = []        # rewritten successfully
_failed = []           # content could not be decoded / re-encoded
_failedToWrite = []    # converted content could not be written back
_passed = []           # already utf8 with bom

# Encoding assumed for files that carry no recognised BOM.
_defaultEncoding = 'cp936'
# Target encoding name (compared with DetectEncoding's result) and the BOM
# written in front of the converted content.
_convertTo = ('utf8', codecs.BOM_UTF8)
# Log file handle; opened by WriteLog on first use.
_logFile = None
def WriteLog(s):
    """Append one entry to the log file and echo it on the console.

    The log file 'toutf8.log' is created in the current working directory on
    the first call and starts with a UTF-8 BOM. Text strings are stored as
    UTF-8; byte strings are stored unchanged.

    s: the message, either a text string or a byte string.
    """
    global _logFile
    data = s
    if isinstance(data, type(u'')):
        # type(u'') is `unicode` on Python 2 and `str` on Python 3.
        data = data.encode('utf-8')
    if _logFile is None:
        _logFile = open(os.path.abspath('toutf8.log'), 'wb')
        _logFile.write(codecs.BOM_UTF8)
    _logFile.write(data)
    # The handle is binary, so line ends are written as byte literals.
    if not data.endswith(b"\n"):
        _logFile.write(b"\n")
    # NOTE(review): the flattened source does not show whether this second
    # newline was nested under the `if` above; it is written unconditionally
    # here, which leaves a blank line after every entry.
    _logFile.write(b"\n")
    print(s)
def CloseLog():
    """Close the log file if WriteLog has opened one; otherwise do nothing."""
    # The original test was inverted (`if not _logFile`): an open log was
    # never closed and a missing one raised AttributeError on None.
    if _logFile is not None:
        _logFile.close()
def Usage():
    """Print the command-line help to standard output."""
    print("Usage:")
    print("python toutf8.py <-d <directory or file>>")
    # The help text now matches the script: the log is 'toutf8.log', the
    # extension list is the one in _convertExt, only .svn/.git directories
    # are skipped, and the backslash in the sample path is escaped.
    print("""Example:
python toutf8.py -d c:\\myproject
The command will convert *.h *.c *.cpp *.hpp and *.tmpl files into utf-8 (with BOM) unless the file already starts with a UTF-8 BOM.
.svn and .git directories will be ignored.
a log file named 'toutf8.log' will be generated in the current directory.
""")
def ParseParam(argv):
    """Return the single value given to the -d option in argv.

    argv: the argument list, normally sys.argv.

    Prints an error followed by the usage text and exits with status -1 when
    -d is absent, has no value, or is followed by more than one value.
    """
    def _Parse(mark, v):
        """Return the items that follow mark in v, up to the next '-' option."""
        if mark not in v:
            return []
        out = []
        # Only v is consulted here; the original mixed v with the enclosing
        # argv, which worked only because both were the same list.
        for s in v[v.index(mark) + 1:]:
            if s.startswith('-'):
                break
            out.append(s)
        return out

    values = _Parse('-d', argv)
    if not values:
        print('Invalid -d parameter')
        Usage()
        sys.exit(-1)
    if len(values) != 1:
        print('invalid -d parameter.')
        Usage()
        sys.exit(-1)
    return values[0]
def DetectEncoding(path):
    """Guess the encoding of a file from the byte-order mark at its start.

    path: path of an existing file.

    Returns a tuple (encoding, bomlen). When the first bytes are a known BOM,
    encoding is the matching codec name and bomlen the length of the BOM;
    otherwise encoding is _defaultEncoding and bomlen is 0.
    """
    assert os.path.isfile(path)
    # Ordered so that the 4-byte UTF-32 marks are tested before the 2-byte
    # UTF-16 ones: BOM_UTF32_LE (FF FE 00 00) begins with BOM_UTF16_LE
    # (FF FE), so testing in dict order could report the wrong encoding.
    knownBoms = (
        (codecs.BOM_UTF32_BE, 'UTF-32BE'),
        (codecs.BOM_UTF32_LE, 'UTF-32LE'),
        (codecs.BOM_UTF8, 'utf8'),
        (codecs.BOM_UTF16_BE, 'UTF-16BE'),
        (codecs.BOM_UTF16_LE, 'UTF-16LE'),
    )
    with open(path, 'rb') as f:
        head = f.read(4)
    for bom, name in knownBoms:
        if head.startswith(bom):
            return name, len(bom)
    return _defaultEncoding, 0
def ConvertFile(path):
    """Rewrite one file as UTF-8 with BOM and record the outcome.

    path: path of an existing file.

    The outcome is recorded in the module-level lists:
      _passed         the file is already in the target encoding; untouched
      _failed         the content could not be decoded or re-encoded; untouched
      _failedToWrite  the converted content could not be put in place
      _converted      the file was rewritten
    Failures are also written to the log through WriteLog.
    """
    assert os.path.isfile(path)
    encoding, bomlen = DetectEncoding(path)
    if encoding == _convertTo[0]:
        _passed.append(path)
        return

    with open(path, 'rb') as f:
        content = f.read()
    assert len(content) >= bomlen
    content = content[bomlen:]   # drop the old BOM, if any

    try:
        encoded = content.decode(encoding).encode(_convertTo[0])
    except (UnicodeError, LookupError):
        _failed.append(path)
        WriteLog("Ignore file:" + path + " because of error:")
        WriteLog(traceback.format_exc())
        return

    # Write the new content next to the original first, so that a failed
    # write can no longer destroy the only copy of the file.
    tmpPath = path + '.toutf8.tmp'
    try:
        with open(tmpPath, 'wb') as fw:
            fw.write(_convertTo[1])
            fw.write(encoded)
    except (IOError, OSError):
        _failedToWrite.append(path)
        WriteLog("*Failed to create file: " + path + " because of error:")
        WriteLog(traceback.format_exc())
        WriteLog("The original file was left unchanged.")
        try:
            os.remove(tmpPath)   # best-effort cleanup of a partial file
        except OSError:
            pass
        return

    try:
        # Remove first: os.rename does not overwrite an existing file on
        # Windows.
        os.remove(path)
        os.rename(tmpPath, path)
    except OSError:
        _failedToWrite.append(path)
        WriteLog("*Failed to create file: " + path + " because of error:")
        WriteLog(traceback.format_exc())
        WriteLog("The converted content was kept in: " + tmpPath)
        return

    _converted.append(path)
def SearchDirForFiles(start, matches=None, ignores=None, recursive=True):
    r"""Search a directory and generate the paths of the files that qualify.

    Every candidate is identified by its full path (start joined with the
    relative location). A path matched by any expression in ignores is
    skipped; of the remaining paths, those matched by any expression in
    matches are yielded. Matching is case-insensitive and anchored at the
    beginning of the path (re.match).

    Examples::

        SearchDirForFiles(u'c:\\')                    # every file on drive C (may take a while)
        SearchDirForFiles(u'c:\\', recursive=False)   # only files directly in the root
        SearchDirForFiles(u'c:\\',                    # all .py / .pyw files, except those
                          matches=(r'.*\.pyw?$',),    # below a directory named dtea
                          ignores=(r'.*\\dtea$',))
        SearchDirForFiles(u'c:\\',                    # as above, and also without
                          matches=(r'.*\.pyw?$',),    # files whose name starts with test_
                          ignores=(r'.*\\dtea$', r'.*\\test_.*.pyw?$'))

    A top-level directory can produce a very large result, so the function is
    a generator. Typical use::

        for f in SearchDirForFiles(....):
            Process(f)

    If a list is really needed::

        lst = [f for f in SearchDirForFiles(...)]

    @attention: len() cannot be applied to a generator, so the number of
        results is not known before the iteration has finished.
    @param start: directory to start from; it is part of the search range.
        A text (unicode) string is preferred; a byte string is decoded as
        UTF-8.
    @type start: unicode, str
    @param matches: list or tuple of regular expressions selecting files.
        None (or empty) selects every file not excluded by ignores.
    @type matches: list, tuple
    @param ignores: list or tuple of regular expressions excluding files and,
        in recursive mode, whole directories. None (or empty) excludes
        nothing.
    @type ignores: list, tuple
    @param recursive: whether sub-directories are searched. If False, only
        the files directly inside start are considered.
    @type recursive: bool
    @return: generator of the full paths of the files found.
    @rtype: generator of unicode
    @raise TypeError: if start is neither a byte string nor a text string.
        Because this is a generator, the error surfaces on first iteration.
    """
    # Compile case-insensitively instead of lower-casing the pattern text:
    # lower-casing turned escapes such as \S, \W or \D into their opposites.
    flags = re.IGNORECASE | re.UNICODE
    reMatch = [re.compile(m, flags) for m in (matches or ())]
    reIgnore = [re.compile(ig, flags) for ig in (ignores or ())]

    def _Match(fileName):
        """Return True if fileName is selected (always True without matches)."""
        if not reMatch:
            return True
        return any(r.match(fileName) is not None for r in reMatch)

    def _Ignore(fileName):
        """Return True if fileName is excluded by one of the ignores."""
        return any(r.match(fileName) is not None for r in reIgnore)

    # Sub-directories may have non-ASCII names even when the start directory
    # does not; searching from a text string makes the results text as well.
    # bytes / type(u'') are str / unicode on Python 2 and bytes / str on 3.
    if isinstance(start, bytes):
        ustart = start.decode('utf-8')
    elif isinstance(start, type(u'')):
        ustart = start
    else:
        raise TypeError(u"start参数要求是str或unicode类型.")

    if not recursive:
        # Only the entries directly inside the start directory.
        for f in os.listdir(ustart):
            full = os.path.join(ustart, f)
            if (not _Ignore(full)) and _Match(full) and os.path.isfile(full):
                yield full
        return

    for curdir, subdirs, files in os.walk(ustart):
        if _Ignore(curdir):
            # Emptying the list in place stops os.walk from descending into
            # the sub-directories of an ignored directory.
            del subdirs[:]
            continue
        for f in files:
            full = os.path.join(curdir, f)
            if (not _Ignore(full)) and _Match(full):
                yield full
def SearchFiles(start):
    """Return a list of the files below start that are candidates for conversion.

    The search is delegated to SearchDirForFiles with the module-level
    _convertExt patterns as matches and _ignore patterns as ignores.
    """
    return list(SearchDirForFiles(start, _convertExt, _ignore))
def AreYouSure(path):
    """Ask for confirmation on the console; exit unless the answer is 'yes'.

    path: either a single file path (text or byte string), which is shown in
        the prompt, or a collection of paths, of which only the count is
        shown.

    Returns None when the stripped, lower-cased answer equals 'yes'.
    Otherwise prints 'Canceled.' and leaves the program via sys.exit(0).
    """
    textType = type(u'')      # unicode on Python 2, str on Python 3
    try:
        ask = raw_input       # Python 2
    except NameError:
        ask = input           # Python 3
    if isinstance(path, (textType, bytes)):
        displayPath = path
        # getfilesystemencoding() may return None on some platforms.
        fsEncoding = sys.getfilesystemencoding() or 'utf-8'
        if str is bytes:
            # Python 2: the prompt is a byte string, so encode a text path.
            if isinstance(path, textType):
                displayPath = path.encode(fsEncoding, 'replace')
        elif isinstance(path, bytes):
            # Python 3: the prompt is text, so decode a byte path.
            displayPath = path.decode(fsEncoding, 'replace')
        prompt = "About to convert file " + displayPath + " to utf-8(with BOM), are you sure?[yes/no]:"
    else:
        # The original wording was "convert file N files".
        prompt = "About to convert " + str(len(path)) + " files to utf-8(with BOM), are you sure?[yes/no]:"
    if ask(prompt).strip().lower() != 'yes':
        print('Canceled.')
        sys.exit(0)
def Convert(path):
    """Convert a single file, or every matching file below a directory.

    path: a file or directory path, absolute or relative to the current
        directory.

    The user is asked to confirm through AreYouSure before anything is
    converted; the conversion itself is done by ConvertFile.
    """
    absPath = os.path.abspath(path)
    # Work with a text path so that names found below it are text as well.
    # `bytes` is `str` on Python 2; on Python 3 abspath of a text path is
    # already text and the decode is skipped. getfilesystemencoding() may
    # return None on some platforms, hence the fallback.
    if isinstance(absPath, bytes):
        absPath = absPath.decode(sys.getfilesystemencoding() or 'utf-8')

    if os.path.isfile(absPath):
        # single file
        AreYouSure(absPath)
        ConvertFile(absPath)
        return

    allFiles = SearchFiles(absPath)
    AreYouSure(allFiles)
    for f in allFiles:
        ConvertFile(f)
if __name__ == '__main__':
    # Script entry point: convert what -d names, then log a summary made of
    # one section per result list.
    path = ParseParam(sys.argv)
    Convert(path)

    WriteLog("------------------------------------------------")
    report = (
        ("----------------Files converted-----------------", _converted),
        ("--------Files passed(Already UTF-8 with BOM)-----", _passed),
        ("--------------Files failed to convert------------", _failed),
        ("******Files failed to create(original file has been deleted)********\n****** !!! The file has been lost !!! ********", _failedToWrite),
    )
    for heading, names in report:
        WriteLog(heading)
        # An empty section is reported as "None."
        for entry in (names or ["None."]):
            WriteLog(entry)
    CloseLog()
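# [Web page residue, not part of the script] This location may contain content that is unsuitable for display, so the page does not show it. You can review and modify it yourself using the relevant editing functions.
# [Web page residue, not part of the script] If you confirm that the content involves no inappropriate language, pure advertising, violence, vulgar or pornographic material, infringement, piracy, false or worthless content, and nothing that violates national laws and regulations, you can click submit to appeal and it will be handled as soon as possible.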