To fix all these errors at once, I wrote two Python programs.
Save this one as findbadcharacters.py:
Code: Select all
#!/usr/bin/env python
#-*- coding: utf-8 -*-
import codecs
import sys
def main(path):
    """Collect every line of *path* that contains a U+FFFD replacement char.

    Two helper files are written next to *path*:

    * ``path + '.badlines'``   -- the offending lines, copied byte-for-byte,
      so they can be corrected by hand in an editor.
    * ``path + '.badlinenrs'`` -- the 1-based line number of each offending
      line, one number per line, in the same order.

    The input is processed as raw bytes. This avoids the implicit
    decode/encode round trip (and the ``sys.setdefaultencoding`` hack it
    needed), which raised on lines holding other invalid byte sequences;
    that error was swallowed, so such lines were announced on stdout but
    never recorded.

    Args:
        path: path of the file to scan (e.g. a ``.po`` translation file).
    """
    # UTF-8 encoding of U+FFFD REPLACEMENT CHARACTER, the marker left behind
    # when a file was saved through a broken encoding conversion.
    bad_char = b'\xef\xbf\xbd'
    badcount = 0

    with open(path, 'rb') as source, \
            open(path + '.badlines', 'wb') as bad_lines, \
            open(path + '.badlinenrs', 'w') as bad_line_numbers:
        # Line numbers are 1-based so they match what editors display.
        for countline, testline in enumerate(source, 1):
            if bad_char not in testline:
                continue
            print('bad char found at line ' + str(countline))
            bad_lines.write(testline)
            bad_line_numbers.write(str(countline) + '\n')
            badcount += 1

    print('badcount=' + str(badcount) + ' lines')
    print('edit ' + path + '.badlines untill satisfied, then run')
    print('python fixbadcharacters.py ' + path)
    print('to replace bad char lines with the new ones')


if __name__ == "__main__":
    # Fail with a usage hint instead of an IndexError traceback.
    if len(sys.argv) < 2:
        sys.exit('usage: python findbadcharacters.py <file>')
    main(sys.argv[1])
Then run it on the translation file:
Code: Select all
python findbadcharacters.py freecivtranslate/2.4/nl.po
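This creates two files next to nl.po: one called nl.po.badlinenrs, which lists the line numbers of the affected lines, and another called nl.po.badlines, which contains all the lines with �-errors in them.
Open nl.po.badlines in a text editor and change the bad characters to the proper ones (Babyloni� into Babylonië, etc.). Do not add or remove lines, because the lines are matched to their line numbers by order.
When you are done, save the file, then run: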
Code: Select all
python fixbadcharacters.py freecivtranslate/2.4/nl.po
The code for fixbadcharacters.py is:
Code: Select all
#!/usr/bin/env python
#-*- coding: utf-8 -*-
import codecs
import sys
def main(path):
    """Write ``path + '.fixed'``: *path* with its bad lines replaced.

    Reads the line numbers from ``path + '.badlinenrs'`` and the hand-edited
    replacement lines from ``path + '.badlines'``. The n-th replacement line
    is substituted for the line whose number is the n-th entry of the number
    file; every other line of *path* is copied unchanged.

    All line content is handled as raw bytes, so nothing is re-encoded and
    lines that are not valid UTF-8 no longer abort the run.

    Args:
        path: path of the original file that was scanned for bad characters.

    Raises:
        ValueError: if ``.badlines`` holds fewer lines than ``.badlinenrs``
            lists. Lines were probably deleted while editing; substituting
            anyway would silently drop lines from the output.
    """
    # Blank lines are skipped so an empty number file (no bad lines found)
    # or a stray trailing newline does not break int().
    with open(path + '.badlinenrs', 'r') as numbers_file:
        line_numbers = [int(entry) for entry in numbers_file if entry.strip()]

    with open(path + '.badlines', 'rb') as corrected_file:
        corrected_lines = corrected_file.readlines()

    if len(corrected_lines) < len(line_numbers):
        raise ValueError(
            path + '.badlines has ' + str(len(corrected_lines)) +
            ' lines but ' + path + '.badlinenrs lists ' +
            str(len(line_numbers)) + ' line numbers')

    # Pair each recorded line number with its corrected line, in order.
    replacements = dict(zip(line_numbers, corrected_lines))

    with open(path, 'rb') as source, open(path + '.fixed', 'wb') as target:
        for currentlinenr, testline in enumerate(source, 1):
            goodline = replacements.get(currentlinenr)
            if goodline is not None:
                # Editors may strip the final newline of .badlines; restore
                # it so the replaced line does not merge with the next one.
                if testline.endswith(b'\n') and not goodline.endswith(b'\n'):
                    goodline += b'\n'
                testline = goodline
            target.write(testline)

    print('saved to ' + path + '.fixed')


if __name__ == "__main__":
    # Fail with a usage hint instead of an IndexError traceback.
    if len(sys.argv) < 2:
        sys.exit('usage: python fixbadcharacters.py <file>')
    main(sys.argv[1])