Page 1 of 1

Python program to merge two PO files

Posted: Mon Feb 18, 2013 2:27 am
by BsKnng
Below is a Python program that merges two language files (of the same language) gracefully and fast.
This way, two or more people can work at random on the same language file, do a merge, and all is fine.

For each translation item, the program takes a look at both language (.po) files:

-If the item is completely the same in both: the item is written down in the output file.
-If one of the two msgstr entries is filled in and the other is empty, the filled-in one is used automatically.
-If the two items are different, and both have something in their msgstr, it prints both items out and asks you to choose: 1 or 2.

This continues until all items are done and the merge is complete.

Usage:

Code: Select all

python mergepofiles.py output.nl.po my.nl.po friends.nl.po

Save as mergepofiles.py:

Code: Select all

#!/usr/bin/env python
# -*- coding: utf-8 -*-

import codecs
import sys

class recorditem:
	"""One entry of a PO file, stored as lists of raw lines per field.

	Attributes:
		id: integer identifier, converted from the constructor argument.
		comments, msgid, msgid_plural, msgstr, msgstr_plural: lists that
			start empty; each instance gets its own list objects.
		is_plural: flag that starts out False.
	"""
	def __init__(self, id):
		self.id = int(id)
		self.is_plural = False
		# Every line-collecting field starts as a fresh, unshared list.
		for fieldname in ('comments', 'msgid', 'msgid_plural', 'msgstr', 'msgstr_plural'):
			setattr(self, fieldname, [])

class translateitems:
	"""Ordered collection of PO entries with a parser and a writer.

	Attributes:
		dataraw: object supplying the raw input; fillrecordswithdata() reads
			its ``lines`` and ``path`` attributes. It starts as an empty list
			and has to be replaced by the caller before parsing.
		records: the entry objects, in the order they were parsed or added.
	"""

	# (line prefix, record attribute collecting that line), in the order tested.
	# Every prefix ends in a space, so "msgid " never matches "msgid_plural ".
	_KEYWORDS=(
		("msgid ","msgid"),
		("msgstr ","msgstr"),
		("msgid_plural ","msgid_plural"),
		("msgstr[0] ","msgstr"),
		("msgstr[1] ","msgstr_plural"),
	)

	def __init__(self):
		self.dataraw=[]
		self.records=[]

	def _writefield(self,writefile,keyword,lines):
		"""Write ``lines`` to ``writefile``, putting ``keyword`` before the first line only."""
		for j, line in enumerate(lines):
			if j==0:
				writefile.write(keyword+line)
			else:
				writefile.write(line)

	def writetofile(self,path):
		"""Write all records to ``path``, separated by one blank line.

		Lines are written exactly as stored (they are expected to carry their
		own line endings); only the field keyword is put back in front of the
		first line of each field. Plural records are written with
		``msgid_plural``, ``msgstr[0]`` and ``msgstr[1]``; other records with a
		single ``msgstr``.
		"""
		# The with-block closes the file even when a write raises.
		with open(path,'w') as writefile:
			for i, record in enumerate(self.records):
				#print 'writing record nr '+str(i+1) +' to '+ path
				if i>0:
					writefile.write('\n')
				writefile.writelines(record.comments)
				self._writefield(writefile,'msgid ',record.msgid)
				if record.is_plural:
					self._writefield(writefile,'msgid_plural ',record.msgid_plural)
					self._writefield(writefile,'msgstr[0] ',record.msgstr)
					self._writefield(writefile,'msgstr[1] ',record.msgstr_plural)
				else:
					self._writefield(writefile,'msgstr ',record.msgstr)

	def _parselines(self,lines,record):
		"""Distribute the raw ``lines`` of one entry over the fields of ``record``.

		A line starting with ``#`` is a comment. A line starting with one of
		the known keywords opens that field and is stored without the keyword.
		Any other line continues whatever field the previous line belonged to;
		lines seen before the first comment or keyword are dropped.
		"""
		laststatus=None
		for line in lines:
			if line.startswith("#"):
				laststatus="comments"
			else:
				for keyword, fieldname in self._KEYWORDS:
					if line.startswith(keyword):
						laststatus=fieldname
						# Slice the prefix off instead of split()[1], which
						# truncated text that itself contained the keyword.
						line=line[len(keyword):]
						if fieldname=="msgid_plural":
							record.is_plural=True
						break
			if laststatus is not None:
				getattr(record,laststatus).append(line)

	def fillrecordswithdata(self):
		"""Parse ``self.dataraw.lines`` into records appended to ``self.records``.

		Entries are separated by blank (whitespace-only) lines. Runs of blank
		lines and a trailing blank line produce no record and are not stored
		in any field, so two files differing only in blank lines yield the
		same records. An input without lines yields no records. A progress
		line is printed for every record found.

		Raises:
			AttributeError: if ``dataraw`` was never replaced by an object
				that has ``lines`` and ``path`` attributes.
		"""
		rawlines=self.dataraw.lines
		numberoflines=len(rawlines)-1
		pending=[]
		recordcount=0
		for linecount, nextline in enumerate(rawlines):
			isblank=not nextline.strip()
			if not isblank:
				pending.append(nextline)
			# An entry ends at a blank line or at the last line of the input.
			if not (isblank or linecount==numberoflines):
				continue
			if not pending:
				continue
			# Work on the new object directly so a second call on the same
			# instance cannot touch records parsed earlier.
			record=recorditem(recordcount)
			self._parselines(pending,record)
			self.records.append(record)
			recordcount=recordcount+1
			print("found record "+str(recordcount)+" at line "+str(linecount)+"/"+str(numberoflines)+" of "+self.dataraw.path)
			pending=[]
		
class rawdata:
	"""All lines of one file, read eagerly when the object is created.

	Attributes:
		path: the path that was passed in.
		lines: every line of the file in order, line endings included, each
			passed through ``.encode('utf-8')``.
	"""
	def __init__(self, path):
		self.path=path
		# The with-block closes the handle even when reading or encoding raises.
		with open(path,'r') as f:
			# NOTE(review): under Python 2 encode() on a byte string decodes
			# with the default encoding first, so non-ASCII input depends on
			# the sys.setdefaultencoding("utf-8") call in the __main__ guard.
			self.lines=[testline.encode('utf-8') for testline in f]

def _isuntranslated(msgstr):
	"""Return True when ``msgstr`` is exactly one line holding the empty PO string ``""``."""
	return len(msgstr)==1 and str(msgstr[0]).strip()=='""'


def _showfield(keyword, lines):
	"""Print ``lines`` stripped and UTF-8 decoded, with ``keyword`` before the first one."""
	for j, line in enumerate(lines):
		text=line.strip().decode('utf-8')
		if j==0:
			text=keyword+text
		print(text)


def _showrecord(record):
	"""Print one record in PO layout so the user can compare candidates."""
	_showfield('', record.comments)
	_showfield('msgid ', record.msgid)
	if record.is_plural:
		_showfield('msgid_plural ', record.msgid_plural)
		_showfield('msgstr[0] ', record.msgstr)
		_showfield('msgstr[1] ', record.msgstr_plural)
	else:
		_showfield('msgstr ', record.msgstr)


def _askchoice():
	"""Prompt until the user answers 1 or 2 and return that number.

	Re-asking on a typo keeps one bad keystroke from aborting a long
	interactive merge or from silently selecting the second candidate.
	"""
	while True:
		answer=raw_input('1 or 2? ').strip()
		if answer in ('1','2'):
			return int(answer)
		print('please answer 1 or 2')


def main(outputpath, merge1path, merge2path):
	"""Merge two PO files of the same language into ``outputpath``.

	Records are paired by position. Identical pairs are copied. When the pair
	differs and one side's msgstr is the single empty string, the other side
	is taken automatically; otherwise both candidates are printed and the
	user chooses. If the files hold a different number of records, a warning
	is printed and the unpaired records of the longer file are copied
	unchanged instead of crashing or being dropped.

	Args:
		outputpath: path of the merged file to write.
		merge1path: path of the first input file (candidate 1).
		merge2path: path of the second input file (candidate 2).
	"""
	data1=translateitems()
	data1.dataraw=rawdata(merge1path)
	data1.fillrecordswithdata()
	data2=translateitems()
	data2.dataraw=rawdata(merge2path)
	data2.fillrecordswithdata()
	data3=translateitems()

	count1=len(data1.records)
	count2=len(data2.records)
	if count1!=count2:
		print('warning: '+merge1path+' has '+str(count1)+' records, '+merge2path+' has '+str(count2)+'; records are paired by position and unpaired records are copied unchanged')

	for record1, record2 in zip(data1.records, data2.records):
		chosen=1
		differs=(record1.msgstr!=record2.msgstr
			or record1.msgstr_plural!=record2.msgstr_plural
			or record1.comments!=record2.comments
			or record1.msgid!=record2.msgid
			or record1.msgid_plural!=record2.msgid_plural)
		if differs:
			print("XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX")
			if _isuntranslated(record1.msgstr):
				chosen=2
				print('1='+str(record1.msgstr).strip())
				print('2='+str(record2.msgstr).strip())
				print('auto-chose:2')
			elif _isuntranslated(record2.msgstr):
				chosen=1
				print('1='+str(record1.msgstr).strip())
				print('2='+str(record2.msgstr).strip())
				print('auto-chose:1')
			else:
				print("1:")
				_showrecord(record1)
				print('')
				print("2:")
				_showrecord(record2)
				print('')
				chosen=_askchoice()
		data3.records.append(record1 if chosen==1 else record2)

	# At most one of these slices is non-empty: the tail of the longer file.
	paired=min(count1,count2)
	data3.records.extend(data1.records[paired:])
	data3.records.extend(data2.records[paired:])

	data3.writetofile(outputpath)
	print('')
	print('merge to '+outputpath+' complete')
	
if __name__ == "__main__":
	# Python 2 only: make UTF-8 the process-wide default string encoding
	# before any file is read.
	reload(sys)
	sys.setdefaultencoding("utf-8")
	if len(sys.argv)!=4:
		# Wrong number of arguments: explain how to call the script.
		scriptname=sys.argv[0]
		helplines=(
			'usage:',
			'python ' + scriptname+ ' pathtooutputfile.po pathtomerge1file.po pathtomerge2file.po',
			'',
			'example:',
			'python ' + scriptname+ ' nl.po.merged nl.po downloads/nl.po',
			'would take the nl.po in this folder and the nl.po in the downloads folder',
			'and would merge the two into a file named nl.po.merged',
		)
		for helpline in helplines:
			print(helpline)
	else:
		outputpath, merge1path, merge2path=sys.argv[1:4]
		main(outputpath, merge1path, merge2path)
		
Example output:

Code: Select all

...
found record 7167 at line 45793/45806 of freecivtranslate/2.4/friend.nl.po
found record 7168 at line 45797/45806 of freecivtranslate/2.4/friend.nl.po
found record 7169 at line 45801/45806 of freecivtranslate/2.4/friend.nl.po
found record 7170 at line 45804/45806 of freecivtranslate/2.4/friend.nl.po
found record 7171 at line 45806/45806 of freecivtranslate/2.4/friend.nl.po
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
1:
#: data/helpdata.txt:1470
msgid ""
"There are a few ways to gain advances from other civilizations: you will "
"sometimes discover enemy technology when you capture a city; you can steal "
"advances with Diplomats and Spies; wonders can provide you with technology; "
"and another player might grant technology in the terms of a pact. But "
"otherwise advances must be discovered through the efforts of your own people."
msgstr "yes"

2:
#: data/helpdata.txt:1470
msgid ""
"There are a few ways to gain advances from other civilizations: you will "
"sometimes discover enemy technology when you capture a city; you can steal "
"advances with Diplomats and Spies; wonders can provide you with technology; "
"and another player might grant technology in the terms of a pact. But "
"otherwise advances must be discovered through the efforts of your own people."
msgstr "oh noooooo"

1 or 2? 2
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
1=['"really"\n']
2=['""\n']
auto-chose:1
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
1=['"ohoh"\n']
2=['""\n']
auto-chose:1
merge to freecivtranslate/2.4/nl.po complete