#!/usr/bin/env python

# Find all .po translation files under the current directory,
# parse their contents into one-liners like
#	  source phrase => translated phrase
# and write these to stdout.
import sys, os

class Entry:
	"""A dictionary entry of 2 parts -- a source phrase and its translated
	equivalent.

	source -- the original (untranslated) phrase
	trans  -- the translated phrase

	Both parts default to the empty string, so an entry that was only
	partially filled in is reported as not valid instead of raising
	AttributeError.
	The entry is valid if both source and trans are populated.
	The string representation is "source => trans".
	"""
	def __init__(self, source='', trans=''):
		self.source = source
		self.trans = trans

	def valid(self):
		"""Return True only when both source and trans are non-empty."""
		return bool(self.source and self.trans)

	def __str__(self):
		"""Return the entry formatted as "source => trans"."""
		return "%s => %s" % (self.source, self.trans)
# Gather the path of every .po file found at or below the current directory,
# in the order os.walk reports them.
filelist = [
	os.path.join(root, name)
	for (root, _subdirs, names) in os.walk('.')
	for name in names
	if name.endswith('.po')
]

def _unquote(text):
	"""Return text without surrounding whitespace and its enclosing quotes.

	Only the outermost pair of double quotes is removed, so an escaped quote
	sitting at the edge of a phrase is kept. Escape sequences are left
	exactly as written in the file.
	"""
	text = text.strip()
	if len(text) >= 2 and text.startswith('"') and text.endswith('"'):
		return text[1:-1]
	# unbalanced quoting: fall back to dropping whatever quotes are there
	return text.strip('"')


results = [ ]

# This is a deliberately small reader, not a full gettext parser. It relies on
# the parts of the .po format that matter here: msgid and msgstr elements hold
# the source phrase and the translated phrase respectively, their values are
# wrapped in double quotes, and a value may continue on following lines that
# each consist of another quoted string.
# Plural forms are reduced to the singular pair (msgid / msgstr[0]); comment
# lines and other keywords such as msgctxt are skipped.
for po_path in filelist:
	# start every file with a clean slate so nothing leaks between files
	entry = None
	doing_msgid, doing_msgstr = False, False

	# 'with' closes the file even if reading fails part-way through
	with open(po_path) as fh:
		for line in fh:
			line = line.strip()
			if not line:
				# a blank line ends the current entry; clearing 'entry' keeps
				# runs of blank lines from recording it more than once
				doing_msgid, doing_msgstr = False, False
				if entry is not None:
					results.append(entry)
					entry = None
			elif line.startswith('#'):
				# comment / metadata line -- never part of a phrase
				continue
			elif line.startswith('"'):
				# continuation of a multiline msgid or msgstr
				if entry is None:
					continue
				if doing_msgid:
					entry.source += _unquote(line)
				elif doing_msgstr:
					entry.trans += _unquote(line)
			else:
				parts = line.split(None, 1)
				keyword = parts[0]
				rest = parts[1] if len(parts) > 1 else ''
				if keyword == 'msgid':
					# a new msgid also terminates an entry that was not
					# followed by a blank line
					if entry is not None:
						results.append(entry)
					entry = Entry()
					entry.source = _unquote(rest)
					# make sure trans exists even if no msgstr follows
					entry.trans = ''
					doing_msgid, doing_msgstr = True, False
				elif keyword in ('msgstr', 'msgstr[0]') and entry is not None:
					entry.trans = _unquote(rest)
					doing_msgid, doing_msgstr = False, True
				else:
					# msgctxt, msgid_plural, msgstr[1..], or a msgstr with no
					# preceding msgid: not collected, and any quoted lines
					# that follow must not be glued onto source/trans
					doing_msgid, doing_msgstr = False, False

	# the last entry of a file is often not followed by a blank line
	if entry is not None:
		results.append(entry)

# Write one "source => translation" line per complete entry; entries missing
# either part are skipped. print(...) with a single argument behaves the same
# on Python 2 and Python 3, whereas the bare print statement is a syntax
# error on Python 3.
for result in results:
	if result.valid():
		print(result)


