Updated Pygettext to Python 3.7.9

Author: Manuel Cortez
Date: 2021-11-01 10:03:51 -06:00
parent 8a8f1998ac
commit 60a67947e6


@@ -1,6 +1,6 @@
-#! /usr/bin/env python
+#! /usr/bin/env python3
 # -*- coding: iso-8859-1 -*-
-# Originally written by Barry Warsaw <barry@zope.com>
+# Originally written by Barry Warsaw <barry@python.org>
 #
 # Minimally patched to make it even more xgettext compatible
 # by Peter Funk <pf@artcom-gmbh.de>
@@ -156,14 +156,14 @@ If `inputfile' is -, standard input is read.
 """)

 import os
-import imp
+import importlib.machinery
+import importlib.util
 import sys
 import glob
 import time
 import getopt
 import token
 import tokenize
-import operator

 __version__ = '1.5'
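
Aside: the deprecated imp module served two purposes here, finding modules and listing source suffixes, and importlib now covers both. A minimal sketch of the replacement calls used later in this commit ('json' is an arbitrary example module):

    import importlib.machinery
    import importlib.util

    # Formerly [t[0] for t in imp.get_suffixes() if t[2] == imp.PY_SOURCE][0]
    py_ext = importlib.machinery.SOURCE_SUFFIXES[0]   # '.py'

    # Formerly imp.find_module(); the spec's origin is the file path
    spec = importlib.util.find_spec('json')
    print(py_ext, spec.origin)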
@@ -189,50 +189,51 @@ msgstr ""
 "Last-Translator: FULL NAME <EMAIL@ADDRESS>\\n"
 "Language-Team: LANGUAGE <LL@li.org>\\n"
 "MIME-Version: 1.0\\n"
-"Content-Type: text/plain; charset=CHARSET\\n"
-"Content-Transfer-Encoding: ENCODING\\n"
+"Content-Type: text/plain; charset=%(charset)s\\n"
+"Content-Transfer-Encoding: %(encoding)s\\n"
 "Generated-By: pygettext.py %(version)s\\n"
 ''')

 def usage(code, msg=''):
-    print >> sys.stderr, __doc__ % globals()
+    print(__doc__ % globals(), file=sys.stderr)
     if msg:
-        print >> sys.stderr, msg
+        print(msg, file=sys.stderr)
     sys.exit(code)

-escapes = []
-
-def make_escapes(pass_iso8859):
-    global escapes
-    if pass_iso8859:
-        # Allow iso-8859 characters to pass through so that e.g. 'msgid
+def make_escapes(pass_nonascii):
+    global escapes, escape
+    if pass_nonascii:
+        # Allow non-ascii characters to pass through so that e.g. 'msgid
         # "Höhe"' would result not result in 'msgid "H\366he"'.  Otherwise we
         # escape any character outside the 32..126 range.
         mod = 128
+        escape = escape_ascii
     else:
         mod = 256
-    for i in range(256):
-        if 32 <= (i % mod) <= 126:
-            escapes.append(chr(i))
-        else:
-            escapes.append("\\%03o" % i)
-    escapes[ord('\\')] = '\\\\'
-    escapes[ord('\t')] = '\\t'
-    escapes[ord('\r')] = '\\r'
-    escapes[ord('\n')] = '\\n'
-    escapes[ord('\"')] = '\\"'
+        escape = escape_nonascii
+    escapes = [r"\%03o" % i for i in range(mod)]
+    for i in range(32, 127):
+        escapes[i] = chr(i)
+    escapes[ord('\\')] = r'\\'
+    escapes[ord('\t')] = r'\t'
+    escapes[ord('\r')] = r'\r'
+    escapes[ord('\n')] = r'\n'
+    escapes[ord('\"')] = r'\"'

-def escape(s):
-    global escapes
-    s = list(s)
-    for i in range(len(s)):
-        s[i] = escapes[ord(s[i])]
-    return EMPTYSTRING.join(s)
+def escape_ascii(s, encoding):
+    return ''.join(escapes[ord(c)] if ord(c) < 128 else c for c in s)
+
+def escape_nonascii(s, encoding):
+    return ''.join(escapes[b] for b in s.encode(encoding))
+
+def is_literal_string(s):
+    return s[0] in '\'"' or (s[0] in 'rRuU' and s[1] in '\'"')

 def safe_eval(s):
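
Aside, not part of the commit: a minimal sketch of what the rewritten table-driven escaping produces. Unlike the old per-character escape(), escape_nonascii() encodes first, so the octal escapes depend on the target encoding (the real table also special-cases backslash, tab, CR, LF and double quote):

    escapes = [r"\%03o" % i for i in range(256)]
    for i in range(32, 127):
        escapes[i] = chr(i)            # printable ASCII stays literal

    def escape_nonascii(s, encoding):
        # Octal-escape every encoded byte outside the printable range.
        return ''.join(escapes[b] for b in s.encode(encoding))

    print(escape_nonascii('Höhe', 'latin-1'))  # H\366he
    print(escape_nonascii('Höhe', 'utf-8'))    # H\303\266he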
@@ -240,18 +241,18 @@ def safe_eval(s):
     return eval(s, {'__builtins__':{}}, {})

-def normalize(s):
+def normalize(s, encoding):
     # This converts the various Python string types into a format that is
     # appropriate for .po files, namely much closer to C style.
     lines = s.split('\n')
     if len(lines) == 1:
-        s = '"' + escape(s) + '"'
+        s = '"' + escape(s, encoding) + '"'
     else:
         if not lines[-1]:
             del lines[-1]
             lines[-1] = lines[-1] + '\n'
         for i in range(len(lines)):
-            lines[i] = escape(lines[i])
+            lines[i] = escape(lines[i], encoding)
         lineterm = '\\n"\n"'
         s = '""\n"' + lineterm.join(lines) + '"'
     return s
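
For illustration (not in the commit), with the functions above in scope, normalize() renders single-line and multi-line messages in the C-style .po form:

    make_escapes(True)                  # build the escape table first
    print(normalize('hello', 'utf-8'))
    # "hello"
    print(normalize('first line\nsecond line\n', 'utf-8'))
    # ""
    # "first line\n"
    # "second line\n"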
@@ -262,64 +263,6 @@ def containsAny(str, set):
     return 1 in [c in str for c in set]

-def _visit_pyfiles(list, dirname, names):
-    """Helper for getFilesForName()."""
-    # get extension for python source files
-    if not globals().has_key('_py_ext'):
-        global _py_ext
-        _py_ext = [triple[0] for triple in imp.get_suffixes()
-                   if triple[2] == imp.PY_SOURCE][0]
-
-    # don't recurse into CVS directories
-    if 'CVS' in names:
-        names.remove('CVS')
-
-    # add all *.py files to list
-    list.extend(
-        [os.path.join(dirname, file) for file in names
-         if os.path.splitext(file)[1] == _py_ext]
-        )
-
-
-def _get_modpkg_path(dotted_name, pathlist=None):
-    """Get the filesystem path for a module or a package.
-
-    Return the file system path to a file for a module, and to a directory for
-    a package. Return None if the name is not found, or is a builtin or
-    extension module.
-    """
-    # split off top-most name
-    parts = dotted_name.split('.', 1)
-
-    if len(parts) > 1:
-        # we have a dotted path, import top-level package
-        try:
-            file, pathname, description = imp.find_module(parts[0], pathlist)
-            if file: file.close()
-        except ImportError:
-            return None
-
-        # check if it's indeed a package
-        if description[2] == imp.PKG_DIRECTORY:
-            # recursively handle the remaining name parts
-            pathname = _get_modpkg_path(parts[1], [pathname])
-        else:
-            pathname = None
-    else:
-        # plain name
-        try:
-            file, pathname, description = imp.find_module(
-                dotted_name, pathlist)
-            if file:
-                file.close()
-            if description[2] not in [imp.PY_SOURCE, imp.PKG_DIRECTORY]:
-                pathname = None
-        except ImportError:
-            pathname = None
-
-    return pathname
-
-
 def getFilesForName(name):
     """Get a list of module files for a filename, a module or package name,
     or a directory.
@@ -334,14 +277,28 @@ def getFilesForName(name):
         return list

     # try to find module or package
-    name = _get_modpkg_path(name)
+    try:
+        spec = importlib.util.find_spec(name)
+        name = spec.origin
+    except ImportError:
+        name = None
     if not name:
         return []

     if os.path.isdir(name):
         # find all python files in directory
         list = []
-        os.path.walk(name, _visit_pyfiles, list)
+        # get extension for python source files
+        _py_ext = importlib.machinery.SOURCE_SUFFIXES[0]
+        for root, dirs, files in os.walk(name):
+            # don't recurse into CVS directories
+            if 'CVS' in dirs:
+                dirs.remove('CVS')
+            # add all *.py files to list
+            list.extend(
+                [os.path.join(root, file) for file in files
+                 if os.path.splitext(file)[1] == _py_ext]
+                )
         return list
     elif os.path.exists(name):
         # a single file
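
Aside: os.path.walk() and its callback protocol are gone in Python 3; os.walk() is a generator, and pruning the dirs list in place is what stops recursion. A standalone sketch of the pattern adopted above ('.' is an arbitrary starting directory):

    import os

    py_files = []
    for root, dirs, files in os.walk('.'):
        if 'CVS' in dirs:
            dirs.remove('CVS')          # pruning in place skips the subtree
        py_files.extend(os.path.join(root, f) for f in files
                        if f.endswith('.py'))
    print(py_files)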
@@ -359,12 +316,13 @@ class TokenEater:
         self.__lineno = -1
         self.__freshmodule = 1
         self.__curfile = None
+        self.__enclosurecount = 0

     def __call__(self, ttype, tstring, stup, etup, line):
         # dispatch
 ##        import token
-##        print >> sys.stderr, 'ttype:', token.tok_name[ttype], \
-##              'tstring:', tstring
+##        print('ttype:', token.tok_name[ttype], 'tstring:', tstring,
+##              file=sys.stderr)
         self.__state(ttype, tstring, stup[0])

     def __waiting(self, ttype, tstring, lineno):
@@ -373,13 +331,13 @@ class TokenEater:
         if opts.docstrings and not opts.nodocstrings.get(self.__curfile):
             # module docstring?
             if self.__freshmodule:
-                if ttype == tokenize.STRING:
+                if ttype == tokenize.STRING and is_literal_string(tstring):
                     self.__addentry(safe_eval(tstring), lineno, isdocstring=1)
                     self.__freshmodule = 0
                 elif ttype not in (tokenize.COMMENT, tokenize.NL):
                     self.__freshmodule = 0
                 return
-            # class docstring?
+            # class or func/method docstring?
             if ttype == tokenize.NAME and tstring in ('class', 'def'):
                 self.__state = self.__suiteseen
                 return
@@ -387,13 +345,19 @@ class TokenEater:
             self.__state = self.__keywordseen

     def __suiteseen(self, ttype, tstring, lineno):
-        # ignore anything until we see the colon
-        if ttype == tokenize.OP and tstring == ':':
-            self.__state = self.__suitedocstring
+        # skip over any enclosure pairs until we see the colon
+        if ttype == tokenize.OP:
+            if tstring == ':' and self.__enclosurecount == 0:
+                # we see a colon and we're not in an enclosure: end of def
+                self.__state = self.__suitedocstring
+            elif tstring in '([{':
+                self.__enclosurecount += 1
+            elif tstring in ')]}':
+                self.__enclosurecount -= 1

     def __suitedocstring(self, ttype, tstring, lineno):
         # ignore any intervening noise
-        if ttype == tokenize.STRING:
+        if ttype == tokenize.STRING and is_literal_string(tstring):
             self.__addentry(safe_eval(tstring), lineno, isdocstring=1)
             self.__state = self.__waiting
         elif ttype not in (tokenize.NEWLINE, tokenize.INDENT,
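
Aside, not from the commit: the new enclosure counter matters because a def header may contain colons before the one that opens the suite (annotations, dict defaults, lambdas). A sketch of a signature where the old first-colon check would have fired too early and missed the docstring:

    # Both ':' tokens inside the parentheses arrive as OP tokens before
    # the suite colon; only the latter should trigger __suitedocstring.
    def greet(name: str, options={'polite': True}) -> str:
        """Docstring that pygettext should still find."""
        return 'Hello, %s' % name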
@@ -418,18 +382,18 @@ class TokenEater:
             if self.__data:
                 self.__addentry(EMPTYSTRING.join(self.__data))
             self.__state = self.__waiting
-        elif ttype == tokenize.STRING:
+        elif ttype == tokenize.STRING and is_literal_string(tstring):
             self.__data.append(safe_eval(tstring))
         elif ttype not in [tokenize.COMMENT, token.INDENT, token.DEDENT,
                            token.NEWLINE, tokenize.NL]:
             # warn if we see anything else than STRING or whitespace
-            print >> sys.stderr, _(
+            print(_(
                 '*** %(file)s:%(lineno)s: Seen unexpected token "%(token)s"'
                 ) % {
                 'token': tstring,
                 'file': self.__curfile,
                 'lineno': self.__lineno
-                }
+                }, file=sys.stderr)
             self.__state = self.__waiting

     def __addentry(self, msg, lineno=None, isdocstring=0):
@@ -445,45 +409,41 @@ class TokenEater:
     def write(self, fp):
         options = self.__options
-        timestamp = time.strftime('%Y-%m-%d %H:%M+%Z')
-        # The time stamp in the header doesn't have the same format as that
-        # generated by xgettext...
-        print >> fp, pot_header % {'time': timestamp, 'version': __version__}
+        timestamp = time.strftime('%Y-%m-%d %H:%M%z')
+        encoding = fp.encoding if fp.encoding else 'UTF-8'
+        print(pot_header % {'time': timestamp, 'version': __version__,
+                            'charset': encoding,
+                            'encoding': '8bit'}, file=fp)
         # Sort the entries.  First sort each particular entry's keys, then
         # sort all the entries by their first item.
         reverse = {}
         for k, v in self.__messages.items():
-            keys = v.keys()
-            keys.sort()
+            keys = sorted(v.keys())
             reverse.setdefault(tuple(keys), []).append((k, v))
-        rkeys = reverse.keys()
-        rkeys.sort()
+        rkeys = sorted(reverse.keys())
         for rkey in rkeys:
             rentries = reverse[rkey]
             rentries.sort()
             for k, v in rentries:
-                isdocstring = 0
                 # If the entry was gleaned out of a docstring, then add a
                 # comment stating so.  This is to aid translators who may wish
                 # to skip translating some unimportant docstrings.
-                if reduce(operator.__add__, v.values()):
-                    isdocstring = 1
+                isdocstring = any(v.values())
                 # k is the message string, v is a dictionary-set of (filename,
                 # lineno) tuples.  We want to sort the entries in v first by
                 # file name and then by line number.
-                v = v.keys()
-                v.sort()
+                v = sorted(v.keys())
                 if not options.writelocations:
                     pass
                 # location comments are different b/w Solaris and GNU:
                 elif options.locationstyle == options.SOLARIS:
                     for filename, lineno in v:
                         d = {'filename': filename, 'lineno': lineno}
-                        print >>fp, _(
-                            '# File: %(filename)s, line: %(lineno)d') % d
+                        print(_(
+                            '# File: %(filename)s, line: %(lineno)d') % d, file=fp)
                 elif options.locationstyle == options.GNU:
                     # fit as many locations on one line, as long as the
-                    # resulting line length doesn't exceeds 'options.width'
+                    # resulting line length doesn't exceed 'options.width'
                     locline = '#:'
                     for filename, lineno in v:
                         d = {'filename': filename, 'lineno': lineno}
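
Aside on two idioms this hunk migrates: Python 3 dict views are not sortable lists, so the keys()/sort() pairs collapse to sorted(), and reduce() is no longer a builtin, so the occurrence-count test becomes any(). A small illustration with made-up data:

    v = {('a.py', 3): 1, ('b.py', 7): 0}   # (filename, lineno) -> isdocstring

    keys = sorted(v.keys())      # Python 2: keys = v.keys(); keys.sort()
    print(keys)                  # [('a.py', 3), ('b.py', 7)]

    # Python 2: if reduce(operator.__add__, v.values()): isdocstring = 1
    isdocstring = any(v.values())
    print(isdocstring)           # True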
@@ -491,14 +451,14 @@ class TokenEater:
                         if len(locline) + len(s) <= options.width:
                             locline = locline + s
                         else:
-                            print >> fp, locline
+                            print(locline, file=fp)
                             locline = "#:" + s
                     if len(locline) > 2:
-                        print >> fp, locline
+                        print(locline, file=fp)
                 if isdocstring:
-                    print >> fp, '#, docstring'
-                print >> fp, 'msgid', normalize(k)
-                print >> fp, 'msgstr ""\n'
+                    print('#, docstring', file=fp)
+                print('msgid', normalize(k, encoding), file=fp)
+                print('msgstr ""\n', file=fp)
@@ -514,7 +474,7 @@ def main():
             'style=', 'verbose', 'version', 'width=', 'exclude-file=',
             'docstrings', 'no-docstrings',
             ])
-    except getopt.error, msg:
+    except getopt.error as msg:
         usage(1, msg)

     # for holding option values
@@ -572,7 +532,7 @@ def main():
         elif opt in ('-v', '--verbose'):
             options.verbose = 1
         elif opt in ('-V', '--version'):
-            print _('pygettext.py (xgettext for Python) %s') % __version__
+            print(_('pygettext.py (xgettext for Python) %s') % __version__)
             sys.exit(0)
         elif opt in ('-w', '--width'):
             try:
@@ -593,7 +553,7 @@ def main():
         fp.close()

     # calculate escapes
-    make_escapes(options.escape)
+    make_escapes(not options.escape)

     # calculate all keywords
     options.keywords.extend(default_keywords)
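
Note the inverted argument: the parameter's sense flipped in this commit (pass_iso8859 became pass_nonascii), so negating options.escape preserves what -e/--escape has always meant. A sketch assuming the functions from the hunks above are in scope:

    make_escapes(False)             # as if -e/--escape were given
    print(escape('Höhe', 'utf-8'))  # H\303\266he: all non-ASCII escaped

    make_escapes(True)              # default, no -e
    print(escape('Höhe', 'utf-8'))  # Höhe: non-ASCII passes through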
@@ -605,8 +565,8 @@ def main():
             options.toexclude = fp.readlines()
             fp.close()
         except IOError:
-            print >> sys.stderr, _(
-                "Can't read --exclude-file: %s") % options.excludefilename
+            print(_(
+                "Can't read --exclude-file: %s") % options.excludefilename, file=sys.stderr)
             sys.exit(1)
     else:
         options.toexclude = []
@@ -625,21 +585,24 @@ def main():
     for filename in args:
         if filename == '-':
             if options.verbose:
-                print _('Reading standard input')
-            fp = sys.stdin
+                print(_('Reading standard input'))
+            fp = sys.stdin.buffer
             closep = 0
         else:
             if options.verbose:
-                print _('Working on %s') % filename
-            fp = open(filename)
+                print(_('Working on %s') % filename)
+            fp = open(filename, 'rb')
             closep = 1
         try:
             eater.set_filename(filename)
             try:
-                tokenize.tokenize(fp.readline, eater)
-            except tokenize.TokenError, e:
-                print >> sys.stderr, '%s: %s, line %d, column %d' % (
-                    e[0], filename, e[1][0], e[1][1])
+                tokens = tokenize.tokenize(fp.readline)
+                for _token in tokens:
+                    eater(*_token)
+            except tokenize.TokenError as e:
+                print('%s: %s, line %d, column %d' % (
+                    e.args[0], filename, e.args[1][0], e.args[1][1]),
+                    file=sys.stderr)
         finally:
             if closep:
                 fp.close()
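
Aside: in Python 3, tokenize.tokenize() takes a bytes readline and returns an iterator of token tuples instead of invoking a callback, which is why the eater is now driven by a for loop. A minimal standalone sketch ('pygettext.py' is an arbitrary example file):

    import tokenize

    with open('pygettext.py', 'rb') as fp:       # tokenize wants bytes
        for ttype, tstring, start, end, line in tokenize.tokenize(fp.readline):
            if ttype == tokenize.STRING:
                print(start[0], tstring)         # line number, raw literal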
@@ -663,7 +626,6 @@ def main():

 if __name__ == '__main__':
     main()
     # some more test strings
-    _(u'a unicode string')
     # this one creates a warning
     _('*** Seen unexpected token "%(token)s"') % {'token': 'test'}
     _('more' 'than' 'one' 'string')