Mirror of https://github.com/MCV-Software/TWBlue.git (synced 2024-11-22 11:18:08 -06:00)
Updated Pygettext to Python 3.7.9
This commit is contained in:
parent 8a8f1998ac
commit 60a67947e6
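The substance of the port is the switch from Python 2's callback-based tokenizer to the Python 3 iterator API (visible in the main() hunk near the end of the diff). As an illustration of the new call style only — this sketch is not code from the commit, and the file name is a placeholder:

import tokenize

# Python 3: tokenize.tokenize() takes a readline callable over a *binary*
# stream and yields TokenInfo tuples; Python 2 instead pushed each token
# into a callback, e.g. tokenize.tokenize(fp.readline, eater).
with open('pygettext.py', 'rb') as fp:       # 'rb': the tokenizer wants bytes
    for tok in tokenize.tokenize(fp.readline):
        if tok.type == tokenize.STRING:
            print(tok.start[0], tok.string)  # line number, raw token text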
@@ -1,6 +1,6 @@
-#! /usr/bin/env python
+#! /usr/bin/env python3
 # -*- coding: iso-8859-1 -*-
-# Originally written by Barry Warsaw <barry@zope.com>
+# Originally written by Barry Warsaw <barry@python.org>
 #
 # Minimally patched to make it even more xgettext compatible
 # by Peter Funk <pf@artcom-gmbh.de>
@@ -156,14 +156,14 @@ If `inputfile' is -, standard input is read.
 """)
 
 import os
-import imp
+import importlib.machinery
+import importlib.util
 import sys
 import glob
 import time
 import getopt
 import token
 import tokenize
-import operator
 
 __version__ = '1.5'
 
@@ -189,50 +189,51 @@ msgstr ""
 "Last-Translator: FULL NAME <EMAIL@ADDRESS>\\n"
 "Language-Team: LANGUAGE <LL@li.org>\\n"
 "MIME-Version: 1.0\\n"
-"Content-Type: text/plain; charset=CHARSET\\n"
-"Content-Transfer-Encoding: ENCODING\\n"
+"Content-Type: text/plain; charset=%(charset)s\\n"
+"Content-Transfer-Encoding: %(encoding)s\\n"
 "Generated-By: pygettext.py %(version)s\\n"
 
 ''')
 
 
 def usage(code, msg=''):
-    print >> sys.stderr, __doc__ % globals()
+    print(__doc__ % globals(), file=sys.stderr)
     if msg:
-        print >> sys.stderr, msg
+        print(msg, file=sys.stderr)
     sys.exit(code)
 
 
 
-escapes = []
-
-def make_escapes(pass_iso8859):
-    global escapes
-    if pass_iso8859:
-        # Allow iso-8859 characters to pass through so that e.g. 'msgid
+def make_escapes(pass_nonascii):
+    global escapes, escape
+    if pass_nonascii:
+        # Allow non-ascii characters to pass through so that e.g. 'msgid
         # "Höhe"' would result not result in 'msgid "H\366he"'.  Otherwise we
         # escape any character outside the 32..126 range.
         mod = 128
+        escape = escape_ascii
     else:
         mod = 256
-    for i in range(256):
-        if 32 <= (i % mod) <= 126:
-            escapes.append(chr(i))
-        else:
-            escapes.append("\\%03o" % i)
-    escapes[ord('\\')] = '\\\\'
-    escapes[ord('\t')] = '\\t'
-    escapes[ord('\r')] = '\\r'
-    escapes[ord('\n')] = '\\n'
-    escapes[ord('\"')] = '\\"'
+        escape = escape_nonascii
+    escapes = [r"\%03o" % i for i in range(mod)]
+    for i in range(32, 127):
+        escapes[i] = chr(i)
+    escapes[ord('\\')] = r'\\'
+    escapes[ord('\t')] = r'\t'
+    escapes[ord('\r')] = r'\r'
+    escapes[ord('\n')] = r'\n'
+    escapes[ord('\"')] = r'\"'
 
 
-def escape(s):
-    global escapes
-    s = list(s)
-    for i in range(len(s)):
-        s[i] = escapes[ord(s[i])]
-    return EMPTYSTRING.join(s)
+def escape_ascii(s, encoding):
+    return ''.join(escapes[ord(c)] if ord(c) < 128 else c for c in s)
+
+def escape_nonascii(s, encoding):
+    return ''.join(escapes[b] for b in s.encode(encoding))
 
 
+def is_literal_string(s):
+    return s[0] in '\'"' or (s[0] in 'rRuU' and s[1] in '\'"')
+
+
 def safe_eval(s):
@@ -240,18 +241,18 @@ def safe_eval(s):
     return eval(s, {'__builtins__':{}}, {})
 
 
-def normalize(s):
+def normalize(s, encoding):
     # This converts the various Python string types into a format that is
     # appropriate for .po files, namely much closer to C style.
     lines = s.split('\n')
     if len(lines) == 1:
-        s = '"' + escape(s) + '"'
+        s = '"' + escape(s, encoding) + '"'
     else:
         if not lines[-1]:
             del lines[-1]
             lines[-1] = lines[-1] + '\n'
         for i in range(len(lines)):
-            lines[i] = escape(lines[i])
+            lines[i] = escape(lines[i], encoding)
         lineterm = '\\n"\n"'
         s = '""\n"' + lineterm.join(lines) + '"'
     return s
@@ -262,64 +263,6 @@ def containsAny(str, set):
     return 1 in [c in str for c in set]
 
 
-def _visit_pyfiles(list, dirname, names):
-    """Helper for getFilesForName()."""
-    # get extension for python source files
-    if not globals().has_key('_py_ext'):
-        global _py_ext
-        _py_ext = [triple[0] for triple in imp.get_suffixes()
-                   if triple[2] == imp.PY_SOURCE][0]
-
-    # don't recurse into CVS directories
-    if 'CVS' in names:
-        names.remove('CVS')
-
-    # add all *.py files to list
-    list.extend(
-        [os.path.join(dirname, file) for file in names
-         if os.path.splitext(file)[1] == _py_ext]
-        )
-
-
-def _get_modpkg_path(dotted_name, pathlist=None):
-    """Get the filesystem path for a module or a package.
-
-    Return the file system path to a file for a module, and to a directory for
-    a package. Return None if the name is not found, or is a builtin or
-    extension module.
-    """
-    # split off top-most name
-    parts = dotted_name.split('.', 1)
-
-    if len(parts) > 1:
-        # we have a dotted path, import top-level package
-        try:
-            file, pathname, description = imp.find_module(parts[0], pathlist)
-            if file: file.close()
-        except ImportError:
-            return None
-
-        # check if it's indeed a package
-        if description[2] == imp.PKG_DIRECTORY:
-            # recursively handle the remaining name parts
-            pathname = _get_modpkg_path(parts[1], [pathname])
-        else:
-            pathname = None
-    else:
-        # plain name
-        try:
-            file, pathname, description = imp.find_module(
-                dotted_name, pathlist)
-            if file:
-                file.close()
-            if description[2] not in [imp.PY_SOURCE, imp.PKG_DIRECTORY]:
-                pathname = None
-        except ImportError:
-            pathname = None
-
-    return pathname
-
-
 def getFilesForName(name):
     """Get a list of module files for a filename, a module or package name,
     or a directory.
@@ -334,14 +277,28 @@ def getFilesForName(name):
         return list
 
     # try to find module or package
-    name = _get_modpkg_path(name)
+    try:
+        spec = importlib.util.find_spec(name)
+        name = spec.origin
+    except ImportError:
+        name = None
     if not name:
         return []
 
     if os.path.isdir(name):
         # find all python files in directory
         list = []
-        os.path.walk(name, _visit_pyfiles, list)
+        # get extension for python source files
+        _py_ext = importlib.machinery.SOURCE_SUFFIXES[0]
+        for root, dirs, files in os.walk(name):
+            # don't recurse into CVS directories
+            if 'CVS' in dirs:
+                dirs.remove('CVS')
+            # add all *.py files to list
+            list.extend(
+                [os.path.join(root, file) for file in files
+                 if os.path.splitext(file)[1] == _py_ext]
+                )
         return list
     elif os.path.exists(name):
         # a single file
@@ -359,12 +316,13 @@ class TokenEater:
         self.__lineno = -1
         self.__freshmodule = 1
         self.__curfile = None
+        self.__enclosurecount = 0
 
     def __call__(self, ttype, tstring, stup, etup, line):
         # dispatch
 ##        import token
-##        print >> sys.stderr, 'ttype:', token.tok_name[ttype], \
-##              'tstring:', tstring
+##        print('ttype:', token.tok_name[ttype], 'tstring:', tstring,
+##              file=sys.stderr)
         self.__state(ttype, tstring, stup[0])
 
     def __waiting(self, ttype, tstring, lineno):
@@ -373,13 +331,13 @@ class TokenEater:
         if opts.docstrings and not opts.nodocstrings.get(self.__curfile):
             # module docstring?
             if self.__freshmodule:
-                if ttype == tokenize.STRING:
+                if ttype == tokenize.STRING and is_literal_string(tstring):
                     self.__addentry(safe_eval(tstring), lineno, isdocstring=1)
                     self.__freshmodule = 0
                 elif ttype not in (tokenize.COMMENT, tokenize.NL):
                     self.__freshmodule = 0
                 return
-            # class docstring?
+            # class or func/method docstring?
             if ttype == tokenize.NAME and tstring in ('class', 'def'):
                 self.__state = self.__suiteseen
                 return
@@ -387,13 +345,19 @@ class TokenEater:
             self.__state = self.__keywordseen
 
     def __suiteseen(self, ttype, tstring, lineno):
-        # ignore anything until we see the colon
-        if ttype == tokenize.OP and tstring == ':':
-            self.__state = self.__suitedocstring
+        # skip over any enclosure pairs until we see the colon
+        if ttype == tokenize.OP:
+            if tstring == ':' and self.__enclosurecount == 0:
+                # we see a colon and we're not in an enclosure: end of def
+                self.__state = self.__suitedocstring
+            elif tstring in '([{':
+                self.__enclosurecount += 1
+            elif tstring in ')]}':
+                self.__enclosurecount -= 1
 
     def __suitedocstring(self, ttype, tstring, lineno):
         # ignore any intervening noise
-        if ttype == tokenize.STRING:
+        if ttype == tokenize.STRING and is_literal_string(tstring):
             self.__addentry(safe_eval(tstring), lineno, isdocstring=1)
             self.__state = self.__waiting
         elif ttype not in (tokenize.NEWLINE, tokenize.INDENT,
@@ -418,18 +382,18 @@ class TokenEater:
             if self.__data:
                 self.__addentry(EMPTYSTRING.join(self.__data))
             self.__state = self.__waiting
-        elif ttype == tokenize.STRING:
+        elif ttype == tokenize.STRING and is_literal_string(tstring):
             self.__data.append(safe_eval(tstring))
         elif ttype not in [tokenize.COMMENT, token.INDENT, token.DEDENT,
                            token.NEWLINE, tokenize.NL]:
             # warn if we see anything else than STRING or whitespace
-            print >> sys.stderr, _(
+            print(_(
                 '*** %(file)s:%(lineno)s: Seen unexpected token "%(token)s"'
                 ) % {
                 'token': tstring,
                 'file': self.__curfile,
                 'lineno': self.__lineno
-                }
+                }, file=sys.stderr)
             self.__state = self.__waiting
 
     def __addentry(self, msg, lineno=None, isdocstring=0):
@@ -445,45 +409,41 @@ class TokenEater:
 
     def write(self, fp):
         options = self.__options
-        timestamp = time.strftime('%Y-%m-%d %H:%M+%Z')
-        # The time stamp in the header doesn't have the same format as that
-        # generated by xgettext...
-        print >> fp, pot_header % {'time': timestamp, 'version': __version__}
+        timestamp = time.strftime('%Y-%m-%d %H:%M%z')
+        encoding = fp.encoding if fp.encoding else 'UTF-8'
+        print(pot_header % {'time': timestamp, 'version': __version__,
+                            'charset': encoding,
+                            'encoding': '8bit'}, file=fp)
         # Sort the entries.  First sort each particular entry's keys, then
         # sort all the entries by their first item.
         reverse = {}
         for k, v in self.__messages.items():
-            keys = v.keys()
-            keys.sort()
+            keys = sorted(v.keys())
             reverse.setdefault(tuple(keys), []).append((k, v))
-        rkeys = reverse.keys()
-        rkeys.sort()
+        rkeys = sorted(reverse.keys())
        for rkey in rkeys:
             rentries = reverse[rkey]
             rentries.sort()
             for k, v in rentries:
-                isdocstring = 0
                 # If the entry was gleaned out of a docstring, then add a
                 # comment stating so.  This is to aid translators who may wish
                 # to skip translating some unimportant docstrings.
-                if reduce(operator.__add__, v.values()):
-                    isdocstring = 1
+                isdocstring = any(v.values())
                 # k is the message string, v is a dictionary-set of (filename,
                 # lineno) tuples.  We want to sort the entries in v first by
                 # file name and then by line number.
-                v = v.keys()
-                v.sort()
+                v = sorted(v.keys())
                 if not options.writelocations:
                     pass
                 # location comments are different b/w Solaris and GNU:
                 elif options.locationstyle == options.SOLARIS:
                     for filename, lineno in v:
                         d = {'filename': filename, 'lineno': lineno}
-                        print >>fp, _(
-                            '# File: %(filename)s, line: %(lineno)d') % d
+                        print(_(
+                            '# File: %(filename)s, line: %(lineno)d') % d, file=fp)
                 elif options.locationstyle == options.GNU:
                     # fit as many locations on one line, as long as the
-                    # resulting line length doesn't exceeds 'options.width'
+                    # resulting line length doesn't exceed 'options.width'
                     locline = '#:'
                     for filename, lineno in v:
                         d = {'filename': filename, 'lineno': lineno}
@@ -491,14 +451,14 @@ class TokenEater:
                         if len(locline) + len(s) <= options.width:
                             locline = locline + s
                         else:
-                            print >> fp, locline
+                            print(locline, file=fp)
                             locline = "#:" + s
                     if len(locline) > 2:
-                        print >> fp, locline
+                        print(locline, file=fp)
                 if isdocstring:
-                    print >> fp, '#, docstring'
-                print >> fp, 'msgid', normalize(k)
-                print >> fp, 'msgstr ""\n'
+                    print('#, docstring', file=fp)
+                print('msgid', normalize(k, encoding), file=fp)
+                print('msgstr ""\n', file=fp)
 
 
 
@@ -514,7 +474,7 @@ def main():
              'style=', 'verbose', 'version', 'width=', 'exclude-file=',
              'docstrings', 'no-docstrings',
              ])
-    except getopt.error, msg:
+    except getopt.error as msg:
        usage(1, msg)
 
    # for holding option values
@@ -572,7 +532,7 @@ def main():
         elif opt in ('-v', '--verbose'):
             options.verbose = 1
         elif opt in ('-V', '--version'):
-            print _('pygettext.py (xgettext for Python) %s') % __version__
+            print(_('pygettext.py (xgettext for Python) %s') % __version__)
             sys.exit(0)
         elif opt in ('-w', '--width'):
             try:
@@ -593,7 +553,7 @@ def main():
             fp.close()
 
     # calculate escapes
-    make_escapes(options.escape)
+    make_escapes(not options.escape)
 
     # calculate all keywords
     options.keywords.extend(default_keywords)
@@ -605,8 +565,8 @@ def main():
             options.toexclude = fp.readlines()
             fp.close()
         except IOError:
-            print >> sys.stderr, _(
-                "Can't read --exclude-file: %s") % options.excludefilename
+            print(_(
+                "Can't read --exclude-file: %s") % options.excludefilename, file=sys.stderr)
             sys.exit(1)
     else:
         options.toexclude = []
@@ -625,21 +585,24 @@ def main():
     for filename in args:
         if filename == '-':
             if options.verbose:
-                print _('Reading standard input')
-            fp = sys.stdin
+                print(_('Reading standard input'))
+            fp = sys.stdin.buffer
             closep = 0
         else:
             if options.verbose:
-                print _('Working on %s') % filename
-            fp = open(filename)
+                print(_('Working on %s') % filename)
+            fp = open(filename, 'rb')
             closep = 1
         try:
             eater.set_filename(filename)
             try:
-                tokenize.tokenize(fp.readline, eater)
-            except tokenize.TokenError, e:
-                print >> sys.stderr, '%s: %s, line %d, column %d' % (
-                    e[0], filename, e[1][0], e[1][1])
+                tokens = tokenize.tokenize(fp.readline)
+                for _token in tokens:
+                    eater(*_token)
+            except tokenize.TokenError as e:
+                print('%s: %s, line %d, column %d' % (
+                    e.args[0], filename, e.args[1][0], e.args[1][1]),
+                    file=sys.stderr)
         finally:
             if closep:
                 fp.close()
@@ -663,7 +626,6 @@ def main():
 if __name__ == '__main__':
     main()
     # some more test strings
-    _(u'a unicode string')
     # this one creates a warning
     _('*** Seen unexpected token "%(token)s"') % {'token': 'test'}
     _('more' 'than' 'one' 'string')
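A quick way to sanity-check the reworked escape machinery above is to rebuild the table exactly as the new make_escapes() does in its non-ASCII branch and feed it the "Höhe" example from the source comments. This is an illustrative sketch, not part of the commit; it inlines the module-global table rather than importing pygettext:

escapes = [r"\%03o" % i for i in range(256)]   # the mod = 256 branch
for i in range(32, 127):
    escapes[i] = chr(i)                        # printable ASCII passes through
escapes[ord('\\')] = r'\\'
escapes[ord('\t')] = r'\t'
escapes[ord('\r')] = r'\r'
escapes[ord('\n')] = r'\n'
escapes[ord('\"')] = r'\"'

def escape_nonascii(s, encoding):
    # Escape byte-by-byte in the target encoding, as the patched file does.
    return ''.join(escapes[b] for b in s.encode(encoding))

print(escape_nonascii('Höhe', 'iso-8859-1'))   # -> H\366he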