#! /usr/bin/env python3
# -*- coding: iso-8859-1 -*-
# Originally written by Barry Warsaw <barry@python.org>
#
# Minimally patched to make it even more xgettext compatible
# by Peter Funk <pf@artcom-gmbh.de>
#
# 2002-11-22 Jürgen Hermann <jh@web.de>
# Added checks that _() only contains string literals, and
# command line args are resolved to module lists, i.e. you
# can now pass a filename, a module or package name, or a
# directory (including globbing chars, important for Win32).
# Made docstring fit in 80 chars wide displays using pydoc.
#
# for selftesting
# Use the external `fintl` gettext binding when available so pygettext's own
# messages are translated; otherwise fall back to an identity function, which
# also keeps the _() markers below extractable when pygettext scans itself.
try:
    import fintl
    _ = fintl.gettext
except ImportError:
    _ = lambda s: s
# The module docstring doubles as the --help text (see usage()); it is wrapped
# in _() so it can itself be translated.  The %(DEFAULTKEYWORDS)s placeholder
# is filled in from module globals when the text is printed.
__doc__ = _("""pygettext -- Python equivalent of xgettext(1)

Many systems (Solaris, Linux, Gnu) provide extensive tools that ease the
internationalization of C programs.  Most of these tools are independent of
the programming language and can be used from within Python programs.
Martin von Loewis' work[1] helps considerably in this regard.

There's one problem though; xgettext is the program that scans source code
looking for message strings, but it groks only C (or C++).  Python
introduces a few wrinkles, such as dual quoting characters, triple quoted
strings, and raw strings.  xgettext understands none of this.

Enter pygettext, which uses Python's standard tokenize module to scan
Python source code, generating .pot files identical to what GNU xgettext[2]
generates for C and C++ code.  From there, the standard GNU tools can be
used.

A word about marking Python strings as candidates for translation.  GNU
xgettext recognizes the following keywords: gettext, dgettext, dcgettext,
and gettext_noop.  But those can be a lot of text to include all over your
code.  C and C++ have a trick: they use the C preprocessor.  Most
internationalized C source includes a #define for gettext() to _() so that
what has to be written in the source is much less.  Thus these are both
translatable strings:

    gettext("Translatable String")
    _("Translatable String")

Python of course has no preprocessor so this doesn't work so well.  Thus,
pygettext searches only for _() by default, but see the -k/--keyword flag
below for how to augment this.

 [1] http://www.python.org/workshops/1997-10/proceedings/loewis.html
 [2] http://www.gnu.org/software/gettext/gettext.html

NOTE: pygettext attempts to be option and feature compatible with GNU
xgettext where ever possible.  However some options are still missing or are
not fully implemented.  Also, xgettext's use of command line switches with
option arguments is broken, and in these cases, pygettext just defines
additional switches.

Usage: pygettext [options] inputfile ...

Options:

    -a
    --extract-all
        Extract all strings.

    -d name
    --default-domain=name
        Rename the default output file from messages.pot to name.pot.

    -E
    --escape
        Replace non-ASCII characters with octal escape sequences.

    -D
    --docstrings
        Extract module, class, method, and function docstrings.  These do
        not need to be wrapped in _() markers, and in fact cannot be for
        Python to consider them docstrings. (See also the -X option).

    -h
    --help
        Print this help message and exit.

    -k word
    --keyword=word
        Keywords to look for in addition to the default set, which are:
        %(DEFAULTKEYWORDS)s

        You can have multiple -k flags on the command line.

    -K
    --no-default-keywords
        Disable the default set of keywords (see above).  Any keywords
        explicitly added with the -k/--keyword option are still recognized.

    --no-location
        Do not write filename/lineno location comments.

    -n
    --add-location
        Write filename/lineno location comments indicating where each
        extracted string is found in the source.  These lines appear before
        each msgid.  The style of comments is controlled by the -S/--style
        option.  This is the default.

    -o filename
    --output=filename
        Rename the default output file from messages.pot to filename.  If
        filename is `-' then the output is sent to standard out.

    -p dir
    --output-dir=dir
        Output files will be placed in directory dir.

    -S stylename
    --style stylename
        Specify which style to use for location comments.  Two styles are
        supported:

        Solaris  # File: filename, line: line-number
        GNU      #: filename:line

        The style name is case insensitive.  GNU style is the default.

    -v
    --verbose
        Print the names of the files being processed.

    -V
    --version
        Print the version of pygettext and exit.

    -w columns
    --width=columns
        Set width of output to columns.

    -x filename
    --exclude-file=filename
        Specify a file that contains a list of strings that are not be
        extracted from the input files.  Each string to be excluded must
        appear on a line by itself in the file.

    -X filename
    --no-docstrings=filename
        Specify a file that contains a list of files (one per line) that
        should not have their docstrings extracted.  This is only useful in
        conjunction with the -D option above.

If `inputfile' is -, standard input is read.
""")
import ast
import getopt
import glob
import importlib.machinery
import importlib.util
import os
import sys
import time
import token
import tokenize
__version__ = '1.5'

# Keywords searched for by default; -k adds to this, -K clears it.
default_keywords = ['_']
DEFAULTKEYWORDS = ', '.join(default_keywords)

EMPTYSTRING = ''


# The normal pot-file header. msgmerge and Emacs's po-mode work better if it's
# there.
# Note the doubled backslashes: the .pot output must contain literal "\n"
# sequences inside the header entry strings.
pot_header = _('''\
# SOME DESCRIPTIVE TITLE.
# Copyright (C) YEAR ORGANIZATION
# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
#
msgid ""
msgstr ""
"Project-Id-Version: PACKAGE VERSION\\n"
"POT-Creation-Date: %(time)s\\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\\n"
"Language-Team: LANGUAGE <LL@li.org>\\n"
"MIME-Version: 1.0\\n"
"Content-Type: text/plain; charset=%(charset)s\\n"
"Content-Transfer-Encoding: %(encoding)s\\n"
"Generated-By: pygettext.py %(version)s\\n"

''')
def usage(code, msg=''):
    """Print the module help text (and an optional message) to stderr, then exit.

    code -- process exit status
    msg  -- extra diagnostic printed after the help text, if non-empty
    """
    err = sys.stderr
    # __doc__ % globals() fills in %(DEFAULTKEYWORDS)s in the help text.
    print(__doc__ % globals(), file=err)
    if msg:
        print(msg, file=err)
    sys.exit(code)
def make_escapes(pass_nonascii):
    """Build the module-level `escapes` table and pick the `escape` function.

    pass_nonascii -- when true, characters >= 128 are passed through
    unchanged (so e.g. 'Höhe' stays literal) and only the 0..127 range is
    table-driven; when false, every byte outside 32..126 is rendered as an
    octal escape.
    """
    global escapes, escape
    if pass_nonascii:
        mod = 128
        escape = escape_ascii
    else:
        mod = 256
        escape = escape_nonascii
    # Printable ASCII maps to itself; everything else to an octal escape.
    escapes = [chr(i) if 32 <= i < 127 else r"\%03o" % i for i in range(mod)]
    # Characters that need a symbolic escape in .po files.
    for ch, esc in (('\\', r'\\'), ('\t', r'\t'),
                    ('\r', r'\r'), ('\n', r'\n'), ('"', r'\"')):
        escapes[ord(ch)] = esc


def escape_ascii(s, encoding):
    """Escape chars < 128 via the table; pass non-ASCII through unchanged."""
    out = []
    for c in s:
        cp = ord(c)
        out.append(escapes[cp] if cp < 128 else c)
    return ''.join(out)


def escape_nonascii(s, encoding):
    """Encode *s* with *encoding* and escape every byte via the table."""
    return ''.join(map(escapes.__getitem__, s.encode(encoding)))
def is_literal_string(s):
    """Return true if token text *s* is a plain (or r/R/u/U-prefixed) string
    literal.  Other prefixes (b, f, ...) are rejected: their values are not
    plain translatable text."""
    if s[0] in '\'"':
        return True
    return s[0] in 'rRuU' and s[1] in '\'"'
def safe_eval(s):
    """Return the value of the string-literal token text *s*.

    The callers only pass tokens already vetted by is_literal_string(), but
    ast.literal_eval() only parses literal syntax, so even a hostile input
    file cannot execute code during extraction (the previous
    eval(..., {'__builtins__': {}}, {}) sandbox was not airtight).
    """
    return ast.literal_eval(s)
def normalize(s, encoding):
    """Render message string *s* in .po-file form (C-style quoting).

    A single-line message becomes one quoted string; a multi-line message
    becomes an empty first string followed by one quoted string per line,
    each line terminated by an escaped newline.
    """
    lines = s.split('\n')
    if len(lines) == 1:
        return '"' + escape(s, encoding) + '"'
    # Fold a trailing newline into the last non-empty line.
    if not lines[-1]:
        del lines[-1]
        lines[-1] += '\n'
    escaped = [escape(line, encoding) for line in lines]
    return '""\n"' + '\\n"\n"'.join(escaped) + '"'
def containsAny(str, set):
    """Check whether 'str' contains ANY of the chars in 'set'"""
    # (parameter names shadow builtins; kept for interface compatibility)
    return any(c in str for c in set)


def getFilesForName(name):
    """Get a list of module files for a filename, a module or package name,
    or a directory.
    """
    if not os.path.exists(name):
        # check for glob chars
        if containsAny(name, "*?[]"):
            files = glob.glob(name)
            file_list = []
            for fname in files:
                file_list.extend(getFilesForName(fname))
            return file_list

        # try to find module or package
        try:
            spec = importlib.util.find_spec(name)
        except ImportError:
            spec = None
        # BUGFIX: find_spec() returns None (rather than raising) for many
        # unknown names; the old code dereferenced spec.origin unguarded and
        # crashed with AttributeError.  spec.origin may also be None for
        # namespace packages, which the following check handles too.
        name = spec.origin if spec is not None else None
        if not name:
            return []

    if os.path.isdir(name):
        # find all python source files in the directory tree
        file_list = []
        # get extension for python source files
        _py_ext = importlib.machinery.SOURCE_SUFFIXES[0]
        for root, dirs, files in os.walk(name):
            # don't recurse into CVS directories
            if 'CVS' in dirs:
                dirs.remove('CVS')
            # add all *.py files to the list
            file_list.extend(
                [os.path.join(root, fname) for fname in files
                 if os.path.splitext(fname)[1] == _py_ext]
                )
        return file_list
    elif os.path.exists(name):
        # a single file
        return [name]

    return []
class TokenEater:
    """Token-stream consumer that collects translatable strings.

    An instance is fed every token produced by ``tokenize`` through
    __call__ and runs a small state machine (the bound method stored in
    ``__state``) to recognize keyword calls like ``_('...')`` and,
    optionally, module/class/function docstrings.  Collected messages are
    emitted in .pot format by write().
    """

    def __init__(self, options):
        self.__options = options
        # message string -> {(filename, lineno): isdocstring}
        self.__messages = {}
        # current state handler; every token is dispatched to it
        self.__state = self.__waiting
        # string fragments collected inside the current keyword(...) call
        self.__data = []
        # line number of the first string of the current call
        self.__lineno = -1
        # true until the first significant token of a file has been seen
        self.__freshmodule = 1
        self.__curfile = None
        # nesting depth of ()/[]/{} while scanning a def/class header
        self.__enclosurecount = 0

    def __call__(self, ttype, tstring, stup, etup, line):
        # dispatch
##        import token
##        print('ttype:', token.tok_name[ttype], 'tstring:', tstring,
##              file=sys.stderr)
        self.__state(ttype, tstring, stup[0])

    def __waiting(self, ttype, tstring, lineno):
        # Default state: watch for keywords and (optionally) docstrings.
        opts = self.__options
        # Do docstring extractions, if enabled
        if opts.docstrings and not opts.nodocstrings.get(self.__curfile):
            # module docstring?
            if self.__freshmodule:
                if ttype == tokenize.STRING and is_literal_string(tstring):
                    self.__addentry(safe_eval(tstring), lineno, isdocstring=1)
                    self.__freshmodule = 0
                elif ttype not in (tokenize.COMMENT, tokenize.NL):
                    # any other significant token means the module has no
                    # docstring (comments/blank lines may precede one)
                    self.__freshmodule = 0
                return
            # class or func/method docstring?
            if ttype == tokenize.NAME and tstring in ('class', 'def'):
                self.__state = self.__suiteseen
                return
        if ttype == tokenize.NAME and tstring in opts.keywords:
            self.__state = self.__keywordseen

    def __suiteseen(self, ttype, tstring, lineno):
        # skip over any enclosure pairs until we see the colon
        if ttype == tokenize.OP:
            if tstring == ':' and self.__enclosurecount == 0:
                # we see a colon and we're not in an enclosure: end of def
                self.__state = self.__suitedocstring
            elif tstring in '([{':
                self.__enclosurecount += 1
            elif tstring in ')]}':
                self.__enclosurecount -= 1

    def __suitedocstring(self, ttype, tstring, lineno):
        # ignore any intervening noise
        if ttype == tokenize.STRING and is_literal_string(tstring):
            self.__addentry(safe_eval(tstring), lineno, isdocstring=1)
            self.__state = self.__waiting
        elif ttype not in (tokenize.NEWLINE, tokenize.INDENT,
                           tokenize.COMMENT):
            # there was no class docstring
            self.__state = self.__waiting

    def __keywordseen(self, ttype, tstring, lineno):
        # a keyword only counts as a call if immediately followed by '('
        if ttype == tokenize.OP and tstring == '(':
            self.__data = []
            self.__lineno = lineno
            self.__state = self.__openseen
        else:
            self.__state = self.__waiting

    def __openseen(self, ttype, tstring, lineno):
        if ttype == tokenize.OP and tstring == ')':
            # We've seen the last of the translatable strings.  Record the
            # line number of the first line of the strings and update the list
            # of messages seen.  Reset state for the next batch.  If there
            # were no strings inside _(), then just ignore this entry.
            if self.__data:
                self.__addentry(EMPTYSTRING.join(self.__data))
            self.__state = self.__waiting
        elif ttype == tokenize.STRING and is_literal_string(tstring):
            # adjacent literals are implicitly concatenated
            self.__data.append(safe_eval(tstring))
        elif ttype not in [tokenize.COMMENT, token.INDENT, token.DEDENT,
                           token.NEWLINE, tokenize.NL]:
            # warn if we see anything else than STRING or whitespace
            print(_(
                '*** %(file)s:%(lineno)s: Seen unexpected token "%(token)s"'
                ) % {
                'token': tstring,
                'file': self.__curfile,
                'lineno': self.__lineno
                }, file=sys.stderr)
            self.__state = self.__waiting

    def __addentry(self, msg, lineno=None, isdocstring=0):
        # Record one occurrence of msg unless it is on the exclude list.
        if lineno is None:
            lineno = self.__lineno
        if not msg in self.__options.toexclude:
            entry = (self.__curfile, lineno)
            self.__messages.setdefault(msg, {})[entry] = isdocstring

    def set_filename(self, filename):
        # Start a new input file; re-arm the module-docstring detector.
        self.__curfile = filename
        self.__freshmodule = 1

    def write(self, fp):
        # Emit the accumulated messages to *fp* in .pot format.
        options = self.__options
        timestamp = time.strftime('%Y-%m-%d %H:%M%z')
        encoding = fp.encoding if fp.encoding else 'UTF-8'
        print(pot_header % {'time': timestamp, 'version': __version__,
                            'charset': encoding,
                            'encoding': '8bit'}, file=fp)
        # Sort the entries.  First sort each particular entry's keys, then
        # sort all the entries by their first item.
        reverse = {}
        for k, v in self.__messages.items():
            keys = sorted(v.keys())
            reverse.setdefault(tuple(keys), []).append((k, v))
        rkeys = sorted(reverse.keys())
        for rkey in rkeys:
            rentries = reverse[rkey]
            rentries.sort()
            for k, v in rentries:
                # If the entry was gleaned out of a docstring, then add a
                # comment stating so.  This is to aid translators who may wish
                # to skip translating some unimportant docstrings.
                isdocstring = any(v.values())
                # k is the message string, v is a dictionary-set of (filename,
                # lineno) tuples.  We want to sort the entries in v first by
                # file name and then by line number.
                v = sorted(v.keys())
                if not options.writelocations:
                    pass
                # location comments are different b/w Solaris and GNU:
                elif options.locationstyle == options.SOLARIS:
                    for filename, lineno in v:
                        d = {'filename': filename, 'lineno': lineno}
                        print(_(
                            '# File: %(filename)s, line: %(lineno)d') % d, file=fp)
                elif options.locationstyle == options.GNU:
                    # fit as many locations on one line, as long as the
                    # resulting line length doesn't exceed 'options.width'
                    locline = '#:'
                    for filename, lineno in v:
                        d = {'filename': filename, 'lineno': lineno}
                        s = _(' %(filename)s:%(lineno)d') % d
                        if len(locline) + len(s) <= options.width:
                            locline = locline + s
                        else:
                            print(locline, file=fp)
                            locline = "#:" + s
                    if len(locline) > 2:
                        print(locline, file=fp)
                if isdocstring:
                    print('#, docstring', file=fp)
                print('msgid', normalize(k, encoding), file=fp)
                print('msgstr ""\n', file=fp)
def main():
    """Command-line entry point: parse options, scan the inputs with a
    TokenEater, and write the aggregated .pot output."""
    global default_keywords
    try:
        opts, args = getopt.getopt(
            sys.argv[1:],
            'ad:DEhk:Kno:p:S:Vvw:x:X:',
            ['extract-all', 'default-domain=', 'escape', 'help',
             'keyword=', 'no-default-keywords',
             'add-location', 'no-location', 'output=', 'output-dir=',
             'style=', 'verbose', 'version', 'width=', 'exclude-file=',
             'docstrings', 'no-docstrings',
             ])
    except getopt.error as msg:
        usage(1, msg)

    # for holding option values
    class Options:
        # constants
        GNU = 1
        SOLARIS = 2
        # defaults
        extractall = 0 # FIXME: currently this option has no effect at all.
        escape = 0
        keywords = []
        outpath = ''
        outfile = 'messages.pot'
        writelocations = 1
        locationstyle = GNU
        verbose = 0
        width = 78
        excludefilename = ''
        docstrings = 0
        nodocstrings = {}

    options = Options()
    locations = {'gnu' : options.GNU,
                 'solaris' : options.SOLARIS,
                 }

    # parse options
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage(0)
        elif opt in ('-a', '--extract-all'):
            options.extractall = 1
        elif opt in ('-d', '--default-domain'):
            options.outfile = arg + '.pot'
        elif opt in ('-E', '--escape'):
            options.escape = 1
        elif opt in ('-D', '--docstrings'):
            options.docstrings = 1
        elif opt in ('-k', '--keyword'):
            options.keywords.append(arg)
        elif opt in ('-K', '--no-default-keywords'):
            default_keywords = []
        elif opt in ('-n', '--add-location'):
            options.writelocations = 1
        elif opt in ('--no-location',):
            options.writelocations = 0
        elif opt in ('-S', '--style'):
            options.locationstyle = locations.get(arg.lower())
            if options.locationstyle is None:
                usage(1, _('Invalid value for --style: %s') % arg)
        elif opt in ('-o', '--output'):
            options.outfile = arg
        elif opt in ('-p', '--output-dir'):
            options.outpath = arg
        elif opt in ('-v', '--verbose'):
            options.verbose = 1
        elif opt in ('-V', '--version'):
            print(_('pygettext.py (xgettext for Python) %s') % __version__)
            sys.exit(0)
        elif opt in ('-w', '--width'):
            try:
                options.width = int(arg)
            except ValueError:
                usage(1, _('--width argument must be an integer: %s') % arg)
        elif opt in ('-x', '--exclude-file'):
            options.excludefilename = arg
        elif opt in ('-X', '--no-docstrings'):
            # each line of the file names one input file to skip for -D
            fp = open(arg)
            try:
                while 1:
                    line = fp.readline()
                    if not line:
                        break
                    options.nodocstrings[line[:-1]] = 1
            finally:
                fp.close()

    # calculate escapes
    # (-E means escape non-ASCII, so pass_nonascii is its inverse)
    make_escapes(not options.escape)

    # calculate all keywords
    options.keywords.extend(default_keywords)

    # initialize list of strings to exclude
    if options.excludefilename:
        try:
            fp = open(options.excludefilename)
            options.toexclude = fp.readlines()
            fp.close()
        except IOError:
            print(_(
                "Can't read --exclude-file: %s") % options.excludefilename, file=sys.stderr)
            sys.exit(1)
    else:
        options.toexclude = []

    # resolve args to module lists
    expanded = []
    for arg in args:
        if arg == '-':
            expanded.append(arg)
        else:
            expanded.extend(getFilesForName(arg))
    args = expanded

    # slurp through all the files
    eater = TokenEater(options)
    for filename in args:
        if filename == '-':
            if options.verbose:
                print(_('Reading standard input'))
            # tokenize.tokenize() wants a bytes readline
            fp = sys.stdin.buffer
            closep = 0
        else:
            if options.verbose:
                print(_('Working on %s') % filename)
            fp = open(filename, 'rb')
            closep = 1
        try:
            eater.set_filename(filename)
            try:
                tokens = tokenize.tokenize(fp.readline)
                for _token in tokens:
                    eater(*_token)
            except tokenize.TokenError as e:
                # report malformed input but continue with the next file
                print('%s: %s, line %d, column %d' % (
                    e.args[0], filename, e.args[1][0], e.args[1][1]),
                    file=sys.stderr)
        finally:
            if closep:
                fp.close()

    # write the output
    if options.outfile == '-':
        fp = sys.stdout
        closep = 0
    else:
        if options.outpath:
            options.outfile = os.path.join(options.outpath, options.outfile)
        fp = open(options.outfile, 'w')
        closep = 1
    try:
        eater.write(fp)
    finally:
        if closep:
            fp.close()
if __name__ == '__main__':
    main()
    # some more test strings
    # (these exercise pygettext when it is run on its own source)
    # this one creates a warning
    _('*** Seen unexpected token "%(token)s"') % {'token': 'test'}
    _('more' 'than' 'one' 'string')