Actually add lib directory

This commit is contained in:
Samuel Sloniker 2022-11-30 14:27:12 -08:00
parent b1ac23690c
commit 314ea65427
5 changed files with 2733 additions and 0 deletions

17
lib/console.py Normal file
View File

@ -0,0 +1,17 @@
def read(console, buffer):
char = console.read(1)
if char == b'\x08':
if buffer:
console.write(b'\x08 \x08')
buffer.pop()
return
elif char == b'\n':
console.write(b'\n')
data = b''.join(buffer)
buffer.clear()
return data
elif len(char) == 1:
console.write(char)
buffer.append(char)
else:
return None

1678
lib/optparse.py Normal file

File diff suppressed because it is too large Load Diff

349
lib/shlex.py Normal file
View File

@ -0,0 +1,349 @@
"""A lexical analyzer class for simple shell-like syntaxes."""
# Module and documentation by Eric S. Raymond, 21 Dec 1998
# Input stacking and error message cleanup added by ESR, March 2000
# push_source() and pop_source() made explicit by ESR, January 2001.
# Posix compliance, split(), string arguments, and
# iterator interface by Gustavo Niemeyer, April 2003.
# changes to tokenize more like Posix shells by Vinay Sajip, July 2016.
import os
import re
import sys
from io import StringIO
__all__ = ["shlex", "split", "quote", "join"]
class shlex:
"A lexical analyzer class for simple shell-like syntaxes."
def __init__(self, instream=None, infile=None, posix=False,
punctuation_chars=False):
if isinstance(instream, str):
instream = StringIO(instream)
if instream is not None:
self.instream = instream
self.infile = infile
else:
self.instream = sys.stdin
self.infile = None
self.posix = posix
if posix:
self.eof = None
else:
self.eof = ''
self.commenters = '#'
self.wordchars = ('abcdfeghijklmnopqrstuvwxyz'
'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_')
if self.posix:
self.wordchars += ('ßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ'
'ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞ')
self.whitespace = ' \t\r\n'
self.whitespace_split = False
self.quotes = '\'"'
self.escape = '\\'
self.escapedquotes = '"'
self.state = ' '
self.pushback = list()
self.lineno = 1
self.debug = 0
self.token = ''
self.filestack = list()
self.source = None
if not punctuation_chars:
punctuation_chars = ''
elif punctuation_chars is True:
punctuation_chars = '();<>|&'
self._punctuation_chars = punctuation_chars
if punctuation_chars:
# _pushback_chars is a push back queue used by lookahead logic
self._pushback_chars = list()
# these chars added because allowed in file names, args, wildcards
self.wordchars += '~-./*?='
#remove any punctuation chars from wordchars
t = self.wordchars.maketrans(dict.fromkeys(punctuation_chars))
self.wordchars = self.wordchars.translate(t)
@property
def punctuation_chars(self):
return self._punctuation_chars
def push_token(self, tok):
"Push a token onto the stack popped by the get_token method"
if self.debug >= 1:
print("shlex: pushing token " + repr(tok))
self.pushback.appendleft(tok)
def push_source(self, newstream, newfile=None):
"Push an input source onto the lexer's input source stack."
if isinstance(newstream, str):
newstream = StringIO(newstream)
self.filestack.appendleft((self.infile, self.instream, self.lineno))
self.infile = newfile
self.instream = newstream
self.lineno = 1
if self.debug:
if newfile is not None:
print('shlex: pushing to file %s' % (self.infile,))
else:
print('shlex: pushing to stream %s' % (self.instream,))
def pop_source(self):
"Pop the input source stack."
self.instream.close()
(self.infile, self.instream, self.lineno) = self.filestack.popleft()
if self.debug:
print('shlex: popping to %s, line %d' \
% (self.instream, self.lineno))
self.state = ' '
def get_token(self):
"Get a token from the input stream (or from stack if it's nonempty)"
if self.pushback:
tok = self.pushback.popleft()
if self.debug >= 1:
print("shlex: popping token " + repr(tok))
return tok
# No pushback. Get a token.
raw = self.read_token()
# Handle inclusions
if self.source is not None:
while raw == self.source:
spec = self.sourcehook(self.read_token())
if spec:
(newfile, newstream) = spec
self.push_source(newstream, newfile)
raw = self.get_token()
# Maybe we got EOF instead?
while raw == self.eof:
if not self.filestack:
return self.eof
else:
self.pop_source()
raw = self.get_token()
# Neither inclusion nor EOF
if self.debug >= 1:
if raw != self.eof:
print("shlex: token=" + repr(raw))
else:
print("shlex: token=EOF")
return raw
def read_token(self):
quoted = False
escapedstate = ' '
while True:
if self.punctuation_chars and self._pushback_chars:
nextchar = self._pushback_chars.pop()
else:
nextchar = self.instream.read(1)
if nextchar == '\n':
self.lineno += 1
if self.debug >= 3:
print("shlex: in state %r I see character: %r" % (self.state,
nextchar))
if self.state is None:
self.token = '' # past end of file
break
elif self.state == ' ':
if not nextchar:
self.state = None # end of file
break
elif nextchar in self.whitespace:
if self.debug >= 2:
print("shlex: I see whitespace in whitespace state")
if self.token or (self.posix and quoted):
break # emit current token
else:
continue
elif nextchar in self.commenters:
self.instream.readline()
self.lineno += 1
elif self.posix and nextchar in self.escape:
escapedstate = 'a'
self.state = nextchar
elif nextchar in self.wordchars:
self.token = nextchar
self.state = 'a'
elif nextchar in self.punctuation_chars:
self.token = nextchar
self.state = 'c'
elif nextchar in self.quotes:
if not self.posix:
self.token = nextchar
self.state = nextchar
elif self.whitespace_split:
self.token = nextchar
self.state = 'a'
else:
self.token = nextchar
if self.token or (self.posix and quoted):
break # emit current token
else:
continue
elif self.state in self.quotes:
quoted = True
if not nextchar: # end of file
if self.debug >= 2:
print("shlex: I see EOF in quotes state")
# XXX what error should be raised here?
raise ValueError("No closing quotation")
if nextchar == self.state:
if not self.posix:
self.token += nextchar
self.state = ' '
break
else:
self.state = 'a'
elif (self.posix and nextchar in self.escape and self.state
in self.escapedquotes):
escapedstate = self.state
self.state = nextchar
else:
self.token += nextchar
elif self.state in self.escape:
if not nextchar: # end of file
if self.debug >= 2:
print("shlex: I see EOF in escape state")
# XXX what error should be raised here?
raise ValueError("No escaped character")
# In posix shells, only the quote itself or the escape
# character may be escaped within quotes.
if (escapedstate in self.quotes and
nextchar != self.state and nextchar != escapedstate):
self.token += self.state
self.token += nextchar
self.state = escapedstate
elif self.state in ('a', 'c'):
if not nextchar:
self.state = None # end of file
break
elif nextchar in self.whitespace:
if self.debug >= 2:
print("shlex: I see whitespace in word state")
self.state = ' '
if self.token or (self.posix and quoted):
break # emit current token
else:
continue
elif nextchar in self.commenters:
self.instream.readline()
self.lineno += 1
if self.posix:
self.state = ' '
if self.token or (self.posix and quoted):
break # emit current token
else:
continue
elif self.state == 'c':
if nextchar in self.punctuation_chars:
self.token += nextchar
else:
if nextchar not in self.whitespace:
self._pushback_chars.append(nextchar)
self.state = ' '
break
elif self.posix and nextchar in self.quotes:
self.state = nextchar
elif self.posix and nextchar in self.escape:
escapedstate = 'a'
self.state = nextchar
elif (nextchar in self.wordchars or nextchar in self.quotes
or (self.whitespace_split and
nextchar not in self.punctuation_chars)):
self.token += nextchar
else:
if self.punctuation_chars:
self._pushback_chars.append(nextchar)
else:
self.pushback.appendleft(nextchar)
if self.debug >= 2:
print("shlex: I see punctuation in word state")
self.state = ' '
if self.token or (self.posix and quoted):
break # emit current token
else:
continue
result = self.token
self.token = ''
if self.posix and not quoted and result == '':
result = None
if self.debug > 1:
if result:
print("shlex: raw token=" + repr(result))
else:
print("shlex: raw token=EOF")
return result
def sourcehook(self, newfile):
"Hook called on a filename to be sourced."
if newfile[0] == '"':
newfile = newfile[1:-1]
# This implements cpp-like semantics for relative-path inclusion.
if isinstance(self.infile, str) and not os.path.isabs(newfile):
newfile = os.path.join(os.path.dirname(self.infile), newfile)
return (newfile, open(newfile, "r"))
def error_leader(self, infile=None, lineno=None):
"Emit a C-compiler-like, Emacs-friendly error-message leader."
if infile is None:
infile = self.infile
if lineno is None:
lineno = self.lineno
return "\"%s\", line %d: " % (infile, lineno)
def __iter__(self):
return self
def __next__(self):
token = self.get_token()
if token == self.eof:
raise StopIteration
return token
def split(s, comments=False, posix=True):
"""Split the string *s* using shell-like syntax."""
if s is None:
import warnings
warnings.warn("Passing None for 's' to shlex.split() is deprecated.",
DeprecationWarning, stacklevel=2)
lex = shlex(s, posix=posix)
lex.whitespace_split = True
if not comments:
lex.commenters = ''
return list(lex)
def join(split_command):
"""Return a shell-escaped string from *split_command*."""
return ' '.join(quote(arg) for arg in split_command)
_find_unsafe = re.compile(r'[^\w@%+=:,./-]').search #, re.ASCII).search
def quote(s):
"""Return a shell-escaped version of the string *s*."""
if not s:
return "''"
if _find_unsafe(s) is None:
return s
# use single quotes, and put single quotes into double quotes
# the string $'b is then quoted as '$'"'"'b'
return "'" + s.replace("'", "'\"'\"'") + "'"
def _print_tokens(lexer):
while 1:
tt = lexer.get_token()
if not tt:
break
print("Token: " + repr(tt))
if __name__ == '__main__':
if len(sys.argv) == 1:
_print_tokens(shlex())
else:
fn = sys.argv[1]
with open(fn) as f:
_print_tokens(shlex(f, fn))

195
lib/stat.py Normal file
View File

@ -0,0 +1,195 @@
"""Constants/functions for interpreting results of os.stat() and os.lstat().
Suggested usage: from stat import *
"""
# Indices for stat struct members in the tuple returned by os.stat()
ST_MODE = 0
ST_INO = 1
ST_DEV = 2
ST_NLINK = 3
ST_UID = 4
ST_GID = 5
ST_SIZE = 6
ST_ATIME = 7
ST_MTIME = 8
ST_CTIME = 9
# Extract bits from the mode
def S_IMODE(mode):
"""Return the portion of the file's mode that can be set by
os.chmod().
"""
return mode & 0o7777
def S_IFMT(mode):
"""Return the portion of the file's mode that describes the
file type.
"""
return mode & 0o170000
# Constants used as S_IFMT() for various file types
# (not all are implemented on all systems)
S_IFDIR = 0o040000 # directory
S_IFCHR = 0o020000 # character device
S_IFBLK = 0o060000 # block device
S_IFREG = 0o100000 # regular file
S_IFIFO = 0o010000 # fifo (named pipe)
S_IFLNK = 0o120000 # symbolic link
S_IFSOCK = 0o140000 # socket file
# Fallbacks for uncommon platform-specific constants
S_IFDOOR = 0
S_IFPORT = 0
S_IFWHT = 0
# Functions to test for each file type
def S_ISDIR(mode):
"""Return True if mode is from a directory."""
return S_IFMT(mode) == S_IFDIR
def S_ISCHR(mode):
"""Return True if mode is from a character special device file."""
return S_IFMT(mode) == S_IFCHR
def S_ISBLK(mode):
"""Return True if mode is from a block special device file."""
return S_IFMT(mode) == S_IFBLK
def S_ISREG(mode):
"""Return True if mode is from a regular file."""
return S_IFMT(mode) == S_IFREG
def S_ISFIFO(mode):
"""Return True if mode is from a FIFO (named pipe)."""
return S_IFMT(mode) == S_IFIFO
def S_ISLNK(mode):
"""Return True if mode is from a symbolic link."""
return S_IFMT(mode) == S_IFLNK
def S_ISSOCK(mode):
"""Return True if mode is from a socket."""
return S_IFMT(mode) == S_IFSOCK
def S_ISDOOR(mode):
"""Return True if mode is from a door."""
return False
def S_ISPORT(mode):
"""Return True if mode is from an event port."""
return False
def S_ISWHT(mode):
"""Return True if mode is from a whiteout."""
return False
# Names for permission bits
S_ISUID = 0o4000 # set UID bit
S_ISGID = 0o2000 # set GID bit
S_ENFMT = S_ISGID # file locking enforcement
S_ISVTX = 0o1000 # sticky bit
S_IREAD = 0o0400 # Unix V7 synonym for S_IRUSR
S_IWRITE = 0o0200 # Unix V7 synonym for S_IWUSR
S_IEXEC = 0o0100 # Unix V7 synonym for S_IXUSR
S_IRWXU = 0o0700 # mask for owner permissions
S_IRUSR = 0o0400 # read by owner
S_IWUSR = 0o0200 # write by owner
S_IXUSR = 0o0100 # execute by owner
S_IRWXG = 0o0070 # mask for group permissions
S_IRGRP = 0o0040 # read by group
S_IWGRP = 0o0020 # write by group
S_IXGRP = 0o0010 # execute by group
S_IRWXO = 0o0007 # mask for others (not in group) permissions
S_IROTH = 0o0004 # read by others
S_IWOTH = 0o0002 # write by others
S_IXOTH = 0o0001 # execute by others
# Names for file flags
UF_NODUMP = 0x00000001 # do not dump file
UF_IMMUTABLE = 0x00000002 # file may not be changed
UF_APPEND = 0x00000004 # file may only be appended to
UF_OPAQUE = 0x00000008 # directory is opaque when viewed through a union stack
UF_NOUNLINK = 0x00000010 # file may not be renamed or deleted
UF_COMPRESSED = 0x00000020 # OS X: file is hfs-compressed
UF_HIDDEN = 0x00008000 # OS X: file should not be displayed
SF_ARCHIVED = 0x00010000 # file may be archived
SF_IMMUTABLE = 0x00020000 # file may not be changed
SF_APPEND = 0x00040000 # file may only be appended to
SF_NOUNLINK = 0x00100000 # file may not be renamed or deleted
SF_SNAPSHOT = 0x00200000 # file is a snapshot file
_filemode_table = (
((S_IFLNK, "l"),
(S_IFSOCK, "s"), # Must appear before IFREG and IFDIR as IFSOCK == IFREG | IFDIR
(S_IFREG, "-"),
(S_IFBLK, "b"),
(S_IFDIR, "d"),
(S_IFCHR, "c"),
(S_IFIFO, "p")),
((S_IRUSR, "r"),),
((S_IWUSR, "w"),),
((S_IXUSR|S_ISUID, "s"),
(S_ISUID, "S"),
(S_IXUSR, "x")),
((S_IRGRP, "r"),),
((S_IWGRP, "w"),),
((S_IXGRP|S_ISGID, "s"),
(S_ISGID, "S"),
(S_IXGRP, "x")),
((S_IROTH, "r"),),
((S_IWOTH, "w"),),
((S_IXOTH|S_ISVTX, "t"),
(S_ISVTX, "T"),
(S_IXOTH, "x"))
)
def filemode(mode):
"""Convert a file's mode to a string of the form '-rwxrwxrwx'."""
perm = []
for table in _filemode_table:
for bit, char in table:
if mode & bit == bit:
perm.append(char)
break
else:
perm.append("-")
return "".join(perm)
# Windows FILE_ATTRIBUTE constants for interpreting os.stat()'s
# "st_file_attributes" member
FILE_ATTRIBUTE_ARCHIVE = 32
FILE_ATTRIBUTE_COMPRESSED = 2048
FILE_ATTRIBUTE_DEVICE = 64
FILE_ATTRIBUTE_DIRECTORY = 16
FILE_ATTRIBUTE_ENCRYPTED = 16384
FILE_ATTRIBUTE_HIDDEN = 2
FILE_ATTRIBUTE_INTEGRITY_STREAM = 32768
FILE_ATTRIBUTE_NORMAL = 128
FILE_ATTRIBUTE_NOT_CONTENT_INDEXED = 8192
FILE_ATTRIBUTE_NO_SCRUB_DATA = 131072
FILE_ATTRIBUTE_OFFLINE = 4096
FILE_ATTRIBUTE_READONLY = 1
FILE_ATTRIBUTE_REPARSE_POINT = 1024
FILE_ATTRIBUTE_SPARSE_FILE = 512
FILE_ATTRIBUTE_SYSTEM = 4
FILE_ATTRIBUTE_TEMPORARY = 256
FILE_ATTRIBUTE_VIRTUAL = 65536
# If available, use C implementation
try:
from _stat import *
except ImportError:
pass

494
lib/textwrap.py Normal file
View File

@ -0,0 +1,494 @@
"""Text wrapping and filling.
"""
# Copyright (C) 1999-2001 Gregory P. Ward.
# Copyright (C) 2002, 2003 Python Software Foundation.
# Written by Greg Ward <gward@python.net>
import re
__all__ = ['TextWrapper', 'wrap', 'fill', 'dedent', 'indent', 'shorten']
# Hardcode the recognized whitespace characters to the US-ASCII
# whitespace characters. The main reason for doing this is that
# some Unicode spaces (like \u00a0) are non-breaking whitespaces.
_whitespace = '\t\n\x0b\x0c\r '
class TextWrapper:
"""
Object for wrapping/filling text. The public interface consists of
the wrap() and fill() methods; the other methods are just there for
subclasses to override in order to tweak the default behaviour.
If you want to completely replace the main wrapping algorithm,
you'll probably have to override _wrap_chunks().
Several instance attributes control various aspects of wrapping:
width (default: 70)
the maximum width of wrapped lines (unless break_long_words
is false)
initial_indent (default: "")
string that will be prepended to the first line of wrapped
output. Counts towards the line's width.
subsequent_indent (default: "")
string that will be prepended to all lines save the first
of wrapped output; also counts towards each line's width.
expand_tabs (default: true)
Expand tabs in input text to spaces before further processing.
Each tab will become 0 .. 'tabsize' spaces, depending on its position
in its line. If false, each tab is treated as a single character.
tabsize (default: 8)
Expand tabs in input text to 0 .. 'tabsize' spaces, unless
'expand_tabs' is false.
replace_whitespace (default: true)
Replace all whitespace characters in the input text by spaces
after tab expansion. Note that if expand_tabs is false and
replace_whitespace is true, every tab will be converted to a
single space!
fix_sentence_endings (default: false)
Ensure that sentence-ending punctuation is always followed
by two spaces. Off by default because the algorithm is
(unavoidably) imperfect.
break_long_words (default: true)
Break words longer than 'width'. If false, those words will not
be broken, and some lines might be longer than 'width'.
break_on_hyphens (default: true)
Allow breaking hyphenated words. If true, wrapping will occur
preferably on whitespaces and right after hyphens part of
compound words.
drop_whitespace (default: true)
Drop leading and trailing whitespace from lines.
max_lines (default: None)
Truncate wrapped lines.
placeholder (default: ' [...]')
Append to the last line of truncated text.
"""
unicode_whitespace_trans = {}
uspace = ord(' ')
for x in _whitespace:
unicode_whitespace_trans[ord(x)] = uspace
# This funky little regex is just the trick for splitting
# text up into word-wrappable chunks. E.g.
# "Hello there -- you goof-ball, use the -b option!"
# splits into
# Hello/ /there/ /--/ /you/ /goof-/ball,/ /use/ /the/ /-b/ /option!
# (after stripping out empty strings).
word_punct = r'[\w!"\'&.,?]'
letter = r'[^\d\W]'
whitespace = r'[%s]' % re.escape(_whitespace)
nowhitespace = '[^' + whitespace[1:]
wordsep_re = re.compile(r'''
( # any whitespace
%(ws)s+
| # em-dash between words
(?<=%(wp)s) -{2,} (?=\w)
| # word, possibly hyphenated
%(nws)s+? (?:
# hyphenated word
-(?: (?<=%(lt)s{2}-) | (?<=%(lt)s-%(lt)s-))
(?= %(lt)s -? %(lt)s)
| # end of word
(?=%(ws)s|\Z)
| # em-dash
(?<=%(wp)s) (?=-{2,}\w)
)
)''' % {'wp': word_punct, 'lt': letter,
'ws': whitespace, 'nws': nowhitespace},
re.VERBOSE)
del word_punct, letter, nowhitespace
# This less funky little regex just split on recognized spaces. E.g.
# "Hello there -- you goof-ball, use the -b option!"
# splits into
# Hello/ /there/ /--/ /you/ /goof-ball,/ /use/ /the/ /-b/ /option!/
wordsep_simple_re = re.compile(r'(%s+)' % whitespace)
del whitespace
# XXX this is not locale- or charset-aware -- string.lowercase
# is US-ASCII only (and therefore English-only)
sentence_end_re = re.compile(r'[a-z]' # lowercase letter
r'[\.\!\?]' # sentence-ending punct.
r'[\"\']?' # optional end-of-quote
r'\Z') # end of chunk
def __init__(self,
width=70,
initial_indent="",
subsequent_indent="",
expand_tabs=True,
replace_whitespace=True,
fix_sentence_endings=False,
break_long_words=True,
drop_whitespace=True,
break_on_hyphens=True,
tabsize=8,
*,
max_lines=None,
placeholder=' [...]'):
self.width = width
self.initial_indent = initial_indent
self.subsequent_indent = subsequent_indent
self.expand_tabs = expand_tabs
self.replace_whitespace = replace_whitespace
self.fix_sentence_endings = fix_sentence_endings
self.break_long_words = break_long_words
self.drop_whitespace = drop_whitespace
self.break_on_hyphens = break_on_hyphens
self.tabsize = tabsize
self.max_lines = max_lines
self.placeholder = placeholder
# -- Private methods -----------------------------------------------
# (possibly useful for subclasses to override)
def _munge_whitespace(self, text):
"""_munge_whitespace(text : string) -> string
Munge whitespace in text: expand tabs and convert all other
whitespace characters to spaces. Eg. " foo\\tbar\\n\\nbaz"
becomes " foo bar baz".
"""
if self.expand_tabs:
text = text.expandtabs(self.tabsize)
if self.replace_whitespace:
text = text.translate(self.unicode_whitespace_trans)
return text
def _split(self, text):
"""_split(text : string) -> [string]
Split the text to wrap into indivisible chunks. Chunks are
not quite the same as words; see _wrap_chunks() for full
details. As an example, the text
Look, goof-ball -- use the -b option!
breaks into the following chunks:
'Look,', ' ', 'goof-', 'ball', ' ', '--', ' ',
'use', ' ', 'the', ' ', '-b', ' ', 'option!'
if break_on_hyphens is True, or in:
'Look,', ' ', 'goof-ball', ' ', '--', ' ',
'use', ' ', 'the', ' ', '-b', ' ', option!'
otherwise.
"""
if self.break_on_hyphens is True:
chunks = self.wordsep_re.split(text)
else:
chunks = self.wordsep_simple_re.split(text)
chunks = [c for c in chunks if c]
return chunks
def _fix_sentence_endings(self, chunks):
"""_fix_sentence_endings(chunks : [string])
Correct for sentence endings buried in 'chunks'. Eg. when the
original text contains "... foo.\\nBar ...", munge_whitespace()
and split() will convert that to [..., "foo.", " ", "Bar", ...]
which has one too few spaces; this method simply changes the one
space to two.
"""
i = 0
patsearch = self.sentence_end_re.search
while i < len(chunks)-1:
if chunks[i+1] == " " and patsearch(chunks[i]):
chunks[i+1] = " "
i += 2
else:
i += 1
def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
"""_handle_long_word(chunks : [string],
cur_line : [string],
cur_len : int, width : int)
Handle a chunk of text (most likely a word, not whitespace) that
is too long to fit in any line.
"""
# Figure out when indent is larger than the specified width, and make
# sure at least one character is stripped off on every pass
if width < 1:
space_left = 1
else:
space_left = width - cur_len
# If we're allowed to break long words, then do so: put as much
# of the next chunk onto the current line as will fit.
if self.break_long_words:
end = space_left
chunk = reversed_chunks[-1]
if self.break_on_hyphens and len(chunk) > space_left:
# break after last hyphen, but only if there are
# non-hyphens before it
hyphen = chunk.rfind('-', 0, space_left)
if hyphen > 0 and any(c != '-' for c in chunk[:hyphen]):
end = hyphen + 1
cur_line.append(chunk[:end])
reversed_chunks[-1] = chunk[end:]
# Otherwise, we have to preserve the long word intact. Only add
# it to the current line if there's nothing already there --
# that minimizes how much we violate the width constraint.
elif not cur_line:
cur_line.append(reversed_chunks.pop())
# If we're not allowed to break long words, and there's already
# text on the current line, do nothing. Next time through the
# main loop of _wrap_chunks(), we'll wind up here again, but
# cur_len will be zero, so the next line will be entirely
# devoted to the long word that we can't handle right now.
def _wrap_chunks(self, chunks):
"""_wrap_chunks(chunks : [string]) -> [string]
Wrap a sequence of text chunks and return a list of lines of
length 'self.width' or less. (If 'break_long_words' is false,
some lines may be longer than this.) Chunks correspond roughly
to words and the whitespace between them: each chunk is
indivisible (modulo 'break_long_words'), but a line break can
come between any two chunks. Chunks should not have internal
whitespace; ie. a chunk is either all whitespace or a "word".
Whitespace chunks will be removed from the beginning and end of
lines, but apart from that whitespace is preserved.
"""
lines = []
if self.width <= 0:
raise ValueError("invalid width %r (must be > 0)" % self.width)
if self.max_lines is not None:
if self.max_lines > 1:
indent = self.subsequent_indent
else:
indent = self.initial_indent
if len(indent) + len(self.placeholder.lstrip()) > self.width:
raise ValueError("placeholder too large for max width")
# Arrange in reverse order so items can be efficiently popped
# from a stack of chucks.
chunks.reverse()
while chunks:
# Start the list of chunks that will make up the current line.
# cur_len is just the length of all the chunks in cur_line.
cur_line = []
cur_len = 0
# Figure out which static string will prefix this line.
if lines:
indent = self.subsequent_indent
else:
indent = self.initial_indent
# Maximum width for this line.
width = self.width - len(indent)
# First chunk on line is whitespace -- drop it, unless this
# is the very beginning of the text (ie. no lines started yet).
if self.drop_whitespace and chunks[-1].strip() == '' and lines:
del chunks[-1]
while chunks:
l = len(chunks[-1])
# Can at least squeeze this chunk onto the current line.
if cur_len + l <= width:
cur_line.append(chunks.pop())
cur_len += l
# Nope, this line is full.
else:
break
# The current line is full, and the next chunk is too big to
# fit on *any* line (not just this one).
if chunks and len(chunks[-1]) > width:
self._handle_long_word(chunks, cur_line, cur_len, width)
cur_len = sum(map(len, cur_line))
# If the last chunk on this line is all whitespace, drop it.
if self.drop_whitespace and cur_line and cur_line[-1].strip() == '':
cur_len -= len(cur_line[-1])
del cur_line[-1]
if cur_line:
if (self.max_lines is None or
len(lines) + 1 < self.max_lines or
(not chunks or
self.drop_whitespace and
len(chunks) == 1 and
not chunks[0].strip()) and cur_len <= width):
# Convert current line back to a string and store it in
# list of all lines (return value).
lines.append(indent + ''.join(cur_line))
else:
while cur_line:
if (cur_line[-1].strip() and
cur_len + len(self.placeholder) <= width):
cur_line.append(self.placeholder)
lines.append(indent + ''.join(cur_line))
break
cur_len -= len(cur_line[-1])
del cur_line[-1]
else:
if lines:
prev_line = lines[-1].rstrip()
if (len(prev_line) + len(self.placeholder) <=
self.width):
lines[-1] = prev_line + self.placeholder
break
lines.append(indent + self.placeholder.lstrip())
break
return lines
def _split_chunks(self, text):
text = self._munge_whitespace(text)
return self._split(text)
# -- Public interface ----------------------------------------------
def wrap(self, text):
"""wrap(text : string) -> [string]
Reformat the single paragraph in 'text' so it fits in lines of
no more than 'self.width' columns, and return a list of wrapped
lines. Tabs in 'text' are expanded with string.expandtabs(),
and all other whitespace characters (including newline) are
converted to space.
"""
chunks = self._split_chunks(text)
if self.fix_sentence_endings:
self._fix_sentence_endings(chunks)
return self._wrap_chunks(chunks)
def fill(self, text):
"""fill(text : string) -> string
Reformat the single paragraph in 'text' to fit in lines of no
more than 'self.width' columns, and return a new string
containing the entire wrapped paragraph.
"""
return "\n".join(self.wrap(text))
# -- Convenience interface ---------------------------------------------
def wrap(text, width=70, **kwargs):
"""Wrap a single paragraph of text, returning a list of wrapped lines.
Reformat the single paragraph in 'text' so it fits in lines of no
more than 'width' columns, and return a list of wrapped lines. By
default, tabs in 'text' are expanded with string.expandtabs(), and
all other whitespace characters (including newline) are converted to
space. See TextWrapper class for available keyword args to customize
wrapping behaviour.
"""
w = TextWrapper(width=width, **kwargs)
return w.wrap(text)
def fill(text, width=70, **kwargs):
"""Fill a single paragraph of text, returning a new string.
Reformat the single paragraph in 'text' to fit in lines of no more
than 'width' columns, and return a new string containing the entire
wrapped paragraph. As with wrap(), tabs are expanded and other
whitespace characters converted to space. See TextWrapper class for
available keyword args to customize wrapping behaviour.
"""
w = TextWrapper(width=width, **kwargs)
return w.fill(text)
def shorten(text, width, **kwargs):
"""Collapse and truncate the given text to fit in the given width.
The text first has its whitespace collapsed. If it then fits in
the *width*, it is returned as is. Otherwise, as many words
as possible are joined and then the placeholder is appended::
>>> textwrap.shorten("Hello world!", width=12)
'Hello world!'
>>> textwrap.shorten("Hello world!", width=11)
'Hello [...]'
"""
w = TextWrapper(width=width, max_lines=1, **kwargs)
return w.fill(' '.join(text.strip().split()))
# -- Loosely related functionality -------------------------------------
_whitespace_only_re = re.compile('^[ \t]+$', re.MULTILINE)
_leading_whitespace_re = re.compile('(^[ \t]*)(?:[^ \t\n])', re.MULTILINE)
def dedent(text):
"""Remove any common leading whitespace from every line in `text`.
This can be used to make triple-quoted strings line up with the left
edge of the display, while still presenting them in the source code
in indented form.
Note that tabs and spaces are both treated as whitespace, but they
are not equal: the lines " hello" and "\\thello" are
considered to have no common leading whitespace.
Entirely blank lines are normalized to a newline character.
"""
# Look for the longest leading string of spaces and tabs common to
# all lines.
margin = None
text = _whitespace_only_re.sub('', text)
indents = _leading_whitespace_re.findall(text)
for indent in indents:
if margin is None:
margin = indent
# Current line more deeply indented than previous winner:
# no change (previous winner is still on top).
elif indent.startswith(margin):
pass
# Current line consistent with and no deeper than previous winner:
# it's the new winner.
elif margin.startswith(indent):
margin = indent
# Find the largest common whitespace between current line and previous
# winner.
else:
for i, (x, y) in enumerate(zip(margin, indent)):
if x != y:
margin = margin[:i]
break
# sanity check (testing/debugging only)
if 0 and margin:
for line in text.split("\n"):
assert not line or line.startswith(margin), \
"line = %r, margin = %r" % (line, margin)
if margin:
text = re.sub(r'(?m)^' + margin, '', text)
return text
def indent(text, prefix, predicate=None):
"""Adds 'prefix' to the beginning of selected lines in 'text'.
If 'predicate' is provided, 'prefix' will only be added to the lines
where 'predicate(line)' is True. If 'predicate' is not provided,
it will default to adding 'prefix' to all non-empty lines that do not
consist solely of whitespace characters.
"""
if predicate is None:
def predicate(line):
return line.strip()
def prefixed_lines():
for line in text.splitlines(True):
yield (prefix + line if predicate(line) else line)
return ''.join(prefixed_lines())
if __name__ == "__main__":
#print dedent("\tfoo\n\tbar")
#print dedent(" \thello there\n \t how are you?")
print(dedent("Hello there.\n This is indented."))