ts-minifier gets some spring cleaning

i dont care if its not spring just try and stop me
This commit is contained in:
bleck9999 2021-09-12 15:52:06 +01:00
parent b9b6e07eb0
commit 13d320a355
No known key found for this signature in database
GPG key ID: D0CA0C41DB616843

View file

@ -1,10 +1,11 @@
# Copyright (c) 2021 bleck9999 # Copyright (c) 2021 bleck9999
# https://github.com/bleck9999/ts-minifier # https://github.com/bleck9999/ts-minifier
# Version: d7106796 # Version: 9d56e91a
import argparse import argparse
import itertools import itertools
import string import logging
from os import path
from string import ascii_letters, digits, hexdigits from string import ascii_letters, digits, hexdigits
auto_replace = False auto_replace = False
@ -32,21 +33,7 @@ class Code:
self.comments = comments self.comments = comments
self.code = code self.code = code
self.varstrs = [] self.varstrs = []
self.rawcode = "".join([x[2] for x in sorted(self.code+self.strings)]) self.rawcode = "".join([x[2] for x in sorted(self.code + self.strings)])
def getafter(self, ch: int):
ch += self.comments[-1][1] if self.comments else 0
for strcom in self.strings:
if strcom[0] >= ch:
return strcom
return None
def nextch(self, ch: int, reverse: bool):
rawcontent = self.rawcode
if ((ch+1 >= len(rawcontent)) and not reverse) or \
((ch-1 < 0) and reverse):
return ''
return rawcontent[ch-1] if reverse else rawcontent[ch+1]
def isidentifier(s: str): def isidentifier(s: str):
@ -110,7 +97,7 @@ def parser(script: str):
# last identifier in a script (eg if script.rawcode was "a=12" the 12 wouldn't be detected without the trailing ' ') # last identifier in a script (eg if script.rawcode was "a=12" the 12 wouldn't be detected without the trailing ' ')
start = len(strscript) + 1 start = len(strscript) + 1
for ch in range(len(strscript)): for ch in range(len(strscript)):
if (strscript[ch-1] == '0' and strscript[ch] == 'x') and not quoted: if (strscript[ch - 1] == '0' and strscript[ch] == 'x') and not quoted:
hexxed = True hexxed = True
elif isidentifier(strscript[ch]) and not (hexxed or quoted): elif isidentifier(strscript[ch]) and not (hexxed or quoted):
if start > ch: if start > ch:
@ -122,8 +109,8 @@ def parser(script: str):
elif strscript[ch] == '"': elif strscript[ch] == '"':
quoted = not quoted quoted = not quoted
elif not quoted: elif not quoted:
if start != len(strscript)+1 and not ismember: # if we actually had an identifier before this char if start != len(strscript) + 1 and not ismember: # if we actually had an identifier before this char
identifier = strscript[start:ch] # and this isnt a member of anything identifier = strscript[start:ch] # and this isnt a member of anything
if identifier in usages: if identifier in usages:
usages[identifier].append(start) usages[identifier].append(start)
elif identifier.isnumeric(): # numbers are legally valid identifiers because fuckyou elif identifier.isnumeric(): # numbers are legally valid identifiers because fuckyou
@ -131,8 +118,8 @@ def parser(script: str):
userobjects[identifier] = "INT" userobjects[identifier] = "INT"
elif identifier == "0x": elif identifier == "0x":
pass pass
elif strscript[ch] == '=' and strscript[ch+1] != '=': elif strscript[ch] == '=' and strscript[ch + 1] != '=':
isfunc = script.nextch(ch, False) == '{' isfunc = strscript[ch + 1] == '{'
userobjects[identifier] = "func" if isfunc else "var" userobjects[identifier] = "func" if isfunc else "var"
usages[identifier] = [start] # declaration is a usage because i cant be arsed usages[identifier] = [start] # declaration is a usage because i cant be arsed
else: # not an assignment (or member) but also haven't seen this name before else: # not an assignment (or member) but also haven't seen this name before
@ -155,7 +142,7 @@ def parser(script: str):
ismember = False ismember = False
pass pass
elif strscript[ch] in ')}]': elif strscript[ch] in ')}]':
ismember = script.nextch(ch, False) == '.' ismember = strscript[ch + 1] == '.'
start = len(strscript) + 1 start = len(strscript) + 1
return script, userobjects, usages return script, userobjects, usages
@ -170,10 +157,13 @@ def minify(script: Code, userobjects, usages):
# ^ 2 for = and whitespace, 2 for "" # ^ 2 for = and whitespace, 2 for ""
# #
# obviously for a rename you're already defining it so it's just the difference between lengths multiplied by uses # obviously for a rename you're already defining it so it's just the difference between lengths multiplied by uses
short_idents = [x for x in (ascii_letters+'_')] + [x[0]+x[1] for x in itertools.product(ascii_letters+'_', repeat=2)] short_idents = [x for x in (ascii_letters + '_')] + [x[0] + x[1] for x in
itertools.product(ascii_letters + '_', repeat=2)]
short_idents.pop(short_idents.index("if")) short_idents.pop(short_idents.index("if"))
mcode = script.rawcode mcode = script.rawcode
aliases = [] aliases = []
logging.info("Renaming user functions and variables" if auto_replace else
"Checking user function and variable names")
for uo in [x for x in userobjects]: for uo in [x for x in userobjects]:
if userobjects[uo] not in ["var", "func"]: if userobjects[uo] not in ["var", "func"]:
continue continue
@ -189,23 +179,23 @@ def minify(script: Code, userobjects, usages):
for i in candidates: for i in candidates:
if i not in userobjects: if i not in userobjects:
minName = i minName = i
userobjects[minName] = "TRN"
break break
if verbose and not minName: if not minName:
print(f"{'Function' if otype == 'func' else 'Variable'} name {uo} could be shortened but " logging.info(f"{'Function' if otype == 'func' else 'Variable'} name {uo} could be shortened but "
f"no available names found (would save {uses} bytes)") f"no available names found (would save {uses} bytes)")
continue continue
# we assume that nobody is insane enough to exhaust all *2,808* 2 character names, # we assume that nobody is insane enough to exhaust all *2,808* 2 character names,
# instead that uo is len 2 and all the 1 character names are in use (because of that we dont multiply # instead that uo is len 2 and all the 1 character names are in use (because of that we dont multiply
# uses by anything as multiplying by a difference of 1 would be redundant) # uses by anything as multiplying by a difference of 1 would be redundant)
if not auto_replace: if not auto_replace:
print(f"{'Function' if otype == 'func' else 'Variable'} name {uo} could be shortened ({uo}->{minName}, " logging.warning(
f"would save {uses*(uolen - len(minName))} bytes)") f"{'Function' if otype == 'func' else 'Variable'} name {uo} could be shortened ({uo}->{minName}, "
f"would save {uses * (uolen - len(minName))} bytes)")
continue continue
else: else:
userobjects[minName] = "TRN" logging.info(f"Renaming {'Function' if otype == 'func' else 'Variable'} {uo} to {minName} "
if verbose: f"(saving {uses * (uolen - len(minName))} bytes)")
print(f"Renaming {'Function' if otype == 'func' else 'Variable'} {uo} to {minName} "
f"(saving {uses*(uolen - len(minName))} bytes)")
diff = uolen - len(minName) diff = uolen - len(minName)
# the foreach syntax is literally the worst part of ts # the foreach syntax is literally the worst part of ts
@ -213,8 +203,7 @@ def minify(script: Code, userobjects, usages):
struo = f'"{uo}"' struo = f'"{uo}"'
for varstr in script.varstrs: for varstr in script.varstrs:
if varstr[2] == struo: if varstr[2] == struo:
if verbose: logging.info(f"Replacing declaration of {varstr[2]} at {varstr[0]}-{varstr[1]}")
print(f"Replacing declaration of {varstr[2]} at {varstr[0]}-{varstr[1]}")
start = varstr[0] - (script.comments[-1][1] if script.comments else 0) start = varstr[0] - (script.comments[-1][1] if script.comments else 0)
end = varstr[1] - (script.comments[-1][1] if script.comments else 0) end = varstr[1] - (script.comments[-1][1] if script.comments else 0)
newend = start + len(minName) newend = start + len(minName)
@ -223,11 +212,13 @@ def minify(script: Code, userobjects, usages):
# rather than just blindly str.replace()ing we're going to actually use the character indices that we stored # rather than just blindly str.replace()ing we're going to actually use the character indices that we stored
prev = 0 prev = 0
for bound in usages[uo]: for bound in usages[uo]:
tmpcode += mcode[prev:bound] + minName + ' '*diff tmpcode += mcode[prev:bound] + minName + ' ' * diff
prev = bound + diff + len(minName) prev = bound + diff + len(minName)
# actually shut up about "bound might be referenced before assignment" or show me what possible # actually shut up about "bound might be referenced before assignment" or show me what possible
# execution path that could lead to usages[uo] being an empty list # execution path that could lead to usages[uo] being an empty list
mcode = tmpcode + mcode[bound+diff+len(minName):] mcode = tmpcode + mcode[bound + diff + len(minName):]
logging.info("Aliasing standard library functions" if auto_replace else
"Checking for standard library aliases")
for func in usages: for func in usages:
tmpcode = "" tmpcode = ""
candidates = short_idents candidates = short_idents
@ -244,27 +235,28 @@ def minify(script: Code, userobjects, usages):
minName = i minName = i
break break
# once again we assume it's only `if` that could trigger this message # once again we assume it's only `if` that could trigger this message
# uses - 4 is the minimum amount of uses needed to save space, 1*(uses - 4) is the space it would save # 4 is the minimum amount of uses needed to save space, 1*(uses - 4) is the space it would save
if not minName and (uses - 4) > 0: if not minName and uses > 4:
if verbose: logging.info(f"Standard library function {func} could be aliased but no available names found "
print(f"Standard library function {func} could be aliased but no available names found " f"(would save {uses - 4} bytes)")
f"(would save {uses-4} bytes)")
else: else:
if not savings: if not savings:
savings = uses*len(func) - (len(func)+len(minName)+2) savings = uses * len(func) - (len(func) + len(minName) + 2)
if (verbose and savings <= 0) or (not auto_replace and savings > 0): if savings > 0:
print(f"Not aliasing standard library function {func} (would save {savings} bytes)")
elif auto_replace and savings > 0:
userobjects[minName] = "TRP" userobjects[minName] = "TRP"
if verbose: if auto_replace:
print(f"Aliasing standard library function {func} to {minName} (saving {savings} bytes)") logging.info(f"Aliasing standard library function {func} to {minName} (saving {savings} bytes)")
diff = len(func) - len(minName) diff = len(func) - len(minName)
prev = 0 prev = 0
for bound in usages[func]: for bound in usages[func]:
tmpcode += mcode[prev:bound] + minName + ' ' * diff tmpcode += mcode[prev:bound] + minName + ' ' * diff
prev = bound + diff + len(minName) prev = bound + diff + len(minName)
mcode = tmpcode + mcode[bound + diff + len(minName):] mcode = tmpcode + mcode[bound + diff + len(minName):]
aliases.append(f"{minName}={func} ") aliases.append(f"{minName}={func} ")
else:
logging.warning(f"Not aliasing standard library function {func} (would save {savings} bytes)")
else:
logging.info(f"Not aliasing standard library function {func} (would save {savings} bytes)")
str_reuse = {} str_reuse = {}
for string in script.strings: for string in script.strings:
@ -272,6 +264,8 @@ def minify(script: Code, userobjects, usages):
str_reuse[string[2]].append(string[0]) str_reuse[string[2]].append(string[0])
else: else:
str_reuse[string[2]] = [string[0]] str_reuse[string[2]] = [string[0]]
logging.info("Introducing variables for reused literals" if auto_replace else
"Checking for reused literals")
for string in str_reuse: for string in str_reuse:
tmpcode = "" tmpcode = ""
candidates = short_idents candidates = short_idents
@ -286,29 +280,32 @@ def minify(script: Code, userobjects, usages):
break break
if not minName: if not minName:
savings = len(string) * uses - (len(string) + 5) # 5 comes from id="{string}" savings = len(string) * uses - (len(string) + 5) # 5 comes from id="{string}"
if verbose: logging.info(f"Could introduce variable for reused string {string} but no available names found "
print(f"Could introduce variable for reused string {string} but no available names found " f"(would save {savings} bytes)")
f"(would save {savings} bytes)")
continue continue
# the quotation marks are included in string # the quotation marks are included in string
savings = uses * len(string) - (len(string) + len(minName) + 2) savings = uses * len(string) - (len(string) + len(minName) + 2)
if (verbose and savings <= 0) or (not auto_replace and savings > 0): if savings > 0:
print(f"Not introducing variable for string {string} reused {uses} times (would save {savings} bytes)")
elif auto_replace and savings > 0:
# "duplicated code fragment" do i look like i give a shit
userobjects[minName] = "TIV" userobjects[minName] = "TIV"
if verbose: if auto_replace:
print(f"Introducing variable {minName} with value {string} (saving {savings} bytes)") # "duplicated code fragment" do i look like i give a shit
diff = len(string) - len(minName) logging.info(f"Introducing variable {minName} with value {string} (saving {savings} bytes)")
prev = 0 diff = len(string) - len(minName)
for bound in str_reuse[string]: prev = 0
bound -= script.comments[-1][1] if script.comments else 0 for bound in str_reuse[string]:
tmpcode += mcode[prev:bound] + minName + ' ' * diff bound -= script.comments[-1][1] if script.comments else 0
prev = bound + diff + len(minName) tmpcode += mcode[prev:bound] + minName + ' ' * diff
mcode = tmpcode + mcode[bound + diff + len(minName):] prev = bound + diff + len(minName)
aliases.append(f"{minName}={string}") mcode = tmpcode + mcode[bound + diff + len(minName):]
elif verbose: aliases.append(f"{minName}={string}")
print(f"Not introducing variable for string {string} (only used once)") else:
logging.warning(f"Not introducing variable for string {string} reused {uses} times "
f"(would save {savings} bytes)")
else:
logging.info(f"Not introducing variable for string {string} reused {uses} times "
f"(would save {savings} bytes)")
else:
logging.info(f"Not introducing variable for string {string} (only used once)")
for uint in [x for x in userobjects]: for uint in [x for x in userobjects]:
if userobjects[uint] != "INT" or len(uint) < 2: if userobjects[uint] != "INT" or len(uint) < 2:
@ -328,35 +325,37 @@ def minify(script: Code, userobjects, usages):
if not minName: if not minName:
# yet another case of "nobody could possibly use up all the 2 char names we hope" # yet another case of "nobody could possibly use up all the 2 char names we hope"
savings = uilen * uses - (uilen + 4) # 4 comes from id={uint}<whitespace> savings = uilen * uses - (uilen + 4) # 4 comes from id={uint}<whitespace>
if verbose: logging.info(f"Could introduce variable for reused integer {uint} but no available names found "
print(f"Could introduce variable for reused integer {uint} but no available names found " f"(would save {savings} bytes)")
f"(would save {savings} bytes)")
continue continue
savings = uilen * uses - (uilen + len(minName) + 2) savings = uilen * uses - (uilen + len(minName) + 2)
if (verbose and savings <= 0) or (not auto_replace and savings > 0): if savings > 0:
print(f"Not introducing variable for string {uint} reused {uses} times (would save {savings} bytes)")
elif auto_replace and savings > 0:
userobjects[minName] = "TIV" userobjects[minName] = "TIV"
if verbose: if auto_replace:
print(f"Introducing variable {minName} with value {uint} (saving {savings} bytes)") logging.info(f"Introducing variable {minName} with value {uint} (saving {savings} bytes)")
diff = len(uint) - len(minName) diff = len(uint) - len(minName)
prev = 0 prev = 0
for bound in usages[uint]: for bound in usages[uint]:
tmpcode += mcode[prev:bound] + minName + ' ' * diff tmpcode += mcode[prev:bound] + minName + ' ' * diff
prev = bound + diff + len(minName) prev = bound + diff + len(minName)
mcode = tmpcode + mcode[bound + diff + len(minName):] mcode = tmpcode + mcode[bound + diff + len(minName):]
aliases.append(f"{minName}={uint} ") aliases.append(f"{minName}={uint} ")
elif verbose: else:
print(f"Not introducing variable for int {uint} (only used once)") logging.warning(f"Not introducing variable for string {uint} reused {uses} times "
f"(would save {savings} bytes)")
else:
logging.info(f"Not introducing variable for string {uint} reused {uses} times "
f"(would save {savings} bytes)")
else:
logging.info(f"Not introducing variable for int {uint} (only used once)")
print("Reintroducing REQUIREs") logging.info("Reintroducing REQUIREs")
mcode = "".join([x[2] for x in script.comments]) + "".join(aliases) + mcode mcode = "".join([x[2] for x in script.comments]) + "".join(aliases) + mcode
print("Stripping whitespace") return mcode
return whitespacent(mcode)
def whitespacent(script: str): def whitespacent(script: str):
# also removes unneeded comments and push REQUIREs to the top of the file # also removes unneeded comments and pushes REQUIREs to the top of the file
requires = "" requires = ""
mcode = "" mcode = ""
for line in script.split(sep='\n'): for line in script.split(sep='\n'):
@ -393,9 +392,9 @@ def whitespacent(script: str):
part += 1 part += 1
# tsv3 is still an absolute nightmare # tsv3 is still an absolute nightmare
# so spaces are required under two situations # so spaces should be preserved under two situations
# 1. the minus operator which requires space between the right operand but only if the right operand is a literal # 1. the subtraction operator which requires space between the right operand but only if the right operand is a literal
# 2. between 2 characters that are either valid identifiers (aA-zZ or _) or integers # 2. between 2 characters that are valid identifiers (aA-zZ, _ or integers)
inquote = False inquote = False
mmcode = "" mmcode = ""
index = 0 index = 0
@ -425,25 +424,34 @@ if __name__ == '__main__':
"\ndefault: ./", default='./') "\ndefault: ./", default='./')
argparser.add_argument("--auto-replace", action="store_true", default=False, argparser.add_argument("--auto-replace", action="store_true", default=False,
help="automatically replace reused functions, variables and strings instead of just warning\n" help="automatically replace reused functions, variables and strings instead of just warning\n"
"and attempt to generate shorter names for reused variables \ndefault: false") "and attempt to generate shorter names for reused variables \ndefault: false")
argparser.add_argument("-v", action="store_true", default=False, argparser.add_argument("-v", action="store_true", default=False,
help="prints even more information to the console than usual") help="prints even more information to the console than usual")
args = argparser.parse_args() args = argparser.parse_args()
files = args.source files = args.source
dest = args.d[:-1] if args.d[-1] == '/' else args.d dest = args.d[:-1] if args.d[-1] in '/\\' else args.d
auto_replace = args.auto_replace auto_replace = args.auto_replace
verbose = args.v verbose = "INFO" if args.v else "WARNING"
logging.basicConfig(level=verbose, format="{message}", style='{')
print(f"Automatic replacement: {'ENABLED' if auto_replace else 'DISABLED'}")
for file in files: for file in files:
print(f"\nMinifying {file}") print(f"\nMinifying {file}")
with open(file, 'r') as f: with open(file, 'r') as f:
print("Stripping comments") logging.info("Stripping comments and whitespace (pass 1)")
res = parser(whitespacent(f.read())) r = whitespacent(f.read())
r = minify(res[0], res[1], res[2]) logging.info("Parsing file")
r = parser(r)
logging.info("Searching for optimisations")
r = minify(r[0], r[1], r[2])
logging.info("Stripping whitespace (pass 2)")
r = whitespacent(r)
file = file.split(sep='.')[0].split(sep='/')[-1] file = file.split(sep='.')[0].split(sep='/')[-1]
if dest != '.': if path.exists(f"{dest}/{file}.te"):
f = open(f"{dest}/{file}.te", 'w')
else:
f = open(f"{dest}/{file}_min.te", 'w') f = open(f"{dest}/{file}_min.te", 'w')
else:
f = open(f"{dest}/{file}.te", 'w')
logging.info(f"Writing to {f.name}")
f.write(r) f.write(r)
print("Done!")