bugfixes waguspin

This commit is contained in:
bleck9999 2021-09-11 17:56:48 +01:00
parent 80ee915bfd
commit b9b6e07eb0
No known key found for this signature in database
GPG key ID: D0CA0C41DB616843

View file

@ -1,10 +1,11 @@
# Copyright (c) 2021 bleck9999 # Copyright (c) 2021 bleck9999
# https://github.com/bleck9999/ts-minifier # https://github.com/bleck9999/ts-minifier
# Version: b201eb4d # Version: d7106796
import argparse import argparse
import itertools import itertools
from string import ascii_letters import string
from string import ascii_letters, digits, hexdigits
auto_replace = False auto_replace = False
verbose = False verbose = False
@ -19,19 +20,13 @@ class Code:
def __init__(self, strings, comments, script): def __init__(self, strings, comments, script):
counter = 0 counter = 0
strings_comments = sorted(strings + comments) strings_comments = sorted(strings + comments)
bounds = [0] if strings_comments[0][0] != 0 else []
for val in strings_comments: bound = 0
if counter and (bounds[counter - 1] == val[0]):
bounds[counter - 1] = val[1]
else:
bounds += [val[0], val[1]]
counter += 2
bounds.append(len(script))
code = [] code = []
i = 2 if len(bounds) % 2 else 1 for strcom in strings_comments:
while i < len(bounds): code.append((bound, strcom[0], script[bound:strcom[0]]))
code.append((bounds[i - 1], bounds[i], script[bounds[i - 1]:bounds[i]])) bound = strcom[1]
i += 2 code.append((bound, len(script), script[bound:]))
self.sections = sorted(strings_comments + code) self.sections = sorted(strings_comments + code)
self.strings = strings self.strings = strings
self.comments = comments self.comments = comments
@ -56,7 +51,14 @@ class Code:
def isidentifier(s: str): def isidentifier(s: str):
for c in s: for c in s:
if c not in (ascii_letters + '_'): if c not in (ascii_letters + '_' + digits):
return False
return True
def iswhitespace(s: str):
for c in s:
if c not in (' ', '\t', '\n'):
return False return False
return True return True
@ -103,7 +105,9 @@ def parser(script: str):
hexxed = False hexxed = False
ismember = False ismember = False
quoted = False quoted = False
strscript = script.rawcode strscript = script.rawcode + ' '
# the trailing space is removed again by the second pass of whitespacent; for now it ensures that the
# last identifier in a script is detected (e.g. if script.rawcode were "a=12", the 12 would be missed without the trailing ' ')
start = len(strscript) + 1 start = len(strscript) + 1
for ch in range(len(strscript)): for ch in range(len(strscript)):
if (strscript[ch-1] == '0' and strscript[ch] == 'x') and not quoted: if (strscript[ch-1] == '0' and strscript[ch] == 'x') and not quoted:
@ -113,20 +117,25 @@ def parser(script: str):
start = ch start = ch
else: else:
pass pass
elif hexxed and strscript[ch].upper() not in "0123456789ABCDEF": elif hexxed and strscript[ch] not in hexdigits:
hexxed = False hexxed = False
elif strscript[ch] == '"': elif strscript[ch] == '"':
quoted = not quoted quoted = not quoted
elif not quoted: elif not quoted:
if start != len(strscript)+1: # if we actually had an identifier before this char if start != len(strscript)+1 and not ismember: # if we actually had an identifier before this char
identifier = strscript[start:ch] identifier = strscript[start:ch] # and this isnt a member of anything
if identifier in usages: if identifier in usages:
usages[identifier].append(start) usages[identifier].append(start)
elif identifier.isnumeric(): # numbers are legally valid identifiers because fuckyou
usages[identifier] = [start]
userobjects[identifier] = "INT"
elif identifier == "0x":
pass
elif strscript[ch] == '=' and strscript[ch+1] != '=': elif strscript[ch] == '=' and strscript[ch+1] != '=':
isfunc = script.nextch(ch, False) == '{' isfunc = script.nextch(ch, False) == '{'
userobjects[identifier] = "func" if isfunc else "var" userobjects[identifier] = "func" if isfunc else "var"
usages[identifier] = [start] # declaration is a usage because i cant be arsed usages[identifier] = [start] # declaration is a usage because i cant be arsed
elif not ismember: # not an assignment (or member) but also haven't seen this name before else: # not an assignment (or member) but also haven't seen this name before
usages[identifier] = [start] usages[identifier] = [start]
# fuck it we are using a fucking list of fucking stdlib functions i just fucking cant im adding tsv3 # fuck it we are using a fucking list of fucking stdlib functions i just fucking cant im adding tsv3
# to the fucking esolangs wiki have a good day # to the fucking esolangs wiki have a good day
@ -143,12 +152,13 @@ def parser(script: str):
script.strings.pop(i) script.strings.pop(i)
break break
else: else:
ismember = False
pass pass
elif strscript[ch] == ')': elif strscript[ch] in ')}]':
ismember = script.nextch(ch, False) == '.' ismember = script.nextch(ch, False) == '.'
start = len(strscript) + 1 start = len(strscript) + 1
return minify(script, userobjects, usages) return script, userobjects, usages
def minify(script: Code, userobjects, usages): def minify(script: Code, userobjects, usages):
@ -179,7 +189,6 @@ def minify(script: Code, userobjects, usages):
for i in candidates: for i in candidates:
if i not in userobjects: if i not in userobjects:
minName = i minName = i
userobjects[minName] = "TRN"
break break
if verbose and not minName: if verbose and not minName:
print(f"{'Function' if otype == 'func' else 'Variable'} name {uo} could be shortened but " print(f"{'Function' if otype == 'func' else 'Variable'} name {uo} could be shortened but "
@ -193,8 +202,10 @@ def minify(script: Code, userobjects, usages):
f"would save {uses*(uolen - len(minName))} bytes)") f"would save {uses*(uolen - len(minName))} bytes)")
continue continue
else: else:
print(f"Renaming {'Function' if otype == 'func' else 'Variable'} {uo} to {minName} " userobjects[minName] = "TRN"
f"(saving {uses*(uolen - len(minName))} bytes)") if verbose:
print(f"Renaming {'Function' if otype == 'func' else 'Variable'} {uo} to {minName} "
f"(saving {uses*(uolen - len(minName))} bytes)")
diff = uolen - len(minName) diff = uolen - len(minName)
# the foreach syntax is literally the worst part of ts # the foreach syntax is literally the worst part of ts
@ -231,19 +242,20 @@ def minify(script: Code, userobjects, usages):
for i in candidates: for i in candidates:
if i not in userobjects: if i not in userobjects:
minName = i minName = i
userobjects[minName] = "TRP"
break break
# once again we assume it's only `if` that could trigger this message # once again we assume it's only `if` that could trigger this message
# uses - 4 is the minimum amount of uses needed to save space, 1*(uses - 4) is the space it would save # uses - 4 is the minimum amount of uses needed to save space, 1*(uses - 4) is the space it would save
if verbose and (not minName and (uses - 4) > 0): if not minName and (uses - 4) > 0:
print(f"Standard library function {func} could be aliased but no available names found " if verbose:
f"(would save {uses-4} bytes)") print(f"Standard library function {func} could be aliased but no available names found "
f"(would save {uses-4} bytes)")
else: else:
if not savings: if not savings:
savings = uses*len(func) - (len(func)+len(minName)+2) savings = uses*len(func) - (len(func)+len(minName)+2)
if (verbose and savings <= 0) or (not auto_replace and savings > 0): if (verbose and savings <= 0) or (not auto_replace and savings > 0):
print(f"Not aliasing standard library function {func} (would save {savings} bytes)") print(f"Not aliasing standard library function {func} (would save {savings} bytes)")
elif auto_replace and savings > 0: elif auto_replace and savings > 0:
userobjects[minName] = "TRP"
if verbose: if verbose:
print(f"Aliasing standard library function {func} to {minName} (saving {savings} bytes)") print(f"Aliasing standard library function {func} to {minName} (saving {savings} bytes)")
diff = len(func) - len(minName) diff = len(func) - len(minName)
@ -271,14 +283,20 @@ def minify(script: Code, userobjects, usages):
for i in candidates: for i in candidates:
if i not in userobjects: if i not in userobjects:
minName = i minName = i
userobjects[minName] = "TIV"
break break
if not minName:
savings = len(string) * uses - (len(string) + 5) # 5 comes from id="{string}"
if verbose:
print(f"Could introduce variable for reused string {string} but no available names found "
f"(would save {savings} bytes)")
continue
# the quotation marks are included in string # the quotation marks are included in string
savings = uses * len(string) - (len(string) + len(minName) + 2) savings = uses * len(string) - (len(string) + len(minName) + 2)
if (verbose and savings <= 0) or (not auto_replace and savings > 0): if (verbose and savings <= 0) or (not auto_replace and savings > 0):
print(f"Not introducing variable for string {string} reused {uses} times (would save {savings} bytes)") print(f"Not introducing variable for string {string} reused {uses} times (would save {savings} bytes)")
elif auto_replace and savings > 0: elif auto_replace and savings > 0:
# "duplicated code fragment" do i look like i give a shit # "duplicated code fragment" do i look like i give a shit
userobjects[minName] = "TIV"
if verbose: if verbose:
print(f"Introducing variable {minName} with value {string} (saving {savings} bytes)") print(f"Introducing variable {minName} with value {string} (saving {savings} bytes)")
diff = len(string) - len(minName) diff = len(string) - len(minName)
@ -292,6 +310,45 @@ def minify(script: Code, userobjects, usages):
elif verbose: elif verbose:
print(f"Not introducing variable for string {string} (only used once)") print(f"Not introducing variable for string {string} (only used once)")
for uint in [x for x in userobjects]:
if userobjects[uint] != "INT" or len(uint) < 2:
continue
candidates = short_idents
uses = len(usages[uint])
uilen = len(uint)
minName = ""
tmpcode = ""
if uses > 1:
if uilen == 2:
candidates = short_idents[:53]
for i in candidates:
if i not in userobjects:
minName = i
break
if not minName:
# yet another case of "nobody could possibly use up all the 2-char names" (we hope)
savings = uilen * uses - (uilen + 4) # 4 comes from id={uint}<whitespace>
if verbose:
print(f"Could introduce variable for reused integer {uint} but no available names found "
f"(would save {savings} bytes)")
continue
savings = uilen * uses - (uilen + len(minName) + 2)
if (verbose and savings <= 0) or (not auto_replace and savings > 0):
print(f"Not introducing variable for string {uint} reused {uses} times (would save {savings} bytes)")
elif auto_replace and savings > 0:
userobjects[minName] = "TIV"
if verbose:
print(f"Introducing variable {minName} with value {uint} (saving {savings} bytes)")
diff = len(uint) - len(minName)
prev = 0
for bound in usages[uint]:
tmpcode += mcode[prev:bound] + minName + ' ' * diff
prev = bound + diff + len(minName)
mcode = tmpcode + mcode[bound + diff + len(minName):]
aliases.append(f"{minName}={uint} ")
elif verbose:
print(f"Not introducing variable for int {uint} (only used once)")
print("Reintroducing REQUIREs") print("Reintroducing REQUIREs")
mcode = "".join([x[2] for x in script.comments]) + "".join(aliases) + mcode mcode = "".join([x[2] for x in script.comments]) + "".join(aliases) + mcode
print("Stripping whitespace") print("Stripping whitespace")
@ -327,10 +384,9 @@ def whitespacent(script: str):
while part < len(line): while part < len(line):
# all the odd numbered indexes should be inside quotes # all the odd numbered indexes should be inside quotes
if part % 2 == 0: if part % 2 == 0:
if not line[part]: if line[part] and not iswhitespace(line[part]):
break # turn lots of whitespace into one whitespace with one easy trick!
# turn lots of whitespace into one whitespace with one easy trick! mcode += ' '.join(line[part].split()) + ' '
mcode += ' '.join(line[part].split()) + ' '
else: else:
mcode += f'"{line[part]}"' mcode += f'"{line[part]}"'
@ -383,7 +439,8 @@ if __name__ == '__main__':
print(f"\nMinifying {file}") print(f"\nMinifying {file}")
with open(file, 'r') as f: with open(file, 'r') as f:
print("Stripping comments") print("Stripping comments")
r = parser(whitespacent(f.read())) res = parser(whitespacent(f.read()))
r = minify(res[0], res[1], res[2])
file = file.split(sep='.')[0].split(sep='/')[-1] file = file.split(sep='.')[0].split(sep='/')[-1]
if dest != '.': if dest != '.':
f = open(f"{dest}/{file}.te", 'w') f = open(f"{dest}/{file}.te", 'w')