From aff76027a0aa8ec4cd3f4a392af8103063630be3 Mon Sep 17 00:00:00 2001 From: yenatch Date: Tue, 28 Oct 2014 21:49:21 -0700 Subject: [PATCH 01/13] Fix the output of PointerLabelParam.to_asm. Accidentally forgot to zfill either half of pointer_part, which would return malformed addresses. Fixes de39f7c6ff487f02d1914cc4a3260817287207c5. --- pokemontools/crystal.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pokemontools/crystal.py b/pokemontools/crystal.py index 9d0fa5b..610f4f7 100644 --- a/pokemontools/crystal.py +++ b/pokemontools/crystal.py @@ -913,7 +913,7 @@ def to_asm(self): lo, hi = self.bytes[1:3] else: lo, hi = self.bytes[0:2] - pointer_part = "{0}{1:2x}{2:2x}".format(self.prefix, hi, lo) + pointer_part = "{0}{1:02x}{2:02x}".format(self.prefix, hi, lo) # bank positioning matters! if bank == True or bank == "reverse": # bank, pointer From 074f8dfdf4686d973234162ab6ebdc8a91e01550 Mon Sep 17 00:00:00 2001 From: yenatch Date: Tue, 28 Oct 2014 21:54:29 -0700 Subject: [PATCH 02/13] audio: Expose header reads via separate functions. --- pokemontools/audio.py | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/pokemontools/audio.py b/pokemontools/audio.py index 896ceef..9e08113 100644 --- a/pokemontools/audio.py +++ b/pokemontools/audio.py @@ -370,7 +370,8 @@ def __init__(self, address, name='', sfx=False): self.asms = [] self.parse() - def parse(self): + + def parse_header(self): self.num_channels = (rom[self.address] >> 6) + 1 self.channels = [] for ch in xrange(self.num_channels): @@ -383,9 +384,9 @@ def parse(self): self.channels += [(current_channel, channel)] self.labels += channel.labels - asms = [] - asms += [generate_label_asm(self.base_label, self.start_address)] + def make_header(self): + asms = [] for i, (num, channel) in enumerate(self.channels): channel_id = num - 1 @@ -397,16 +398,27 @@ def parse(self): comment_text = '; %x\n' % self.address asms += [(self.address, comment_text, self.address)] + return asms + + + def parse(self): + self.parse_header() + + asms = [] + + asms += [generate_label_asm(self.base_label, self.start_address)] + asms += self.make_header() for num, channel in self.channels: asms += channel.output asms = sort_asms(asms) - self.last_address = asms[-1][2] + _, _, self.last_address = asms[-1] asms += [(self.last_address,'; %x\n' % self.last_address, self.last_address)] self.asms += asms + def to_asm(self, labels=[]): """insert outside labels here""" asms = self.asms From 85e0e2af68301288f394631196a8dc5af10eab27 Mon Sep 17 00:00:00 2001 From: yenatch Date: Tue, 28 Oct 2014 22:00:52 -0700 Subject: [PATCH 03/13] Refactor the de/compressor. Speed, readability, accuracy and compression are all improved. Also fix an off-by-one error in repeat commands in the compressor. --- pokemontools/gfx.py | 507 +++++++++++++++++++++++++------------------- 1 file changed, 290 insertions(+), 217 deletions(-) diff --git a/pokemontools/gfx.py b/pokemontools/gfx.py index bfd6745..2bc39d6 100644 --- a/pokemontools/gfx.py +++ b/pokemontools/gfx.py @@ -186,222 +186,258 @@ def to_file(filename, data): class Compressed: + """ + Usage: + lz = Compressed(data).output + or + lz = Compressed().compress(data) + or + c = Compressed() + c.data = data + lz = c.compress() + """ + + # The target compressor is not always as efficient as this implementation. + # To ignore compatibility and spit out a smaller blob, pass in small=True. + small = False + + # BUG: literal [00] is a byte longer than blank 1. + # This bug exists in the target compressor as well, + # so don't fix until we've given up on replicating it. + min_scores = { + 'blank': 2, + 'iterate': 2, + 'alternate': 3, + 'repeat': 3, + 'reverse': 3, + 'flip': 3, + } + + preference = [ + 'repeat', + 'blank', + 'reverse', + 'flip', + 'iterate', + 'alternate', + #'literal', + ] + def __init__(self, data=None, commands=lz_commands, debug=False): self.data = list(bytearray(data)) self.commands = commands self.debug = debug - self.compress() - def byte_at(self, address): - if address < len(self.data): + if self.data is not None: + self.compress() + + def read_byte(self, address=None): + if address is None: + address = self.address + if 0 <= address < len(self.data): return self.data[address] return None - def compress(self): + def reset_scores(self): + self.scores = {} + self.offsets = {} + for method in self.min_scores.keys(): + self.scores[method] = 0 + + def score_literal(self, method): + address = self.address + compare = { + 'blank': [0], + 'iterate': [self.read_byte(address)], + 'alternate': [self.read_byte(address), self.read_byte(address + 1)], + }[method] + length = 0 + while self.read_byte(address) == compare[length % len(compare)]: + length += 1 + address += 1 + self.scores[method] = length + return compare + + def precompute_repeat_matches(self): + """This is faster than redundantly searching each time repeats are scored.""" + self.indexes = {} + for byte in xrange(0x100): + self.indexes[byte] = [] + index = -1 + while 1: + try: + index = self.data.index(byte, index + 1) + except ValueError: + break + self.indexes[byte].append(index) + + def score_repeats(self, name, direction=1, mutate=int): + + address = self.address + byte = mutate(self.data[address]) + + for index in self.indexes[byte]: + if index >= address: break + + length = 1 # we already know the first byte matches + while 1: + byte = self.read_byte(index + length * direction) + if byte == None or mutate(byte) != self.read_byte(address + length): + break + length += 1 + + # If repeats are almost entirely zeroes, just keep going and use blank instead. + if all(x == 0 for x in self.data[ address + 2 : address + length ]): + if self.read_byte(address + length) == 0: + # zeroes continue after this chunk + continue + + # Adjust the score for two-byte offsets. + two_byte_index = index < address - 0x7f + if self.scores[name] >= length - int(two_byte_index): + continue + + self.scores [name] = length + self.offsets[name] = index + + def compress(self, data=None): """ This algorithm is greedy. It aims to match the compressor it's based on as closely as possible. It doesn't, but in the meantime the output is smaller. """ + + if data is not None: + self.data = data + self.address = 0 self.end = len(self.data) self.output = [] self.literal = [] + self.precompute_repeat_matches() while self.address < self.end: + # Tally up the number of bytes that can be compressed # by a single command from the current address. - self.scores = {} - for method in self.commands.keys(): - self.scores[method] = 0 - # The most common byte by far is 0 (whitespace in - # images and padding in tilemaps and regular data). - address = self.address - while self.byte_at(address) == 0x00: - self.scores['blank'] += 1 - address += 1 + self.reset_scores() - # In the same vein, see how long the same byte repeats for. - address = self.address - self.iter = self.byte_at(address) - while self.byte_at(address) == self.iter: - self.scores['iterate'] += 1 - address += 1 + # Check for repetition. Alternating bytes are common since graphics data is planar. - # Do it again, but for alternating bytes. - address = self.address - self.alts = [] - self.alts += [self.byte_at(address)] - self.alts += [self.byte_at(address + 1)] - while self.byte_at(address) == self.alts[(address - self.address) % 2]: - self.scores['alternate'] += 1 - address += 1 + _, self.iter, self.alts = map(self.score_literal, ['blank', 'iterate', 'alternate']) - # Check if we can repeat any data that the - # decompressor just output (here, the input data). - # TODO this includes the current command's output - self.matches = {} - last_matches = {} - address = self.address - min_length = 4 # minimum worthwhile length - max_length = 9 # any further and the time loss is too significant - for length in xrange(min_length, min(len(self.data) - address, max_length)): - keyword = self.data[address:address+length] - for offset, byte in enumerate(self.data[:address]): - # offset ranges are -0x80:-1 and 0:0x7fff - if offset > 0x7fff and offset < address - 0x80: - continue - if byte == keyword[0]: - # Straight repeat... - if self.data[offset:offset+length] == keyword: - if self.scores['repeat'] < length: - self.scores['repeat'] = length - self.matches['repeat'] = offset - # In reverse... - if self.data[offset-1:offset-length-1:-1] == keyword: - if self.scores['reverse'] < length: - self.scores['reverse'] = length - self.matches['reverse'] = offset - # Or bitflipped - if self.bit_flip([byte]) == self.bit_flip([keyword[0]]): - if self.bit_flip(self.data[offset:offset+length]) == self.bit_flip(keyword): - if self.scores['flip'] < length: - self.scores['flip'] = length - self.matches['flip'] = offset - if self.matches == last_matches: - break - last_matches = list(self.matches) + # Check if we can repeat any data that the decompressor just output (here, the input data). + # This includes the current command's output. + + for args in [ + ('repeat', 1, int), + ('reverse', -1, int), + ('flip', 1, self.bit_flip), + ]: + self.score_repeats(*args) # If the scores are too low, try again from the next byte. - if not any(map(lambda x: { - 'blank': 1, - 'iterate': 2, - 'alternate': 3, - 'repeat': 3, - 'reverse': 3, - 'flip': 3, - }.get(x[0], 10000) < x[1], self.scores.items())): - self.literal += [self.data[self.address]] + if not any( + self.min_scores.get(name, score) + int(self.scores[name] > lowmax) < score + for name, score in self.scores.items() + ): + self.literal += [self.read_byte()] self.address += 1 - else: # payload - # bug: literal [00] is a byte longer than blank 1. - # this bug exists in the target compressor as well, - # so don't fix until we've given up on replicating it. - self.do_literal() + else: + self.do_literal() # payload self.do_scored() # unload any literals we're sitting on self.do_literal() + self.output += [lz_end] - def bit_flip(self, data): - return [sum(((byte >> i) & 1) << (7 - i) for i in xrange(8)) for byte in data] + return self.output + + def bit_flip(self, byte): + return sum(((byte >> i) & 1) << (7 - i) for i in xrange(8)) def do_literal(self): if self.literal: - cmd = self.commands['literal'] length = len(self.literal) - self.do_cmd(cmd, length) - # self.address has already been - # incremented in the main loop + self.do_cmd('literal', length) self.literal = [] - def do_cmd(self, cmd, length): - if length > max_length: - length = max_length + def do_scored(self): + # Which command did the best? + winner, score = sorted( + self.scores.items(), + key = lambda (name, score): ( + -(score - self.min_scores[name] - int(score > lowmax)), + self.preference.index(name) + ) + )[0] + length = self.do_cmd(winner, score) + self.address += length + def do_cmd(self, cmd, length): + length = min(length, max_length) cmd_length = length - 1 + output = [] + if length > lowmax: - output = [(self.commands['long'] << 5) + (cmd << 2) + (cmd_length >> 8)] + output += [(self.commands['long'] << 5) + (self.commands[cmd] << 2) + (cmd_length >> 8)] output += [cmd_length & 0xff] else: - output = [(cmd << 5) + cmd_length] - - if cmd == self.commands['literal']: - output += self.literal - elif cmd == self.commands['iterate']: - output += [self.iter] - elif cmd == self.commands['alternate']: - output += self.alts - else: - for command in ['repeat', 'reverse', 'flip']: - if cmd == self.commands[command]: - offset = self.matches[command] - # negative offsets are a byte shorter - if self.address - offset <= 0x80: - offset = self.address - offset + 0x80 - if cmd == self.commands['repeat']: - offset -= 1 # this is a hack, but it seems to work - output += [offset] - else: - output += [offset / 0x100, offset % 0x100] + output += [(self.commands[cmd] << 5) + cmd_length] + + output += { + 'literal': self.literal, + 'iterate': self.iter, + 'alternate': self.alts, + 'blank': [], + }.get(cmd, []) + + if cmd in ['repeat', 'reverse', 'flip']: + offset = self.offsets[cmd] + # Negative offsets are one byte. + # Positive offsets are two. + if self.address - offset <= 0x7f: + offset = self.address - offset + 0x80 + offset -= 1 # this is a hack, but it seems to work + output += [offset] + else: + output += [offset / 0x100, offset % 0x100] # big endian if self.debug: print ( - dict(map(reversed, self.commands.items()))[cmd], - length, '\t', + cmd, length, '\t', ' '.join(map('{:02x}'.format, output)) ) self.output += output return length - def do_scored(self): - # Which command did the best? - winner, score = sorted( - self.scores.items(), - key=lambda x:(-x[1], [ - 'blank', - 'repeat', - 'reverse', - 'flip', - 'iterate', - 'alternate', - 'literal', - 'long', # hack - ].index(x[0])) - )[0] - cmd = self.commands[winner] - length = self.do_cmd(cmd, score) - self.address += length - class Decompressed: """ Parse compressed data, usually 2bpp. - parameters: - [compressed data] - [tile arrangement] default: 'vert' - [size of pic] default: None - [start] (optional) - - splits output into pic [size] and animation tiles if applicable - data can be fed in from rom if [start] is specified + To decompress from an offset (i.e. in a rom), pass in . """ - def __init__(self, lz=None, start=0, debug=False): - # todo: play nice with Compressed + def __init__(self, lz=None, start=0, commands=lz_commands, debug=False): - assert lz, 'need something to decompress!' self.lz = bytearray(lz) + self.commands = commands + self.command_names = dict(map(reversed, self.commands.items())) - self.byte = None - self.address = 0 - self.start = start - - self.output = [] + self.address = start + self.start = start self.decompress() + self.compressed_data = self.lz[self.start : self.address] - self.compressed_data = self.lz[self.start : self.start + self.address] - - # print tuple containing start and end address - if debug: print '(' + hex(self.start) + ', ' + hex(self.start + self.address+1) + '),' + if debug: print '({:x), {:x})'.format(self.start, self.address) def command_list(self): @@ -409,134 +445,163 @@ def command_list(self): Print a list of commands that were used. Useful for debugging. """ - data = bytearray(self.lz) - address = self.address + data = bytearray(self.compressed_data) + data_list = list(data) + + text = '' + address = 0 + head = 0 + while 1: + offset = 0 + cmd_addr = address byte = data[address] address += 1 - if byte == lz_end: break + + if byte == lz_end: + break + cmd = (byte >> 5) & 0b111 - if cmd == lz_commands['long']: + + if cmd == self.commands['long']: cmd = (byte >> 2) & 0b111 - length = (byte & 0b11) << 8 + length = (byte & 0b11) * 0x100 length += data[address] address += 1 else: length = byte & 0b11111 + length += 1 - name = dict(map(reversed, lz_commands.items()))[cmd] + + name = self.command_names[cmd] + if name == 'iterate': address += 1 + elif name == 'alternate': address += 2 + elif name in ['repeat', 'reverse', 'flip']: if data[address] < 0x80: + offset = data[address] * 0x100 + data[address + 1] address += 2 else: + offset = head - (data[address] & 0x7f) - 1 address += 1 + elif name == 'literal': address += length - print name, length, '\t', ' '.join(map('{:02x}'.format, list(data)[cmd_addr:address])) + + text += '{0}: {1}'.format(name, length) + text += '\t' + ' '.join(map('{:02x}'.format, data_list[cmd_addr:address])) + + if name in ['repeat', 'reverse', 'flip']: + + bites = self.output[ offset : offset + length ] + if name == 'reverse': + bites = self.output[ offset : offset - length : -1 ] + + text += ' [' + ' '.join(map('{:02x}'.format, bites)) + ']' + + text += '\n' + + head += length + + + return text def decompress(self): - """ - Replica of crystal's decompression. - """ self.output = [] - while True: - self.getCurByte() + while 1: if (self.byte == lz_end): - self.address += 1 + self.next() break self.cmd = (self.byte & 0b11100000) >> 5 - if self.cmd == lz_commands['long']: # 10-bit param + if self.cmd_name == 'long': + # 10-bit length self.cmd = (self.byte & 0b00011100) >> 2 - self.length = (self.byte & 0b00000011) << 8 - self.next() - self.length += self.byte + 1 - else: # 5-bit param - self.length = (self.byte & 0b00011111) + 1 - - # literals - if self.cmd == lz_commands['literal']: - self.doLiteral() - elif self.cmd == lz_commands['iterate']: - self.doIter() - elif self.cmd == lz_commands['alternate']: - self.doAlt() - elif self.cmd == lz_commands['blank']: - self.doZeros() - - else: # repeaters - self.next() - if self.byte > 0x7f: # negative - self.displacement = self.byte & 0x7f - self.displacement = len(self.output) - self.displacement - 1 - else: # positive - self.displacement = self.byte * 0x100 - self.next() - self.displacement += self.byte + self.length = (self.next() & 0b00000011) * 0x100 + self.length += self.next() + 1 + else: + # 5-bit length + self.length = (self.next() & 0b00011111) + 1 - if self.cmd == lz_commands['flip']: - self.doFlip() - elif self.cmd == lz_commands['reverse']: - self.doReverse() - else: # lz_commands['repeat'] - self.doRepeat() + do = { + 'literal': self.doLiteral, + 'iterate': self.doIter, + 'alternate': self.doAlt, + 'blank': self.doZeros, + 'flip': self.doFlip, + 'reverse': self.doReverse, + 'repeat': self.doRepeat, + }[ self.cmd_name ] - self.address += 1 - #self.next() # somewhat of a hack + do() - def getCurByte(self): - self.byte = self.lz[self.start+self.address] + @property + def byte(self): + return self.lz[ self.address ] def next(self): + byte = self.byte self.address += 1 - self.getCurByte() + return byte + + @property + def cmd_name(self): + return self.command_names.get(self.cmd) + + + def get_offset(self): + + if self.byte >= 0x80: # negative + # negative + offset = self.next() & 0x7f + offset = len(self.output) - offset - 1 + else: + # positive + offset = self.next() * 0x100 + offset += self.next() + + self.offset = offset + def doLiteral(self): """ Copy data directly. """ - for byte in range(self.length): - self.next() - self.output.append(self.byte) + self.output += self.lz[ self.address : self.address + self.length ] + self.address += self.length def doIter(self): """ Write one byte repeatedly. """ - self.next() - for byte in range(self.length): - self.output.append(self.byte) + self.output += [self.next()] * self.length def doAlt(self): """ Write alternating bytes. """ - self.alts = [] - self.next() - self.alts.append(self.byte) - self.next() - self.alts.append(self.byte) + alts = [self.next(), self.next()] + self.output += [ alts[x & 1] for x in xrange(self.length) ] - for byte in range(self.length): - self.output.append(self.alts[byte&1]) + #alts = [self.next(), self.next()] * (self.length / 2 + 1) + #self.output += alts[:self.length] def doZeros(self): """ Write zeros. """ - for byte in range(self.length): - self.output.append(0x00) + self.output += [0] * self.length def doFlip(self): """ @@ -545,23 +610,31 @@ def doFlip(self): eg 11100100 -> 00100111 quat 3 2 1 0 -> 0 2 1 3 """ - for byte in range(self.length): - flipped = sum(1<<(7-i) for i in range(8) if self.output[self.displacement+byte]>>i&1) + self.get_offset() + # Note: appends must be one at a time (this way, repeats can draw from themselves if required) + for i in xrange(self.length): + byte = self.output[ self.offset + i ] + flipped = sum( 1 << (7 - j) for j in xrange(8) if (byte >> j) & 1) self.output.append(flipped) + self.output.append( self.bit_flip( self.output[ self.offset + i ] ) ) def doReverse(self): """ Repeat reversed bytes from output. """ - for byte in range(self.length): - self.output.append(self.output[self.displacement-byte]) + self.get_offset() + # Note: appends must be one at a time (this way, repeats can draw from themselves if required) + for i in xrange(self.length): + self.output.append( self.output[ self.offset - i ] ) def doRepeat(self): """ Repeat bytes from output. """ - for byte in range(self.length): - self.output.append(self.output[self.displacement+byte]) + self.get_offset() + # Note: appends must be one at a time (this way, repeats can draw from themselves if required) + for i in xrange(self.length): + self.output.append( self.output[ self.offset + i ] ) From 0dd6b20b2ff54e1b3bf15957fab77193c287fbea Mon Sep 17 00:00:00 2001 From: yenatch Date: Sat, 1 Nov 2014 20:42:41 -0700 Subject: [PATCH 04/13] BSSReader: Fix eval inserting __builtins__ into self.constants. Eval should really not be used in the first place, but at least it works again. Fixes 82b78e5c7969aebb797f35a58577dd0afb2c2f38, which replaced the constants_to_dict method with BSSReader. --- pokemontools/wram.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pokemontools/wram.py b/pokemontools/wram.py index e25ca37..b6d7fc6 100644 --- a/pokemontools/wram.py +++ b/pokemontools/wram.py @@ -58,7 +58,7 @@ def read_bss_line(self, l): if token in ['ds', 'db', 'dw']: if any(params): - length = eval(rgbasm_to_py(params[0]), self.constants) + length = eval(rgbasm_to_py(params[0]), self.constants.copy()) else: length = {'ds': 1, 'db': 1, 'dw': 2}[token] self.address += length @@ -172,7 +172,7 @@ def read_bss_sections(self, bss): real = split_line[index] name, value = map(' '.join, [split_line[:index], split_line[index+1:]]) value = rgbasm_to_py(value) - self.constants[name] = eval(value, self.constants) + self.constants[name] = eval(value, self.constants.copy()) else: self.read_bss_line(line) @@ -195,7 +195,7 @@ def scrape_constants(text): bss = BSSReader() bss.read_bss_sections(text) constants = bss.constants - return constants + return {v: k for k, v in constants.items()} def read_constants(filepath): """ From ca42efaa0d6e0f53c1ce911ed9c206c646c8c7d9 Mon Sep 17 00:00:00 2001 From: yenatch Date: Sat, 1 Nov 2014 21:11:49 -0700 Subject: [PATCH 05/13] More lenient palette handling. Truncate colors in a palette beyond the limit of 4. Make palette sorting more accurate by taking the sum of the hues rather than by luminance. Ultimately, palette order is up to the whim of whoever was drawing in the tile editor at the time, but this is a reasonable estimation. --- pokemontools/gfx.py | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/pokemontools/gfx.py b/pokemontools/gfx.py index 2bc39d6..e5cac7a 100644 --- a/pokemontools/gfx.py +++ b/pokemontools/gfx.py @@ -1394,33 +1394,34 @@ def png_to_2bpp(filein, **kwargs): 'g': line[px+1], 'b': line[px+2], 'a': line[px+3], } - newline += [color] if color not in palette: - palette += [color] + if len(palette) < 4: + palette += [color] + else: + # TODO Find the nearest match + print 'WARNING: %s: Color %s truncated to' % (filein, color), + color = sorted(palette, key=lambda x: sum(x.values()))[0] + print color + newline += [color] image += [newline] - assert len(palette) <= 4, 'Palette should be 4 colors, is really %d' % len(palette) + assert len(palette) <= 4, '%s: palette should be 4 colors, is really %d: %s' % (filein, len(palette), palette) # Pad out smaller palettes with greyscale colors hues = { - 'white': { 'r': 0xff, 'g': 0xff, 'b': 0xff, 'a': 0xff }, 'black': { 'r': 0x00, 'g': 0x00, 'b': 0x00, 'a': 0xff }, 'grey': { 'r': 0x55, 'g': 0x55, 'b': 0x55, 'a': 0xff }, 'gray': { 'r': 0xaa, 'g': 0xaa, 'b': 0xaa, 'a': 0xff }, + 'white': { 'r': 0xff, 'g': 0xff, 'b': 0xff, 'a': 0xff }, } - for hue in hues.values(): + preference = 'white', 'black', 'grey', 'gray' + for hue in map(hues.get, preference): if len(palette) >= 4: break if hue not in palette: palette += [hue] - # Sort palettes by luminance - def luminance(color): - rough = { 'r': 4.7, - 'g': 1.4, - 'b': 13.8, } - return sum(color[key] * rough[key] for key in rough.keys()) - palette.sort(key=luminance) + palette.sort(key=lambda x: sum(x.values())) # Game Boy palette order palette.reverse() From 145269cdb7aacca6494ed76f972e8c0f886021ff Mon Sep 17 00:00:00 2001 From: yenatch Date: Sun, 2 Nov 2014 00:14:05 -0700 Subject: [PATCH 06/13] Left a scalpel in the patient. "whoops" --- pokemontools/gfx.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pokemontools/gfx.py b/pokemontools/gfx.py index e5cac7a..43c81d5 100644 --- a/pokemontools/gfx.py +++ b/pokemontools/gfx.py @@ -616,7 +616,6 @@ def doFlip(self): byte = self.output[ self.offset + i ] flipped = sum( 1 << (7 - j) for j in xrange(8) if (byte >> j) & 1) self.output.append(flipped) - self.output.append( self.bit_flip( self.output[ self.offset + i ] ) ) def doReverse(self): """ From 41df23fce368821214c8cc1472b162d118c95b41 Mon Sep 17 00:00:00 2001 From: yenatch Date: Sat, 22 Nov 2014 13:56:44 -0800 Subject: [PATCH 07/13] Refactor the Decompressed class and dump/convert pic animations. --- pokemontools/gfx.py | 479 ++++++++++++++++++++++++++++++-------------- 1 file changed, 328 insertions(+), 151 deletions(-) diff --git a/pokemontools/gfx.py b/pokemontools/gfx.py index 43c81d5..3ca8ee8 100644 --- a/pokemontools/gfx.py +++ b/pokemontools/gfx.py @@ -5,17 +5,25 @@ import png from math import sqrt, floor, ceil import argparse +import operator import configuration config = configuration.Config() -import pokemon_constants +from pokemon_constants import pokemon_constants import trainers import romstr -def load_rom(): - rom = romstr.RomStr.load(filename=config.rom_path) + +bit_flipped = [ + sum(((byte >> i) & 1) << (7 - i) for i in xrange(8)) + for byte in xrange(0x100) +] + + +def load_rom(filename=config.rom_path): + rom = romstr.RomStr.load(filename=filename) return bytearray(rom) def rom_offset(bank, address): @@ -124,19 +132,31 @@ def deinterleave_tiles(image, width): return connect(deinterleave(get_tiles(image), width)) -def condense_tiles_to_map(image): +def condense_tiles_to_map(image, pic=0): tiles = get_tiles(image) - new_tiles = [] - tilemap = [] - for tile in tiles: + + # Leave the first frame intact for pics. + new_tiles = tiles[:pic] + tilemap = range(pic) + + for i, tile in enumerate(tiles[pic:]): if tile not in new_tiles: new_tiles += [tile] - tilemap += [new_tiles.index(tile)] + + # Match the first frame where possible. + if tile == new_tiles[i % pic]: + tilemap += [i % pic] + else: + tilemap += [new_tiles.index(tile)] + new_image = connect(new_tiles) return new_image, tilemap def to_file(filename, data): + """ + Apparently open(filename, 'wb').write(bytearray(data)) won't work. + """ file = open(filename, 'wb') for byte in data: file.write('%c' % byte) @@ -202,6 +222,7 @@ class Compressed: small = False # BUG: literal [00] is a byte longer than blank 1. + # In other words, blank's real minimum score is 1. # This bug exists in the target compressor as well, # so don't fix until we've given up on replicating it. min_scores = { @@ -223,10 +244,16 @@ class Compressed: #'literal', ] - def __init__(self, data=None, commands=lz_commands, debug=False): - self.data = list(bytearray(data)) - self.commands = commands - self.debug = debug + data = None + commands = lz_commands + debug = False + literal_only = False + + arg_names = 'data', 'commands', 'debug', 'literal_only' + + def __init__(self, *args, **kwargs): + self.__dict__.update(dict(zip(self.arg_names, args))) + self.__dict__.update(kwargs) if self.data is not None: self.compress() @@ -259,7 +286,9 @@ def score_literal(self, method): return compare def precompute_repeat_matches(self): - """This is faster than redundantly searching each time repeats are scored.""" + """ + This is faster than redundantly searching each time repeats are scored. + """ self.indexes = {} for byte in xrange(0x100): self.indexes[byte] = [] @@ -310,6 +339,8 @@ def compress(self, data=None): if data is not None: self.data = data + self.data = list(bytearray(self.data)) + self.address = 0 self.end = len(self.data) self.output = [] @@ -338,8 +369,11 @@ def compress(self, data=None): self.score_repeats(*args) # If the scores are too low, try again from the next byte. - if not any( - self.min_scores.get(name, score) + int(self.scores[name] > lowmax) < score + + if self.literal_only or not any( + self.min_scores.get(name, score) + + int(self.scores[name] > lowmax) + < score for name, score in self.scores.items() ): self.literal += [self.read_byte()] @@ -357,7 +391,7 @@ def compress(self, data=None): return self.output def bit_flip(self, byte): - return sum(((byte >> i) & 1) << (7 - i) for i in xrange(8)) + return bit_flipped[byte] def do_literal(self): if self.literal: @@ -366,7 +400,7 @@ def do_literal(self): self.literal = [] def do_scored(self): - # Which command did the best? + # Which command will compress the longest chunk? winner, score = sorted( self.scores.items(), key = lambda (name, score): ( @@ -420,24 +454,41 @@ def do_cmd(self, cmd, length): class Decompressed: """ - Parse compressed data, usually 2bpp. + Interpret and decompress lz-compressed data, usually 2bpp. + """ - To decompress from an offset (i.e. in a rom), pass in . """ + Usage: + data = Decompressed(lz).output + or + data = Decompressed().decompress(lz) + or + d = Decompressed() + d.lz = lz + data = d.decompress() - def __init__(self, lz=None, start=0, commands=lz_commands, debug=False): + To decompress from offset 0x80000 in a rom: + data = Decompressed(rom, start=0x80000).output + """ - self.lz = bytearray(lz) - self.commands = commands - self.command_names = dict(map(reversed, self.commands.items())) + lz = None + start = 0 + commands = lz_commands + debug = False - self.address = start - self.start = start + arg_names = 'lz', 'start', 'commands', 'debug' - self.decompress() - self.compressed_data = self.lz[self.start : self.address] + def __init__(self, *args, **kwargs): + self.__dict__.update(dict(zip(self.arg_names, args))) + self.__dict__.update(kwargs) - if debug: print '({:x), {:x})'.format(self.start, self.address) + self.command_names = dict(map(reversed, self.commands.items())) + self.address = self.start + + if self.lz is not None: + self.decompress() + + if self.debug: print self.command_list() def command_list(self): @@ -445,79 +496,45 @@ def command_list(self): Print a list of commands that were used. Useful for debugging. """ - data = bytearray(self.compressed_data) - data_list = list(data) - text = '' - address = 0 - head = 0 - - while 1: - offset = 0 - cmd_addr = address - byte = data[address] - address += 1 - - if byte == lz_end: - break - - cmd = (byte >> 5) & 0b111 - - if cmd == self.commands['long']: - cmd = (byte >> 2) & 0b111 - length = (byte & 0b11) * 0x100 - length += data[address] - address += 1 - else: - length = byte & 0b11111 - - length += 1 - - name = self.command_names[cmd] - - if name == 'iterate': - address += 1 - - elif name == 'alternate': - address += 2 - - elif name in ['repeat', 'reverse', 'flip']: - if data[address] < 0x80: - offset = data[address] * 0x100 + data[address + 1] - address += 2 - else: - offset = head - (data[address] & 0x7f) - 1 - address += 1 - - elif name == 'literal': - address += length + for name, attrs in self.used_commands: + length = attrs['length'] + address = attrs['address'] + offset = attrs['offset'] + direction = attrs['direction'] text += '{0}: {1}'.format(name, length) - text += '\t' + ' '.join(map('{:02x}'.format, data_list[cmd_addr:address])) - - if name in ['repeat', 'reverse', 'flip']: - - bites = self.output[ offset : offset + length ] - if name == 'reverse': - bites = self.output[ offset : offset - length : -1 ] + text += '\t' + ' '.join( + '{:02x}'.format(int(byte)) + for byte in self.lz[ address : address + attrs['cmd_length'] ] + ) - text += ' [' + ' '.join(map('{:02x}'.format, bites)) + ']' + if offset is not None: + repeated_data = self.output[ offset : offset + length * direction : direction ] + text += ' [' + ' '.join(map('{:02x}'.format, repeated_data)) + ']' text += '\n' - head += length + return text - return text + def decompress(self, lz=None): + if lz is not None: + self.lz = lz - def decompress(self): + self.lz = bytearray(self.lz) + self.used_commands = [] self.output = [] while 1: + cmd_address = self.address + self.offset = None + self.direction = None + if (self.byte == lz_end): self.next() break @@ -533,17 +550,21 @@ def decompress(self): # 5-bit length self.length = (self.next() & 0b00011111) + 1 - do = { - 'literal': self.doLiteral, - 'iterate': self.doIter, - 'alternate': self.doAlt, - 'blank': self.doZeros, - 'flip': self.doFlip, - 'reverse': self.doReverse, - 'repeat': self.doRepeat, - }[ self.cmd_name ] + self.__class__.__dict__[self.cmd_name](self) - do() + self.used_commands += [( + self.cmd_name, + { + 'length': self.length, + 'address': cmd_address, + 'offset': self.offset, + 'cmd_length': self.address - cmd_address, + 'direction': self.direction, + } + )] + + # Keep track of the data we just decompressed. + self.compressed_data = self.lz[self.start : self.address] @property @@ -574,66 +595,59 @@ def get_offset(self): self.offset = offset - def doLiteral(self): + def literal(self): """ Copy data directly. """ self.output += self.lz[ self.address : self.address + self.length ] self.address += self.length - def doIter(self): + def iterate(self): """ Write one byte repeatedly. """ self.output += [self.next()] * self.length - def doAlt(self): + def alternate(self): """ Write alternating bytes. """ alts = [self.next(), self.next()] self.output += [ alts[x & 1] for x in xrange(self.length) ] - #alts = [self.next(), self.next()] * (self.length / 2 + 1) - #self.output += alts[:self.length] - - def doZeros(self): + def blank(self): """ Write zeros. """ self.output += [0] * self.length - def doFlip(self): + def flip(self): """ Repeat flipped bytes from output. - eg 11100100 -> 00100111 - quat 3 2 1 0 -> 0 2 1 3 + Example: 11100100 -> 00100111 """ - self.get_offset() - # Note: appends must be one at a time (this way, repeats can draw from themselves if required) - for i in xrange(self.length): - byte = self.output[ self.offset + i ] - flipped = sum( 1 << (7 - j) for j in xrange(8) if (byte >> j) & 1) - self.output.append(flipped) + self._repeat(table=bit_flipped) - def doReverse(self): + def reverse(self): """ Repeat reversed bytes from output. """ - self.get_offset() - # Note: appends must be one at a time (this way, repeats can draw from themselves if required) - for i in xrange(self.length): - self.output.append( self.output[ self.offset - i ] ) + self._repeat(direction=-1) - def doRepeat(self): + def repeat(self): """ Repeat bytes from output. """ + self._repeat() + + def _repeat(self, direction=1, table=None): self.get_offset() + self.direction = direction # Note: appends must be one at a time (this way, repeats can draw from themselves if required) for i in xrange(self.length): - self.output.append( self.output[ self.offset + i ] ) + byte = self.output[ self.offset + i * direction ] + self.output.append( table[byte] if table else byte ) @@ -1002,7 +1016,7 @@ def dump_monster_pals(): pal_length = 0x4 for mon in range(251): - name = pokemon_constants.pokemon_constants[mon+1].title().replace('_','') + name = pokemon_constants[mon+1].title().replace('_','') num = str(mon+1).zfill(3) dir = 'gfx/pics/'+num+'/' @@ -1321,24 +1335,171 @@ def tile_length(img): return width, height, palette, greyscale, bitdepth, px_map -def export_png_to_2bpp(filein, fileout=None, palout=None, tile_padding=0, pic_dimensions=None): +def get_pic_animation(tmap, w, h): + """ + Generate pic animation data from a combined tilemap of each frame. + """ + + frame_text = '' + bitmask_text = '' + + frames = list(split(tmap, w * h)) + bitmasks = [] + + frame_text += ''.join( + '\tdw .frame{0}\n'.format(i + 1) for i, frame in enumerate(frames[1:]) + ) + + for i, frame in enumerate(frames[1:]): + bitmask = map( + lambda (i, x): + int(x != frames[0][i]), + enumerate(frame) + ) + if bitmask not in bitmasks: + bitmasks.append(bitmask) + which_bitmask = bitmasks.index(bitmask) + + frame_ = [x for _, x in filter(lambda (i, x): bitmask[i], enumerate(frame))] + frame_text += '\n'.join([ + '.frame{0}'.format(i + 1), + '\tdb ${0:02x} ; bitmask'.format(which_bitmask), + ('\tdb ' + ', '.join(map('${0:02x}'.format, frame_))) if frame_ else '', + ]) + '\n' + + for i, bitmask in enumerate(bitmasks): + bitmask_text += '; {0}\n'.format(i) + for byte in split(bitmask, 8): + byte.reverse() + byte = int(''.join(map(str, byte)), 2) + bitmask_text += '\tdb %{0:08b}\n'.format(byte) + + return frame_text, bitmask_text + + +def dump_pic_animations(addresses={'bitmasks': 'BitmasksPointers', 'frames': 'FramesPointers'}, pokemon=pokemon_constants, rom=load_rom()): + """ + The code to dump pic animations from rom is mysteriously absent. + Here it is again, but now it dumps images instead of text. + Said text can then be derived from the images. + """ + # Labels can be passed in instead of raw addresses. + for which, offset in addresses.items(): + if type(offset) is str: + for line in open('pokecrystal.sym').readlines(): + if offset in line.split(): + addresses[which] = rom_offset(*map(lambda x: int(x, 16), line[:7].split(':'))) + break + + for i, name in pokemon.items(): + if name.lower() == 'unown': continue + + i -= 1 + + directory = os.path.join('gfx', 'pics', name.lower()) + size = sizes[i] + + if i > 151 - 1: + bank = 0x36 + else: + bank = 0x35 + address = addresses['frames'] + i * 2 + address = rom_offset(bank, rom[address] + rom[address + 1] * 0x100) + addrs = [] + while address not in addrs: + addr = rom[address] + rom[address + 1] * 0x100 + addrs.append(rom_offset(bank, addr)) + address += 2 + num_frames = len(addrs) + + # To go any further, we need bitmasks. + # Bitmasks need the number of frames, which we now have. + + bank = 0x34 + address = addresses['bitmasks'] + i * 2 + address = rom_offset(bank, rom[address] + rom[address + 1] * 0x100) + length = size ** 2 + num_bytes = (length + 7) / 8 + bitmasks = [] + for _ in xrange(num_frames): + bitmask = [] + bytes_ = rom[ address : address + num_bytes ] + for byte in bytes_: + bits = map(int, bin(byte)[2:].zfill(8)) + bits.reverse() + bitmask += bits + bitmasks.append(bitmask) + address += num_bytes + + # Back to frames: + frames = [] + for addr in addrs: + bitmask = bitmasks[rom[addr]] + num_tiles = len(filter(int, bitmask)) + frame = (rom[addr], rom[addr + 1 : addr + 1 + num_tiles]) + frames.append(frame) + + tmap = range(length) * (len(frames) + 1) + for i, frame in enumerate(frames): + bitmask = bitmasks[frame[0]] + tiles = (x for x in frame[1]) + for j, bit in enumerate(bitmask): + if bit: + tmap[(i + 1) * length + j] = tiles.next() + + filename = os.path.join(directory, 'front.{0}x{0}.2bpp.lz'.format(size)) + tiles = get_tiles(Decompressed(open(filename).read()).output) + new_tiles = map(tiles.__getitem__, tmap) + new_image = connect(new_tiles) + filename = os.path.splitext(filename)[0] + to_file(filename, new_image) + export_2bpp_to_png(filename) + + +def export_png_to_2bpp(filein, fileout=None, palout=None, **kwargs): arguments = { - 'tile_padding': tile_padding, - 'pic_dimensions': pic_dimensions, + 'tile_padding': 0, + 'pic_dimensions': None, + 'animate': False, + 'stupid_bitmask_hack': [], } + arguments.update(kwargs) arguments.update(read_filename_arguments(filein)) - image, palette, tmap = png_to_2bpp(filein, **arguments) + image, arguments = png_to_2bpp(filein, **arguments) if fileout == None: fileout = os.path.splitext(filein)[0] + '.2bpp' to_file(fileout, image) - if tmap != None: - mapout = os.path.splitext(fileout)[0] + '.tilemap' - to_file(mapout, tmap) + tmap = arguments.get('tmap') + + if tmap != None and arguments['animate'] and arguments['pic_dimensions']: + # Generate pic animation data. + frame_text, bitmask_text = get_pic_animation(tmap, *arguments['pic_dimensions']) + + frames_path = os.path.join(os.path.split(fileout)[0], 'frames.asm') + with open(frames_path, 'w') as out: + out.write(frame_text) + + bitmask_path = os.path.join(os.path.split(fileout)[0], 'bitmask.asm') + + # The following Pokemon have a bitmask dummied out. + for exception in arguments['stupid_bitmask_hack']: + if exception in bitmask_path: + bitmasks = bitmask_text.split(';') + bitmasks[-1] = bitmasks[-1].replace('1', '0') + bitmask_text = ';'.join(bitmasks) + + with open(bitmask_path, 'w') as out: + out.write(bitmask_text) + + elif tmap != None and arguments.get('tilemap', False): + tilemap_path = os.path.splitext(fileout)[0] + '.tilemap' + to_file(tilemap_path, tmap) + palette = arguments.get('palette') if palout == None: palout = os.path.splitext(fileout)[0] + '.pal' export_palette(palette, palout) @@ -1371,28 +1532,30 @@ def png_to_2bpp(filein, **kwargs): Convert a png image to planar 2bpp. """ - tile_padding = kwargs.get('tile_padding', 0) - pic_dimensions = kwargs.get('pic_dimensions', None) - interleave = kwargs.get('interleave', False) - norepeat = kwargs.get('norepeat', False) - tilemap = kwargs.get('tilemap', False) + arguments = { + 'tile_padding': 0, + 'pic_dimensions': False, + 'interleave': False, + 'norepeat': False, + 'tilemap': False, + } + arguments.update(kwargs) + + if type(filein) is str: + filein = open(filein) - with open(filein, 'rb') as data: - width, height, rgba, info = png.Reader(data).asRGBA8() - rgba = list(rgba) - greyscale = info['greyscale'] + assert type(filein) is file + + width, height, rgba, info = png.Reader(filein).asRGBA8() # png.Reader returns flat pixel data. Nested is easier to work with - len_px = 4 # rgba + len_px = len('rgba') image = [] palette = [] for line in rgba: newline = [] for px in xrange(0, len(line), len_px): - color = { 'r': line[px ], - 'g': line[px+1], - 'b': line[px+2], - 'a': line[px+3], } + color = dict(zip('rgba', line[px:px+len_px])) if color not in palette: if len(palette) < 4: palette += [color] @@ -1404,17 +1567,17 @@ def png_to_2bpp(filein, **kwargs): newline += [color] image += [newline] - assert len(palette) <= 4, '%s: palette should be 4 colors, is really %d: %s' % (filein, len(palette), palette) + assert len(palette) <= 4, '%s: palette should be 4 colors, is really %d (%s)' % (filein, len(palette), palette) # Pad out smaller palettes with greyscale colors - hues = { + greyscale = { 'black': { 'r': 0x00, 'g': 0x00, 'b': 0x00, 'a': 0xff }, 'grey': { 'r': 0x55, 'g': 0x55, 'b': 0x55, 'a': 0xff }, 'gray': { 'r': 0xaa, 'g': 0xaa, 'b': 0xaa, 'a': 0xff }, 'white': { 'r': 0xff, 'g': 0xff, 'b': 0xff, 'a': 0xff }, } preference = 'white', 'black', 'grey', 'gray' - for hue in map(hues.get, preference): + for hue in map(greyscale.get, preference): if len(palette) >= 4: break if hue not in palette: @@ -1464,8 +1627,16 @@ def png_to_2bpp(filein, **kwargs): top += (quad /2 & 1) << (7 - bit) image += [bottom, top] - if pic_dimensions: - w, h = pic_dimensions + dim = arguments['pic_dimensions'] + if dim: + if type(dim) in (tuple, list): + w, h = dim + else: + # infer dimensions based on width. + w = width / tile_width + h = height / tile_height + if h % w == 0: + h = w tiles = get_tiles(image) pic_length = w * h @@ -1483,17 +1654,23 @@ def png_to_2bpp(filein, **kwargs): image = connect(new_image) # Remove any tile padding used to make the png rectangular. - image = image[:len(image) - tile_padding * 0x10] + image = image[:len(image) - arguments['tile_padding'] * 0x10] - if interleave: + tmap = None + + if arguments['interleave']: image = deinterleave_tiles(image, num_columns) - if norepeat: + if arguments['pic_dimensions']: + image, tmap = condense_tiles_to_map(image, w * h) + elif arguments['norepeat']: image, tmap = condense_tiles_to_map(image) - if not tilemap: - tmap = None + if not arguments['tilemap']: + tmap = None + + arguments.update({ 'palette': palette, 'tmap': tmap, }) - return image, palette, tmap + return image, arguments def export_palette(palette, filename): @@ -1583,7 +1760,7 @@ def export_png_to_1bpp(filename, fileout=None): to_file(fileout, image) def png_to_1bpp(filename, **kwargs): - image, palette, tmap = png_to_2bpp(filename, **kwargs) + image, kwargs = png_to_2bpp(filename, **kwargs) return convert_2bpp_to_1bpp(image) From 0182bcaf8f92e66396c17969b0753f2175603ccd Mon Sep 17 00:00:00 2001 From: yenatch Date: Sat, 22 Nov 2014 13:57:53 -0800 Subject: [PATCH 08/13] Read gfx.yaml for graphics conversion options. Keys count both as path nodes and attributes depending on context. An example gfx.yaml: ``` gfx: pics: pal_file: normal.pal pic_dimensions: yes animate: yes stupid_bitmask_hack: [dewgong, lugia] pc: literal_only: yes ``` Here, all graphics matching gfx/pics/.../*.png will use palette gfx/pics/.../normal.pal and be interpreted as animated pics. gfx/pc.2bpp will compress using only the `literal` command. If gfx.yaml doesn't exist, current behavior is unchanged. Filename arguments override yaml. --- pokemontools/gfx.py | 66 ++++++++++++++++++++++++++++++++++++++------- 1 file changed, 57 insertions(+), 9 deletions(-) diff --git a/pokemontools/gfx.py b/pokemontools/gfx.py index 3ca8ee8..f80e10e 100644 --- a/pokemontools/gfx.py +++ b/pokemontools/gfx.py @@ -5,6 +5,7 @@ import png from math import sqrt, floor, ceil import argparse +import yaml import operator import configuration @@ -1174,33 +1175,80 @@ def png_to_rgb(palette): return output -def read_filename_arguments(filename): - int_args = { +def read_yaml_arguments(filename, yaml_filename = os.path.join(config.path, 'gfx.yaml'), path_arguments = ['pal_file']): + + parsed_arguments = {} + + # Read arguments from gfx.yaml if it exists. + if os.path.exists(yaml_filename): + yargs = yaml.load(open(yaml_filename)) + dirs = os.path.splitext(filename)[0].split('/') + current_path = os.path.dirname(filename) + path = [] + while yargs: + for key, value in yargs.items(): + # Follow directories to the bottom while picking up keys. + # Try not to mistake other files for keys. + parsed_path = os.path.join( * (path + [key]) ) + for guessed_path in map(parsed_path.__add__, ['', '.png']): + if os.path.exists(guessed_path) or '.' in key: + if guessed_path != filename: + continue + if key in path_arguments: + value = os.path.join(current_path, value) + parsed_arguments[key] = value + if not dirs: + break + yargs = yargs.get(dirs[0], {}) + path.append(dirs.pop(0)) + + return parsed_arguments + +def read_filename_arguments(filename, yaml_filename = os.path.join(config.path, 'gfx.yaml'), path_arguments = ['pal_file']): + """ + Infer graphics conversion arguments given a filename. + + If it exists, ./gfx.yaml is traversed for arguments. + Then additional arguments within the filename (separated with ".") are grabbed. + """ + parsed_arguments = {} + + parsed_arguments.update(read_yaml_arguments( + filename, + yaml_filename = yaml_filename, + path_arguments = path_arguments + )) + + int_arguments = { 'w': 'width', 'h': 'height', 't': 'tile_padding', } - parsed_arguments = {} + # Filename arguments override yaml. arguments = os.path.splitext(filename)[0].split('.')[1:] for argument in arguments: + + # Check for integer arguments first (i.e. "w128"). arg = argument[0] param = argument[1:] if param.isdigit(): - arg = int_args.get(arg, False) + arg = int_arguments.get(arg, False) if arg: parsed_arguments[arg] = int(param) - elif argument == 'interleave': - parsed_arguments['interleave'] = True - elif argument == 'norepeat': - parsed_arguments['norepeat'] = True + elif argument == 'arrange': parsed_arguments['norepeat'] = True parsed_arguments['tilemap'] = True - elif 'x' in argument: + + # Pic dimensions (i.e. "6x6"). + elif 'x' in argument and any(map(str.isdigit, argument)): w, h = argument.split('x') if w.isdigit() and h.isdigit(): parsed_arguments['pic_dimensions'] = (int(w), int(h)) + else: + parsed_arguments[argument] = True + return parsed_arguments From aec2e7ff640b324bdd800cd816b4bafffe133067 Mon Sep 17 00:00:00 2001 From: yenatch Date: Sat, 7 Feb 2015 12:36:22 -0800 Subject: [PATCH 09/13] Refactor get_pic_animation. --- pokemontools/gfx.py | 38 ++++++++++++++++++-------------------- 1 file changed, 18 insertions(+), 20 deletions(-) diff --git a/pokemontools/gfx.py b/pokemontools/gfx.py index f80e10e..7dc1663 100644 --- a/pokemontools/gfx.py +++ b/pokemontools/gfx.py @@ -1387,40 +1387,38 @@ def get_pic_animation(tmap, w, h): """ Generate pic animation data from a combined tilemap of each frame. """ - frame_text = '' bitmask_text = '' frames = list(split(tmap, w * h)) + base = frames.pop(0) bitmasks = [] - frame_text += ''.join( - '\tdw .frame{0}\n'.format(i + 1) for i, frame in enumerate(frames[1:]) - ) + for i in xrange(len(frames)): + frame_text += '\tdw .frame{}\n'.format(i + 1) - for i, frame in enumerate(frames[1:]): - bitmask = map( - lambda (i, x): - int(x != frames[0][i]), - enumerate(frame) - ) + for i, frame in enumerate(frames): + bitmask = map(operator.eq, frame, base) if bitmask not in bitmasks: bitmasks.append(bitmask) which_bitmask = bitmasks.index(bitmask) - frame_ = [x for _, x in filter(lambda (i, x): bitmask[i], enumerate(frame))] - frame_text += '\n'.join([ - '.frame{0}'.format(i + 1), - '\tdb ${0:02x} ; bitmask'.format(which_bitmask), - ('\tdb ' + ', '.join(map('${0:02x}'.format, frame_))) if frame_ else '', - ]) + '\n' + mask = iter(bitmask) + masked_frame = filter(mask.next, frame) + + frame_text += '.frame{}\n'.format(i + 1) + frame_text += '\tdb ${:02x} ; bitmask\n'.format(which_bitmask) + if masked_frame: + frame_text += '\tdb {}\n'.format(', '.join( + map('${:02x}'.format, masked_frame) + )) + frame_text += '\n' for i, bitmask in enumerate(bitmasks): - bitmask_text += '; {0}\n'.format(i) + bitmask_text += '; {}\n'.format(i) for byte in split(bitmask, 8): - byte.reverse() - byte = int(''.join(map(str, byte)), 2) - bitmask_text += '\tdb %{0:08b}\n'.format(byte) + byte = int(''.join(map(int.__repr__, reversed(byte))), 2) + bitmask_text += '\tdb %{:08b}\n'.format(byte) return frame_text, bitmask_text From 78aa4f00be882b1688c5913400ccf32a3454844a Mon Sep 17 00:00:00 2001 From: yenatch Date: Sat, 7 Feb 2015 12:40:15 -0800 Subject: [PATCH 10/13] Pointless whitespace tweaks. --- pokemontools/gfx.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pokemontools/gfx.py b/pokemontools/gfx.py index 7dc1663..25caeba 100644 --- a/pokemontools/gfx.py +++ b/pokemontools/gfx.py @@ -197,7 +197,7 @@ def to_file(filename, data): 'long': 7, # n is now 10 bits for a new control code }) max_length = 1 << 10 # can't go higher than 10 bits -lowmax = 1 << 5 # standard 5-bit param +lowmax = 1 << 5 # standard 5-bit param """ If 0xff is encountered instead of a command, decompression ends. @@ -373,7 +373,7 @@ def compress(self, data=None): if self.literal_only or not any( self.min_scores.get(name, score) - + int(self.scores[name] > lowmax) + + int(score > lowmax) < score for name, score in self.scores.items() ): From 36f7ee513e0a2d3fe7278e58a56d05d1a34177f0 Mon Sep 17 00:00:00 2001 From: yenatch Date: Sat, 7 Feb 2015 13:06:08 -0800 Subject: [PATCH 11/13] Refactor the compressor again. This is a little closer to the target compressor than before. --- pokemontools/gfx.py | 411 ++++++++++++++++++++++++++------------------ 1 file changed, 241 insertions(+), 170 deletions(-) diff --git a/pokemontools/gfx.py b/pokemontools/gfx.py index 25caeba..25f7ce2 100644 --- a/pokemontools/gfx.py +++ b/pokemontools/gfx.py @@ -216,240 +216,311 @@ class Compressed: c = Compressed() c.data = data lz = c.compress() + + There are some issues with reproducing the target compressor. + Some notes are listed here: + - the criteria for detecting a lookback is inconsistent + - sometimes lookbacks that are mostly 0s are pruned, sometimes not + - target appears to skip ahead if it can use a lookback soon, stopping the current command short or in some cases truncating it with literals. + - this has been implemented, but the specifics are unknown + - self.min_scores: It's unknown if blank's minimum score should be 1 or 2. Most likely it's 1, with some other hack to account for edge cases. + - may be related to the above + - target does not appear to compress backwards """ - # The target compressor is not always as efficient as this implementation. - # To ignore compatibility and spit out a smaller blob, pass in small=True. - small = False - - # BUG: literal [00] is a byte longer than blank 1. - # In other words, blank's real minimum score is 1. - # This bug exists in the target compressor as well, - # so don't fix until we've given up on replicating it. - min_scores = { - 'blank': 2, - 'iterate': 2, - 'alternate': 3, - 'repeat': 3, - 'reverse': 3, - 'flip': 3, - } + def __init__(self, *args, **kwargs): - preference = [ - 'repeat', - 'blank', - 'reverse', - 'flip', - 'iterate', - 'alternate', - #'literal', - ] - - data = None - commands = lz_commands - debug = False - literal_only = False + self.min_scores = { + 'blank': 1, + 'iterate': 2, + 'alternate': 3, + 'repeat': 3, + 'reverse': 3, + 'flip': 3, + } + + self.preference = [ + 'repeat', + 'blank', + 'flip', + 'reverse', + 'iterate', + 'alternate', + #'literal', + ] - arg_names = 'data', 'commands', 'debug', 'literal_only' + self.lookback_methods = 'repeat', 'reverse', 'flip' + + self.__dict__.update({ + 'data': None, + 'commands': lz_commands, + 'debug': False, + 'literal_only': False, + }) + + self.arg_names = 'data', 'commands', 'debug', 'literal_only' - def __init__(self, *args, **kwargs): - self.__dict__.update(dict(zip(self.arg_names, args))) self.__dict__.update(kwargs) + self.__dict__.update(dict(zip(self.arg_names, args))) if self.data is not None: self.compress() - def read_byte(self, address=None): - if address is None: - address = self.address - if 0 <= address < len(self.data): - return self.data[address] - return None - - def reset_scores(self): - self.scores = {} - self.offsets = {} - for method in self.min_scores.keys(): - self.scores[method] = 0 + def compress(self, data=None): + if data is not None: + self.data = data - def score_literal(self, method): - address = self.address - compare = { - 'blank': [0], - 'iterate': [self.read_byte(address)], - 'alternate': [self.read_byte(address), self.read_byte(address + 1)], - }[method] - length = 0 - while self.read_byte(address) == compare[length % len(compare)]: - length += 1 - address += 1 - self.scores[method] = length - return compare + self.data = list(bytearray(self.data)) - def precompute_repeat_matches(self): - """ - This is faster than redundantly searching each time repeats are scored. - """ self.indexes = {} - for byte in xrange(0x100): - self.indexes[byte] = [] - index = -1 - while 1: - try: - index = self.data.index(byte, index + 1) - except ValueError: - break - self.indexes[byte].append(index) + self.lookbacks = {} + for method in self.lookback_methods: + self.lookbacks[method] = {} - def score_repeats(self, name, direction=1, mutate=int): + self.address = 0 + self.end = len(self.data) + self.output = [] + self.literal = None - address = self.address - byte = mutate(self.data[address]) + while self.address < self.end: - for index in self.indexes[byte]: - if index >= address: break + if self.score(): + self.do_literal() + self.do_winner() - length = 1 # we already know the first byte matches - while 1: - byte = self.read_byte(index + length * direction) - if byte == None or mutate(byte) != self.read_byte(address + length): - break - length += 1 + else: + if self.literal == None: + self.literal = self.address + self.address += 1 - # If repeats are almost entirely zeroes, just keep going and use blank instead. - if all(x == 0 for x in self.data[ address + 2 : address + length ]): - if self.read_byte(address + length) == 0: - # zeroes continue after this chunk - continue + self.do_literal() - # Adjust the score for two-byte offsets. - two_byte_index = index < address - 0x7f - if self.scores[name] >= length - int(two_byte_index): - continue + self.output += [lz_end] + return self.output - self.scores [name] = length - self.offsets[name] = index + def reset_scores(self): + self.scores = {} + self.offsets = {} + self.helpers = {} + for method in self.min_scores.iterkeys(): + self.scores[method] = 0 - def compress(self, data=None): - """ - This algorithm is greedy. - It aims to match the compressor it's based on as closely as possible. - It doesn't, but in the meantime the output is smaller. - """ + def bit_flip(self, byte): + return bit_flipped[byte] - if data is not None: - self.data = data + def do_literal(self): + if self.literal != None: + length = abs(self.address - self.literal) + start = min(self.literal, self.address + 1) + self.helpers['literal'] = self.data[start:start+length] + self.do_cmd('literal', length) + self.literal = None - self.data = list(bytearray(self.data)) + def score(self): + self.reset_scores() - self.address = 0 - self.end = len(self.data) - self.output = [] - self.literal = [] - self.precompute_repeat_matches() + map(self.score_literal, ['iterate', 'alternate', 'blank']) - while self.address < self.end: + for method in self.lookback_methods: + self.scores[method], self.offsets[method] = self.find_lookback(method, self.address) + + # Compatibility: + # If a lookback is close, reduce the scores of other commands + best_method, best_score = max( + self.scores.items(), + key = lambda x: ( + x[1], + -self.preference.index(x[0]) + ) + ) + for method in self.lookback_methods: + for address in xrange(self.address+1, self.address+min(best_score, 6)): + if self.find_lookback(method, address)[0] > max(self.min_scores[method], best_score): + # BUG: lookbacks can reduce themselves. This appears to be a bug in the target also. + for m, score in self.scores.items(): + self.scores[m] = min(score, address - self.address) + + return any( + score + > self.min_scores[method] + int(score > lowmax) + for method, score in self.scores.iteritems() + ) + + def read(self, address=None): + if address is None: + address = self.address + if 0 <= address < len(self.data): + return self.data[address] + return None - # Tally up the number of bytes that can be compressed - # by a single command from the current address. + def find_all_lookbacks(self): + for method in self.lookback_methods: + for address, byte in enumerate(self.data): + self.find_lookback(method, address) - self.reset_scores() + def find_lookback(self, method, address=None): + if address is None: + address = self.address - # Check for repetition. Alternating bytes are common since graphics data is planar. + existing = self.lookbacks.get(method, {}).get(address) + if existing != None: + return existing - _, self.iter, self.alts = map(self.score_literal, ['blank', 'iterate', 'alternate']) + lookback = 0, None - # Check if we can repeat any data that the decompressor just output (here, the input data). - # This includes the current command's output. + # Better to not carelessly optimize at the moment. + """ + if address < 2: + return lookback + """ - for args in [ - ('repeat', 1, int), - ('reverse', -1, int), - ('flip', 1, self.bit_flip), - ]: - self.score_repeats(*args) + byte = self.read(address) + if byte is None: + return lookback - # If the scores are too low, try again from the next byte. + direction, mutate = { + 'repeat': ( 1, int), + 'reverse': (-1, int), + 'flip': ( 1, self.bit_flip), + }[method] - if self.literal_only or not any( - self.min_scores.get(name, score) - + int(score > lowmax) - < score - for name, score in self.scores.items() - ): - self.literal += [self.read_byte()] - self.address += 1 + # Doesn't seem to help + """ + if mutate == self.bit_flip: + if byte == 0: + self.lookbacks[method][address] = lookback + return lookback + """ + + data_len = len(self.data) + is_two_byte_index = lambda index: int(index < address - 0x7f) + + for index in self.get_indexes(mutate(byte)): + if index >= address: + break + + old_length, old_index = lookback + if direction == 1: + if old_length > data_len - index: break else: - self.do_literal() # payload - self.do_scored() + if old_length > index: continue - # unload any literals we're sitting on - self.do_literal() + if self.read(index) in [None]: continue - self.output += [lz_end] + length = 1 # we know there's at least one match, or we wouldn't be checking this index + while 1: + this_byte = self.read(address + length) + that_byte = self.read(index + length * direction) + if that_byte == None or this_byte != mutate(that_byte): + break + length += 1 + """ + if direction == 1: + if not any(self.data[address+2:address+length]): continue + """ + if length - is_two_byte_index(index) >= old_length - is_two_byte_index(old_index): # XXX >? + # XXX maybe avoid two-byte indexes when possible + lookback = length, index + + self.lookbacks[method][address] = lookback + return lookback + + def get_indexes(self, byte): + if not self.indexes.has_key(byte): + self.indexes[byte] = [] + index = -1 + while 1: + try: + index = self.data.index(byte, index + 1) + except ValueError: + break + self.indexes[byte].append(index) + return self.indexes[byte] - return self.output + def score_literal(self, method): + address = self.address - def bit_flip(self, byte): - return bit_flipped[byte] + compare = { + 'blank': [0], + 'iterate': [self.read(address)], + 'alternate': [self.read(address), self.read(address + 1)], + }[method] - def do_literal(self): - if self.literal: - length = len(self.literal) - self.do_cmd('literal', length) - self.literal = [] + # XXX may or may not be correct + if method == 'alternate' and compare[0] == 0: + return - def do_scored(self): - # Which command will compress the longest chunk? - winner, score = sorted( - self.scores.items(), - key = lambda (name, score): ( - -(score - self.min_scores[name] - int(score > lowmax)), - self.preference.index(name) + length = 0 + while self.read(address + length) == compare[length % len(compare)]: + length += 1 + + self.scores[method] = length + self.helpers[method] = compare + + def do_winner(self): + winners = filter( + lambda (method, score): + score + > self.min_scores[method] + int(score > lowmax), + self.scores.iteritems() + ) + winners.sort( + key = lambda (method, score): ( + -(score - self.min_scores[method] - int(score > lowmax)), + self.preference.index(method) ) - )[0] - length = self.do_cmd(winner, score) + ) + winner, score = winners[0] + + length = min(score, max_length) + self.do_cmd(winner, length) self.address += length def do_cmd(self, cmd, length): - length = min(length, max_length) + start_address = self.address + cmd_length = length - 1 output = [] if length > lowmax: - output += [(self.commands['long'] << 5) + (self.commands[cmd] << 2) + (cmd_length >> 8)] - output += [cmd_length & 0xff] + output.append( + (self.commands['long'] << 5) + + (self.commands[cmd] << 2) + + (cmd_length >> 8) + ) + output.append( + cmd_length & 0xff + ) else: - output += [(self.commands[cmd] << 5) + cmd_length] + output.append( + (self.commands[cmd] << 5) + + cmd_length + ) - output += { - 'literal': self.literal, - 'iterate': self.iter, - 'alternate': self.alts, - 'blank': [], - }.get(cmd, []) + self.helpers['blank'] = [] # quick hack + output += self.helpers.get(cmd, []) - if cmd in ['repeat', 'reverse', 'flip']: + if cmd in self.lookback_methods: offset = self.offsets[cmd] # Negative offsets are one byte. # Positive offsets are two. - if self.address - offset <= 0x7f: - offset = self.address - offset + 0x80 - offset -= 1 # this is a hack, but it seems to work + if start_address - offset <= 0x7f: + offset = start_address - offset + 0x80 + offset -= 1 # this seems to work output += [offset] else: output += [offset / 0x100, offset % 0x100] # big endian if self.debug: - print ( + print ' '.join(map(str, [ cmd, length, '\t', - ' '.join(map('{:02x}'.format, output)) - ) + ' '.join(map('{:02x}'.format, output)), + self.data[start_address:start_address+length] if cmd in self.lookback_methods else '', + ])) self.output += output - return length From fb23f2754c9b94eac871a33621ce43fa71b53170 Mon Sep 17 00:00:00 2001 From: yenatch Date: Sat, 7 Feb 2015 13:11:38 -0800 Subject: [PATCH 12/13] Split the lz compression tools out of gfx.py. --- pokemontools/gfx.py | 562 +------------------------------------------ pokemontools/lz.py | 566 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 567 insertions(+), 561 deletions(-) create mode 100644 pokemontools/lz.py diff --git a/pokemontools/gfx.py b/pokemontools/gfx.py index 25f7ce2..b2244a5 100644 --- a/pokemontools/gfx.py +++ b/pokemontools/gfx.py @@ -15,13 +15,9 @@ import trainers import romstr +from lz import Compressed, Decompressed -bit_flipped = [ - sum(((byte >> i) & 1) << (7 - i) for i in xrange(8)) - for byte in xrange(0x100) -] - def load_rom(filename=config.rom_path): rom = romstr.RomStr.load(filename=filename) @@ -165,562 +161,6 @@ def to_file(filename, data): -""" -A rundown of Pokemon Crystal's compression scheme: - -Control commands occupy bits 5-7. -Bits 0-4 serve as the first parameter for each command. -""" -lz_commands = { - 'literal': 0, # n values for n bytes - 'iterate': 1, # one value for n bytes - 'alternate': 2, # alternate two values for n bytes - 'blank': 3, # zero for n bytes -} - -""" -Repeater commands repeat any data that was just decompressed. -They take an additional signed parameter to mark a relative starting point. -These wrap around (positive from the start, negative from the current position). -""" -lz_commands.update({ - 'repeat': 4, # n bytes starting from s - 'flip': 5, # n bytes in reverse bit order starting from s - 'reverse': 6, # n bytes backwards starting from s -}) - -""" -The long command is used when 5 bits aren't enough. Bits 2-4 contain a new control code. -Bits 0-1 are appended to a new byte as 8-9, allowing a 10-bit parameter. -""" -lz_commands.update({ - 'long': 7, # n is now 10 bits for a new control code -}) -max_length = 1 << 10 # can't go higher than 10 bits -lowmax = 1 << 5 # standard 5-bit param - -""" -If 0xff is encountered instead of a command, decompression ends. -""" -lz_end = 0xff - - -class Compressed: - - """ - Usage: - lz = Compressed(data).output - or - lz = Compressed().compress(data) - or - c = Compressed() - c.data = data - lz = c.compress() - - There are some issues with reproducing the target compressor. - Some notes are listed here: - - the criteria for detecting a lookback is inconsistent - - sometimes lookbacks that are mostly 0s are pruned, sometimes not - - target appears to skip ahead if it can use a lookback soon, stopping the current command short or in some cases truncating it with literals. - - this has been implemented, but the specifics are unknown - - self.min_scores: It's unknown if blank's minimum score should be 1 or 2. Most likely it's 1, with some other hack to account for edge cases. - - may be related to the above - - target does not appear to compress backwards - """ - - def __init__(self, *args, **kwargs): - - self.min_scores = { - 'blank': 1, - 'iterate': 2, - 'alternate': 3, - 'repeat': 3, - 'reverse': 3, - 'flip': 3, - } - - self.preference = [ - 'repeat', - 'blank', - 'flip', - 'reverse', - 'iterate', - 'alternate', - #'literal', - ] - - self.lookback_methods = 'repeat', 'reverse', 'flip' - - self.__dict__.update({ - 'data': None, - 'commands': lz_commands, - 'debug': False, - 'literal_only': False, - }) - - self.arg_names = 'data', 'commands', 'debug', 'literal_only' - - self.__dict__.update(kwargs) - self.__dict__.update(dict(zip(self.arg_names, args))) - - if self.data is not None: - self.compress() - - def compress(self, data=None): - if data is not None: - self.data = data - - self.data = list(bytearray(self.data)) - - self.indexes = {} - self.lookbacks = {} - for method in self.lookback_methods: - self.lookbacks[method] = {} - - self.address = 0 - self.end = len(self.data) - self.output = [] - self.literal = None - - while self.address < self.end: - - if self.score(): - self.do_literal() - self.do_winner() - - else: - if self.literal == None: - self.literal = self.address - self.address += 1 - - self.do_literal() - - self.output += [lz_end] - return self.output - - def reset_scores(self): - self.scores = {} - self.offsets = {} - self.helpers = {} - for method in self.min_scores.iterkeys(): - self.scores[method] = 0 - - def bit_flip(self, byte): - return bit_flipped[byte] - - def do_literal(self): - if self.literal != None: - length = abs(self.address - self.literal) - start = min(self.literal, self.address + 1) - self.helpers['literal'] = self.data[start:start+length] - self.do_cmd('literal', length) - self.literal = None - - def score(self): - self.reset_scores() - - map(self.score_literal, ['iterate', 'alternate', 'blank']) - - for method in self.lookback_methods: - self.scores[method], self.offsets[method] = self.find_lookback(method, self.address) - - # Compatibility: - # If a lookback is close, reduce the scores of other commands - best_method, best_score = max( - self.scores.items(), - key = lambda x: ( - x[1], - -self.preference.index(x[0]) - ) - ) - for method in self.lookback_methods: - for address in xrange(self.address+1, self.address+min(best_score, 6)): - if self.find_lookback(method, address)[0] > max(self.min_scores[method], best_score): - # BUG: lookbacks can reduce themselves. This appears to be a bug in the target also. - for m, score in self.scores.items(): - self.scores[m] = min(score, address - self.address) - - return any( - score - > self.min_scores[method] + int(score > lowmax) - for method, score in self.scores.iteritems() - ) - - def read(self, address=None): - if address is None: - address = self.address - if 0 <= address < len(self.data): - return self.data[address] - return None - - def find_all_lookbacks(self): - for method in self.lookback_methods: - for address, byte in enumerate(self.data): - self.find_lookback(method, address) - - def find_lookback(self, method, address=None): - if address is None: - address = self.address - - existing = self.lookbacks.get(method, {}).get(address) - if existing != None: - return existing - - lookback = 0, None - - # Better to not carelessly optimize at the moment. - """ - if address < 2: - return lookback - """ - - byte = self.read(address) - if byte is None: - return lookback - - direction, mutate = { - 'repeat': ( 1, int), - 'reverse': (-1, int), - 'flip': ( 1, self.bit_flip), - }[method] - - # Doesn't seem to help - """ - if mutate == self.bit_flip: - if byte == 0: - self.lookbacks[method][address] = lookback - return lookback - """ - - data_len = len(self.data) - is_two_byte_index = lambda index: int(index < address - 0x7f) - - for index in self.get_indexes(mutate(byte)): - - if index >= address: - break - - old_length, old_index = lookback - if direction == 1: - if old_length > data_len - index: break - else: - if old_length > index: continue - - if self.read(index) in [None]: continue - - length = 1 # we know there's at least one match, or we wouldn't be checking this index - while 1: - this_byte = self.read(address + length) - that_byte = self.read(index + length * direction) - if that_byte == None or this_byte != mutate(that_byte): - break - length += 1 - """ - if direction == 1: - if not any(self.data[address+2:address+length]): continue - """ - if length - is_two_byte_index(index) >= old_length - is_two_byte_index(old_index): # XXX >? - # XXX maybe avoid two-byte indexes when possible - lookback = length, index - - self.lookbacks[method][address] = lookback - return lookback - - def get_indexes(self, byte): - if not self.indexes.has_key(byte): - self.indexes[byte] = [] - index = -1 - while 1: - try: - index = self.data.index(byte, index + 1) - except ValueError: - break - self.indexes[byte].append(index) - return self.indexes[byte] - - def score_literal(self, method): - address = self.address - - compare = { - 'blank': [0], - 'iterate': [self.read(address)], - 'alternate': [self.read(address), self.read(address + 1)], - }[method] - - # XXX may or may not be correct - if method == 'alternate' and compare[0] == 0: - return - - length = 0 - while self.read(address + length) == compare[length % len(compare)]: - length += 1 - - self.scores[method] = length - self.helpers[method] = compare - - def do_winner(self): - winners = filter( - lambda (method, score): - score - > self.min_scores[method] + int(score > lowmax), - self.scores.iteritems() - ) - winners.sort( - key = lambda (method, score): ( - -(score - self.min_scores[method] - int(score > lowmax)), - self.preference.index(method) - ) - ) - winner, score = winners[0] - - length = min(score, max_length) - self.do_cmd(winner, length) - self.address += length - - def do_cmd(self, cmd, length): - start_address = self.address - - cmd_length = length - 1 - - output = [] - - if length > lowmax: - output.append( - (self.commands['long'] << 5) - + (self.commands[cmd] << 2) - + (cmd_length >> 8) - ) - output.append( - cmd_length & 0xff - ) - else: - output.append( - (self.commands[cmd] << 5) - + cmd_length - ) - - self.helpers['blank'] = [] # quick hack - output += self.helpers.get(cmd, []) - - if cmd in self.lookback_methods: - offset = self.offsets[cmd] - # Negative offsets are one byte. - # Positive offsets are two. - if start_address - offset <= 0x7f: - offset = start_address - offset + 0x80 - offset -= 1 # this seems to work - output += [offset] - else: - output += [offset / 0x100, offset % 0x100] # big endian - - if self.debug: - print ' '.join(map(str, [ - cmd, length, '\t', - ' '.join(map('{:02x}'.format, output)), - self.data[start_address:start_address+length] if cmd in self.lookback_methods else '', - ])) - - self.output += output - - - -class Decompressed: - """ - Interpret and decompress lz-compressed data, usually 2bpp. - """ - - """ - Usage: - data = Decompressed(lz).output - or - data = Decompressed().decompress(lz) - or - d = Decompressed() - d.lz = lz - data = d.decompress() - - To decompress from offset 0x80000 in a rom: - data = Decompressed(rom, start=0x80000).output - """ - - lz = None - start = 0 - commands = lz_commands - debug = False - - arg_names = 'lz', 'start', 'commands', 'debug' - - def __init__(self, *args, **kwargs): - self.__dict__.update(dict(zip(self.arg_names, args))) - self.__dict__.update(kwargs) - - self.command_names = dict(map(reversed, self.commands.items())) - self.address = self.start - - if self.lz is not None: - self.decompress() - - if self.debug: print self.command_list() - - - def command_list(self): - """ - Print a list of commands that were used. Useful for debugging. - """ - - text = '' - - for name, attrs in self.used_commands: - length = attrs['length'] - address = attrs['address'] - offset = attrs['offset'] - direction = attrs['direction'] - - text += '{0}: {1}'.format(name, length) - text += '\t' + ' '.join( - '{:02x}'.format(int(byte)) - for byte in self.lz[ address : address + attrs['cmd_length'] ] - ) - - if offset is not None: - repeated_data = self.output[ offset : offset + length * direction : direction ] - text += ' [' + ' '.join(map('{:02x}'.format, repeated_data)) + ']' - - text += '\n' - - return text - - - def decompress(self, lz=None): - - if lz is not None: - self.lz = lz - - self.lz = bytearray(self.lz) - - self.used_commands = [] - self.output = [] - - while 1: - - cmd_address = self.address - self.offset = None - self.direction = None - - if (self.byte == lz_end): - self.next() - break - - self.cmd = (self.byte & 0b11100000) >> 5 - - if self.cmd_name == 'long': - # 10-bit length - self.cmd = (self.byte & 0b00011100) >> 2 - self.length = (self.next() & 0b00000011) * 0x100 - self.length += self.next() + 1 - else: - # 5-bit length - self.length = (self.next() & 0b00011111) + 1 - - self.__class__.__dict__[self.cmd_name](self) - - self.used_commands += [( - self.cmd_name, - { - 'length': self.length, - 'address': cmd_address, - 'offset': self.offset, - 'cmd_length': self.address - cmd_address, - 'direction': self.direction, - } - )] - - # Keep track of the data we just decompressed. - self.compressed_data = self.lz[self.start : self.address] - - - @property - def byte(self): - return self.lz[ self.address ] - - def next(self): - byte = self.byte - self.address += 1 - return byte - - @property - def cmd_name(self): - return self.command_names.get(self.cmd) - - - def get_offset(self): - - if self.byte >= 0x80: # negative - # negative - offset = self.next() & 0x7f - offset = len(self.output) - offset - 1 - else: - # positive - offset = self.next() * 0x100 - offset += self.next() - - self.offset = offset - - - def literal(self): - """ - Copy data directly. - """ - self.output += self.lz[ self.address : self.address + self.length ] - self.address += self.length - - def iterate(self): - """ - Write one byte repeatedly. - """ - self.output += [self.next()] * self.length - - def alternate(self): - """ - Write alternating bytes. - """ - alts = [self.next(), self.next()] - self.output += [ alts[x & 1] for x in xrange(self.length) ] - - def blank(self): - """ - Write zeros. - """ - self.output += [0] * self.length - - def flip(self): - """ - Repeat flipped bytes from output. - - Example: 11100100 -> 00100111 - """ - self._repeat(table=bit_flipped) - - def reverse(self): - """ - Repeat reversed bytes from output. - """ - self._repeat(direction=-1) - - def repeat(self): - """ - Repeat bytes from output. - """ - self._repeat() - - def _repeat(self, direction=1, table=None): - self.get_offset() - self.direction = direction - # Note: appends must be one at a time (this way, repeats can draw from themselves if required) - for i in xrange(self.length): - byte = self.output[ self.offset + i * direction ] - self.output.append( table[byte] if table else byte ) - sizes = [ diff --git a/pokemontools/lz.py b/pokemontools/lz.py new file mode 100644 index 0000000..2f30ddf --- /dev/null +++ b/pokemontools/lz.py @@ -0,0 +1,566 @@ +# -*- coding: utf-8 -*- +""" +Pokemon Crystal data de/compression. +""" + +""" +A rundown of Pokemon Crystal's compression scheme: + +Control commands occupy bits 5-7. +Bits 0-4 serve as the first parameter for each command. +""" +lz_commands = { + 'literal': 0, # n values for n bytes + 'iterate': 1, # one value for n bytes + 'alternate': 2, # alternate two values for n bytes + 'blank': 3, # zero for n bytes +} + +""" +Repeater commands repeat any data that was just decompressed. +They take an additional signed parameter to mark a relative starting point. +These wrap around (positive from the start, negative from the current position). +""" +lz_commands.update({ + 'repeat': 4, # n bytes starting from s + 'flip': 5, # n bytes in reverse bit order starting from s + 'reverse': 6, # n bytes backwards starting from s +}) + +""" +The long command is used when 5 bits aren't enough. Bits 2-4 contain a new control code. +Bits 0-1 are appended to a new byte as 8-9, allowing a 10-bit parameter. +""" +lz_commands.update({ + 'long': 7, # n is now 10 bits for a new control code +}) +max_length = 1 << 10 # can't go higher than 10 bits +lowmax = 1 << 5 # standard 5-bit param + +""" +If 0xff is encountered instead of a command, decompression ends. +""" +lz_end = 0xff + + +bit_flipped = [ + sum(((byte >> i) & 1) << (7 - i) for i in xrange(8)) + for byte in xrange(0x100) +] + + +class Compressed: + + """ + Usage: + lz = Compressed(data).output + or + lz = Compressed().compress(data) + or + c = Compressed() + c.data = data + lz = c.compress() + + There are some issues with reproducing the target compressor. + Some notes are listed here: + - the criteria for detecting a lookback is inconsistent + - sometimes lookbacks that are mostly 0s are pruned, sometimes not + - target appears to skip ahead if it can use a lookback soon, stopping the current command short or in some cases truncating it with literals. + - this has been implemented, but the specifics are unknown + - self.min_scores: It's unknown if blank's minimum score should be 1 or 2. Most likely it's 1, with some other hack to account for edge cases. + - may be related to the above + - target does not appear to compress backwards + """ + + def __init__(self, *args, **kwargs): + + self.min_scores = { + 'blank': 1, + 'iterate': 2, + 'alternate': 3, + 'repeat': 3, + 'reverse': 3, + 'flip': 3, + } + + self.preference = [ + 'repeat', + 'blank', + 'flip', + 'reverse', + 'iterate', + 'alternate', + #'literal', + ] + + self.lookback_methods = 'repeat', 'reverse', 'flip' + + self.__dict__.update({ + 'data': None, + 'commands': lz_commands, + 'debug': False, + 'literal_only': False, + }) + + self.arg_names = 'data', 'commands', 'debug', 'literal_only' + + self.__dict__.update(kwargs) + self.__dict__.update(dict(zip(self.arg_names, args))) + + if self.data is not None: + self.compress() + + def compress(self, data=None): + if data is not None: + self.data = data + + self.data = list(bytearray(self.data)) + + self.indexes = {} + self.lookbacks = {} + for method in self.lookback_methods: + self.lookbacks[method] = {} + + self.address = 0 + self.end = len(self.data) + self.output = [] + self.literal = None + + while self.address < self.end: + + if self.score(): + self.do_literal() + self.do_winner() + + else: + if self.literal == None: + self.literal = self.address + self.address += 1 + + self.do_literal() + + self.output += [lz_end] + return self.output + + def reset_scores(self): + self.scores = {} + self.offsets = {} + self.helpers = {} + for method in self.min_scores.iterkeys(): + self.scores[method] = 0 + + def bit_flip(self, byte): + return bit_flipped[byte] + + def do_literal(self): + if self.literal != None: + length = abs(self.address - self.literal) + start = min(self.literal, self.address + 1) + self.helpers['literal'] = self.data[start:start+length] + self.do_cmd('literal', length) + self.literal = None + + def score(self): + self.reset_scores() + + map(self.score_literal, ['iterate', 'alternate', 'blank']) + + for method in self.lookback_methods: + self.scores[method], self.offsets[method] = self.find_lookback(method, self.address) + + # Compatibility: + # If a lookback is close, reduce the scores of other commands + best_method, best_score = max( + self.scores.items(), + key = lambda x: ( + x[1], + -self.preference.index(x[0]) + ) + ) + for method in self.lookback_methods: + for address in xrange(self.address+1, self.address+min(best_score, 6)): + if self.find_lookback(method, address)[0] > max(self.min_scores[method], best_score): + # BUG: lookbacks can reduce themselves. This appears to be a bug in the target also. + for m, score in self.scores.items(): + self.scores[m] = min(score, address - self.address) + + return any( + score + > self.min_scores[method] + int(score > lowmax) + for method, score in self.scores.iteritems() + ) + + def read(self, address=None): + if address is None: + address = self.address + if 0 <= address < len(self.data): + return self.data[address] + return None + + def find_all_lookbacks(self): + for method in self.lookback_methods: + for address, byte in enumerate(self.data): + self.find_lookback(method, address) + + def find_lookback(self, method, address=None): + if address is None: + address = self.address + + existing = self.lookbacks.get(method, {}).get(address) + if existing != None: + return existing + + lookback = 0, None + + # Better to not carelessly optimize at the moment. + """ + if address < 2: + return lookback + """ + + byte = self.read(address) + if byte is None: + return lookback + + direction, mutate = { + 'repeat': ( 1, int), + 'reverse': (-1, int), + 'flip': ( 1, self.bit_flip), + }[method] + + # Doesn't seem to help + """ + if mutate == self.bit_flip: + if byte == 0: + self.lookbacks[method][address] = lookback + return lookback + """ + + data_len = len(self.data) + is_two_byte_index = lambda index: int(index < address - 0x7f) + + for index in self.get_indexes(mutate(byte)): + + if index >= address: + break + + old_length, old_index = lookback + if direction == 1: + if old_length > data_len - index: break + else: + if old_length > index: continue + + if self.read(index) in [None]: continue + + length = 1 # we know there's at least one match, or we wouldn't be checking this index + while 1: + this_byte = self.read(address + length) + that_byte = self.read(index + length * direction) + if that_byte == None or this_byte != mutate(that_byte): + break + length += 1 + """ + if direction == 1: + if not any(self.data[address+2:address+length]): continue + """ + if length - is_two_byte_index(index) >= old_length - is_two_byte_index(old_index): # XXX >? + # XXX maybe avoid two-byte indexes when possible + lookback = length, index + + self.lookbacks[method][address] = lookback + return lookback + + def get_indexes(self, byte): + if not self.indexes.has_key(byte): + self.indexes[byte] = [] + index = -1 + while 1: + try: + index = self.data.index(byte, index + 1) + except ValueError: + break + self.indexes[byte].append(index) + return self.indexes[byte] + + def score_literal(self, method): + address = self.address + + compare = { + 'blank': [0], + 'iterate': [self.read(address)], + 'alternate': [self.read(address), self.read(address + 1)], + }[method] + + # XXX may or may not be correct + if method == 'alternate' and compare[0] == 0: + return + + length = 0 + while self.read(address + length) == compare[length % len(compare)]: + length += 1 + + self.scores[method] = length + self.helpers[method] = compare + + def do_winner(self): + winners = filter( + lambda (method, score): + score + > self.min_scores[method] + int(score > lowmax), + self.scores.iteritems() + ) + winners.sort( + key = lambda (method, score): ( + -(score - self.min_scores[method] - int(score > lowmax)), + self.preference.index(method) + ) + ) + winner, score = winners[0] + + length = min(score, max_length) + self.do_cmd(winner, length) + self.address += length + + def do_cmd(self, cmd, length): + start_address = self.address + + cmd_length = length - 1 + + output = [] + + if length > lowmax: + output.append( + (self.commands['long'] << 5) + + (self.commands[cmd] << 2) + + (cmd_length >> 8) + ) + output.append( + cmd_length & 0xff + ) + else: + output.append( + (self.commands[cmd] << 5) + + cmd_length + ) + + self.helpers['blank'] = [] # quick hack + output += self.helpers.get(cmd, []) + + if cmd in self.lookback_methods: + offset = self.offsets[cmd] + # Negative offsets are one byte. + # Positive offsets are two. + if start_address - offset <= 0x7f: + offset = start_address - offset + 0x80 + offset -= 1 # this seems to work + output += [offset] + else: + output += [offset / 0x100, offset % 0x100] # big endian + + if self.debug: + print ' '.join(map(str, [ + cmd, length, '\t', + ' '.join(map('{:02x}'.format, output)), + self.data[start_address:start_address+length] if cmd in self.lookback_methods else '', + ])) + + self.output += output + + + +class Decompressed: + """ + Interpret and decompress lz-compressed data, usually 2bpp. + """ + + """ + Usage: + data = Decompressed(lz).output + or + data = Decompressed().decompress(lz) + or + d = Decompressed() + d.lz = lz + data = d.decompress() + + To decompress from offset 0x80000 in a rom: + data = Decompressed(rom, start=0x80000).output + """ + + lz = None + start = 0 + commands = lz_commands + debug = False + + arg_names = 'lz', 'start', 'commands', 'debug' + + def __init__(self, *args, **kwargs): + self.__dict__.update(dict(zip(self.arg_names, args))) + self.__dict__.update(kwargs) + + self.command_names = dict(map(reversed, self.commands.items())) + self.address = self.start + + if self.lz is not None: + self.decompress() + + if self.debug: print self.command_list() + + + def command_list(self): + """ + Print a list of commands that were used. Useful for debugging. + """ + + text = '' + + for name, attrs in self.used_commands: + length = attrs['length'] + address = attrs['address'] + offset = attrs['offset'] + direction = attrs['direction'] + + text += '{0}: {1}'.format(name, length) + text += '\t' + ' '.join( + '{:02x}'.format(int(byte)) + for byte in self.lz[ address : address + attrs['cmd_length'] ] + ) + + if offset is not None: + repeated_data = self.output[ offset : offset + length * direction : direction ] + text += ' [' + ' '.join(map('{:02x}'.format, repeated_data)) + ']' + + text += '\n' + + return text + + + def decompress(self, lz=None): + + if lz is not None: + self.lz = lz + + self.lz = bytearray(self.lz) + + self.used_commands = [] + self.output = [] + + while 1: + + cmd_address = self.address + self.offset = None + self.direction = None + + if (self.byte == lz_end): + self.next() + break + + self.cmd = (self.byte & 0b11100000) >> 5 + + if self.cmd_name == 'long': + # 10-bit length + self.cmd = (self.byte & 0b00011100) >> 2 + self.length = (self.next() & 0b00000011) * 0x100 + self.length += self.next() + 1 + else: + # 5-bit length + self.length = (self.next() & 0b00011111) + 1 + + self.__class__.__dict__[self.cmd_name](self) + + self.used_commands += [( + self.cmd_name, + { + 'length': self.length, + 'address': cmd_address, + 'offset': self.offset, + 'cmd_length': self.address - cmd_address, + 'direction': self.direction, + } + )] + + # Keep track of the data we just decompressed. + self.compressed_data = self.lz[self.start : self.address] + + + @property + def byte(self): + return self.lz[ self.address ] + + def next(self): + byte = self.byte + self.address += 1 + return byte + + @property + def cmd_name(self): + return self.command_names.get(self.cmd) + + + def get_offset(self): + + if self.byte >= 0x80: # negative + # negative + offset = self.next() & 0x7f + offset = len(self.output) - offset - 1 + else: + # positive + offset = self.next() * 0x100 + offset += self.next() + + self.offset = offset + + + def literal(self): + """ + Copy data directly. + """ + self.output += self.lz[ self.address : self.address + self.length ] + self.address += self.length + + def iterate(self): + """ + Write one byte repeatedly. + """ + self.output += [self.next()] * self.length + + def alternate(self): + """ + Write alternating bytes. + """ + alts = [self.next(), self.next()] + self.output += [ alts[x & 1] for x in xrange(self.length) ] + + def blank(self): + """ + Write zeros. + """ + self.output += [0] * self.length + + def flip(self): + """ + Repeat flipped bytes from output. + + Example: 11100100 -> 00100111 + """ + self._repeat(table=bit_flipped) + + def reverse(self): + """ + Repeat reversed bytes from output. + """ + self._repeat(direction=-1) + + def repeat(self): + """ + Repeat bytes from output. + """ + self._repeat() + + def _repeat(self, direction=1, table=None): + self.get_offset() + self.direction = direction + # Note: appends must be one at a time (this way, repeats can draw from themselves if required) + for i in xrange(self.length): + byte = self.output[ self.offset + i * direction ] + self.output.append( table[byte] if table else byte ) From 8d86abe8823acc8c44780ae15646ebb7f719c5ec Mon Sep 17 00:00:00 2001 From: yenatch Date: Wed, 11 Mar 2015 22:43:04 -0700 Subject: [PATCH 13/13] hotfix: gfx.py doesn't need baserom.gbc to work --- pokemontools/gfx.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pokemontools/gfx.py b/pokemontools/gfx.py index 53fc039..066811a 100644 --- a/pokemontools/gfx.py +++ b/pokemontools/gfx.py @@ -934,12 +934,15 @@ def get_pic_animation(tmap, w, h): return frame_text, bitmask_text -def dump_pic_animations(addresses={'bitmasks': 'BitmasksPointers', 'frames': 'FramesPointers'}, pokemon=pokemon_constants, rom=load_rom()): +def dump_pic_animations(addresses={'bitmasks': 'BitmasksPointers', 'frames': 'FramesPointers'}, pokemon=pokemon_constants, rom=None): """ The code to dump pic animations from rom is mysteriously absent. Here it is again, but now it dumps images instead of text. Said text can then be derived from the images. """ + + if rom is None: rom = load_rom() + # Labels can be passed in instead of raw addresses. for which, offset in addresses.items(): if type(offset) is str: