#!python """ This program converts SVG files to raw Unicode strings and back again It was written for a twitter image encoding competition on Stack Overflow http://stackoverflow.com/questions/891643/twitter-image-encoding-challenge Copyright: SpliFF License: Public Domain Requirements: Python 2.5+ lxml library bitfields module (http://www.warriorhut.org/graphics/svg_to_unicode/bitfields.py) python built for wide unicode characters (UCS2) Usage: python2.5 svg_to_unicode-0.2.py [-v] encode -i in.svg -o unicode.txt python2.5 svg_to_unicode-0.2.py [-v] decode -i unicode.txt -o out.svg -v: verbose -i: input file (default stdin) -o: output file (default stdout) action: encode / decode (default encode) How it works: When tracing bitmaps most libraries will give only 4 types of information we really need to send: 1.) width and height of document (possibly optional since this is vector graphics) 2.) fill color of each path 3.) commands on each path (basically handle types like corner, curve) 4.) point values (where to place the commands) So this script first dives in with an XML parser and grabs (1). It then walks each path node in the XML and grabs (2), (3) and (4). To better compact the data it then compresses the range of (4) from a float down to a maximum integer value of 127 so it can pack two points into each unicode character. Finally the script increases packed values by about 5000 to avoid a conflict with the values reserved for colors and commands. Colors are compressed to a range no greater than 4096 by removing 4 bits of each color channel (RGB). Known Issues: - Conflict with unicode surrogates Changelog: 0.2 - Switch to XML parsing (handles more SVG files) - Allow input/output from stdin/stdout - Stores 2 path nodes per unicode character - Supports command-line options - Debug sent to stderr - Output to stdout """ import re import sys from bitfields import * from lxml import etree as et from math import sqrt # Default configuration. 
# Can be set from command-line.
# Keys are the single-letter option names; string-valued options consume the
# next command-line argument, boolean options are simply switched on.
config = {
    'i': '-',  # input file path or '-' (stdin)
    'o': '-',  # output file path or '-' (stdout)
    # NOTE(review): verbose already defaults to True and the option parser
    # below only ever assigns True, so passing -v changes nothing.
    'v': True  # verbose
}

# Debug writer
def debug(*msg):
    """Writes debugging to stderr in verbose mode.

    Every positional argument is converted with str(), the pieces are joined
    with single spaces and the line is terminated with CRLF. Nothing is
    written when config['v'] is false.
    """
    if config['v']:
        sys.stderr.write(' '.join([str(m) for m in msg]) + '\r\n')

# Unicode Character Sets (stored by value)
# NOTE(review): range() excludes its end value, so 0xDFFF, 0xFDEF and 0x10FFFF
# are not members of the sets built below - confirm whether that is intended.
UC_SAFE = frozenset(range(0,0xD800)) # no reserved characters (everything below the surrogate block)
# NOTE(review): str(x) yields the decimal digits '1'..'10', so this evaluates
# 0x1FFFE..0x9FFFE plus 0x10FFFE; no 0xAFFFE..0xFFFFE values are produced.
# The `list + range(...)` concatenation relies on Python 2, where range()
# returns a list.
UC_NONCHAR = frozenset([eval('0x'+str(x)+'FFFE') for x in range(1,11)] + range(0xFDD0,0xFDEF))
UC_SURROGATE = frozenset(range(0xD800,0xDFFF))
UC_ALL = frozenset(range(0,0x10FFFF))
UC_ALLOWED = UC_ALL - UC_NONCHAR - UC_SURROGATE
# Number of code points left once non-characters and surrogates are removed.
UC_MAX = len(UC_ALLOWED)
# Runs at module load time, before any command-line option has been applied.
debug('Max Word Value:', UC_MAX)

# Output templates.
# NOTE(review): these literals are (almost) empty in this copy of the file;
# presumably they held SVG markup that was stripped in transit - restore them
# from the original source before relying on decode output.
OUT_HEADER = """ """
OUT_PATH = """"""
OUT_BKG = """"""
OUT_FOOTER = """"""

# NOTE(review): the next line is damaged. The text between the start of the
# RE_WIDTH_HEIGHT pattern and the tail of an argument-length test is missing
# from this copy. Whatever defined `encode`, `decode`, `action` and the loop
# variable `arg` (all used below) is not visible here - presumably it lived
# in the lost span. The surviving tokens are kept verbatim; the indentation
# of the option-handling code that follows is a best-effort reconstruction.
RE_WIDTH_HEIGHT = re.compile(r' 1 and arg[0] == '-':
    # Each character after the dash is treated as a separate option letter.
    for key in arg[1:]:
        if config.has_key(key):
            if type(config[key]) == str:
                # Remember the option; the following argument supplies its value.
                config_key = key # string argument
            else:
                config[key] = True # boolean arg
        else:
            # NOTE(review): raising a plain string is deprecated in Python 2.5
            # and rejected by Python 2.6+ - an exception class is needed here.
            raise "Unknown configuration option '%s'" % key
else:
    # Any other argument is stored as the value of the most recent string
    # option. NOTE(review): config_key is unbound if no such option came
    # first, unless the lost section initialises it - verify.
    config[config_key] = arg

# Resolve the input and output streams; '-' selects stdin / stdout.
# NOTE(review): files opened here are never explicitly closed in the visible code.
if config['i'] == '-':
    input_file = sys.stdin
else:
    input_file = file(config['i'])

if config['o'] == '-':
    output_file = sys.stdout
else:
    output_file = file(config['o'],'w+')

#encode to unicode
# Reads the whole SVG document, encodes it and writes the result as UTF-16.
if action == 'encode':
    svg_data = input_file.read()
    encoded_data = encode(svg_data)
    debug('=========== UNICODE ===============')
    debug(repr(encoded_data))
    output_file.write(encoded_data.encode('utf-16'))

#decode to svg
# Reads UTF-16 bytes, decodes them to a unicode string (undecodable input is
# dropped because of the 'ignore' error handler) and writes the rebuilt SVG.
if action == 'decode':
    raw_data = input_file.read()
    # NOTE(review): the result of this repr() call is discarded; the
    # statement has no effect.
    repr(raw_data)
    encoded_data = unicode( raw_data, "utf-16", 'ignore' )
    new_svg_data = decode(encoded_data)
    debug('============= SVG =================')
    debug(new_svg_data)
    output_file.write(new_svg_data)