#!python
"""
This program converts SVG files to raw Unicode strings and back again
It was written for a twitter image encoding competition on Stack Overflow
http://stackoverflow.com/questions/891643/twitter-image-encoding-challenge
Copyright: SpliFF
License: Public Domain
Requirements:
Python 2.5+
lxml library
bitfields module (http://www.warriorhut.org/graphics/svg_to_unicode/bitfields.py)
python built for wide unicode characters (UCS4)
Usage:
python2.5 svg_to_unicode-0.2.py [-v] encode -i in.svg -o unicode.txt
python2.5 svg_to_unicode-0.2.py [-v] decode -i unicode.txt -o out.svg
-v: verbose
-i: input file (default stdin)
-o: output file (default stdout)
action: encode / decode (default encode)
How it works:
When tracing bitmaps most libraries will give only 4 types of information we really need to send:
1.) width and height of document (possibly optional since this is vector graphics)
2.) fill color of each path
3.) commands on each path (basically handle types like corner, curve)
4.) point values (where to place the commands)
So this script first dives in with an XML parser and grabs (1). It then walks each path node in the XML and
grabs (2), (3) and (4). To better compact the data it then compresses the range of (4) from a float down to
a maximum integer value of 127 so it can pack two points into each unicode character. Finally the script
increases packed values by about 5000 to avoid a conflict with the values reserved for colors and commands.
Colors are compressed to a range no greater than 4096 by removing 4 bits of each color channel (RGB).
Known Issues:
- Conflict with unicode surrogates
Changelog:
0.2
- Switch to XML parsing (handles more SVG files)
- Allow input/output from stdin/stdout
- Stores 2 path nodes per unicode character
- Supports command-line options
- Debug sent to stderr
- Output to stdout
"""
import re
import sys
from bitfields import *
from lxml import etree as et
from math import sqrt
# Default configuration. Can be set from command-line.
# Keys are the single-letter option names.
config = dict(
    i='-',   # input file path, or '-' for stdin
    o='-',   # output file path, or '-' for stdout
    v=True   # verbose: debug() only writes when this is true
)
# Debug writer
def debug(*msg):
    """Write the given values to stderr when verbose mode is enabled.

    Each positional argument is converted with str(), the pieces are joined
    with single spaces, and the line is terminated with CRLF. Nothing is
    written when config['v'] is false.
    """
    if not config['v']:
        return
    line = ' '.join(map(str, msg))
    sys.stderr.write(line + '\r\n')
# Unicode Character Sets (stored by code point value)
#
# All ranges below are half-open, so each upper bound is one past the last
# code point that belongs to the set.

# Code points below the surrogate block: no reserved characters.
UC_SAFE = frozenset(range(0, 0xD800))

# Noncharacters: the last two code points of each of the 17 planes
# (U+FFFE, U+FFFF, U+1FFFE, U+1FFFF, ... U+10FFFE, U+10FFFF) plus the
# contiguous block U+FDD0..U+FDEF. 66 code points in total.
# Generator expressions are used so no loop variables leak into the module.
UC_NONCHAR = (
    frozenset((plane << 16) | tail
              for plane in range(0x11)
              for tail in (0xFFFE, 0xFFFF))
    | frozenset(range(0xFDD0, 0xFDF0))
)

# Surrogates: U+D800..U+DFFF inclusive (2048 code points).
UC_SURROGATE = frozenset(range(0xD800, 0xE000))

# Every Unicode code point: U+0000..U+10FFFF inclusive.
UC_ALL = frozenset(range(0, 0x110000))

# Code points that may appear in the encoded output.
UC_ALLOWED = UC_ALL - UC_NONCHAR - UC_SURROGATE

# Number of usable code points (1111998 with the sets above).
UC_MAX = len(UC_ALLOWED)
debug('Max Word Value:', UC_MAX)
# As written, this literal is a single newline character.
# NOTE(review): presumably a header emitted ahead of generated output; its
# use is not visible in this part of the file. Confirm that the literal has
# not lost markup that was meant to be inside it.
OUT_HEADER = """
"""
RE_WIDTH_HEIGHT = re.compile(r'