#!/usr/bin/python
"""Encode arbitrary binary data as a sequence of dictionary words, and back.

The data is treated as one long bit string.  It is zero-padded and cut into
groups of ``bits`` bits; each group is written as ``ngrps`` words, which are
the base-``wlcnt`` digits of the group value (most significant digit first)
looked up in the sorted word list.  One extra trailer word is appended whose
index is ``nz + spare * W``, where ``nz`` is the number of zero bytes that
were appended as padding, ``spare`` is the number of padding bits left over
after the last complete group, and ``W = (bits + 7) // 8``.

The code is written to run unchanged on Python 2.6+ and Python 3.
"""

import string
import math
import bisect

# Dictionary file the default word list is read from.
DICT_PATH = '/usr/share/dict/web2'

# Number of words that together encode one group of bits.
ngrps = 6

# Message used for every "this is not something bin2word produced" error.
_INVALID = "input isn't valid."


def _load_words(path):
    """Read the dictionary at *path* and return ``(marker, sorted_words)``.

    *marker* is the first line of the file; it is kept out of the word list.
    Only the line terminator is stripped, so a final line without a trailing
    newline keeps its last character.  An empty file yields ``(None, [])``.
    """
    with open(path) as fp:
        lines = [line.rstrip('\n') for line in fp]
    if not lines:
        return None, []
    return lines[0], sorted(lines[1:])


# Load the default word list.  A missing/unreadable dictionary is not fatal at
# import time (callers may supply their own list via ``wlist``); the error is
# reported when the default list is actually needed.
try:
    marker, wordlist = _load_words(DICT_PATH)
    _load_error = None
except (IOError, OSError) as err:
    marker, wordlist = None, []
    _load_error = str(err)

wlcnt = len(wordlist)
# Bits of information carried by a single word, and by a whole group.
bitspgrp = math.log(wlcnt) / math.log(2) if wlcnt else 0.0
bits = int(bitspgrp * ngrps)


def _resolve(wlist):
    """Return ``(words, count, group_bits, group_bytes)`` for *wlist*.

    ``None`` selects the module-level default list (``wordlist``, ``wlcnt``
    and ``bits`` are read at call time).  Any other value must be a sorted
    sequence of unique words; its group size is derived with the same formula
    used for the default list.

    Raises:
        IOError: the default list was requested but could not be loaded.
        ValueError: the list is too small to hold every trailer index
            (at most ``8 * group_bytes``) or to encode at least 8 bits
            per group.
    """
    if wlist is None:
        if not wordlist and _load_error is not None:
            raise IOError("default word list unavailable: %s" % _load_error)
        wlist, count, gbits = wordlist, wlcnt, bits
    else:
        count = len(wlist)
        gbits = int(math.log(count) / math.log(2) * ngrps) if count else 0
    gbytes = (gbits + 7) // 8
    if gbits < 8 or count <= 8 * gbytes:
        raise ValueError("word list is too small")
    return wlist, count, gbits, gbytes


def _word_index(wlist, word):
    """Return the index of *word* in sorted *wlist*; ValueError if absent."""
    idx = bisect.bisect(wlist, word) - 1
    if idx < 0 or wlist[idx] != word:
        raise ValueError(_INVALID)
    return idx


def bin2word(bin, wlist=None):
    """Encode the byte string *bin* as space-separated dictionary words.

    Args:
        bin: the data to encode (``str`` on Python 2, bytes-like on
            Python 3).  The argument itself is never modified.
        wlist: optional sorted list of unique words to use instead of the
            module-level default list.

    Returns:
        A string of ``ngrps * k + 1`` words separated by single spaces:
        ``k >= 1`` groups of ``ngrps`` words followed by the trailer word.

    Raises:
        IOError, ValueError: see ``_resolve``.
    """
    wlist, count, gbits, gbytes = _resolve(wlist)
    data = bytearray(bin)

    # Pad with enough zero bytes to reach the next group boundary.  This is
    # always at least one byte, and a full extra group when the data already
    # ends on a boundary.
    nz = (gbits - (len(data) * 8) % gbits + 7) // 8
    data.extend(bytearray(nz))

    out = []
    acc = 0     # bits read but not yet emitted
    nbits = 0   # number of valid bits in acc
    for byte in data:
        acc = (acc << 8) | byte
        nbits += 8
        if nbits >= gbits:
            # Peel the top gbits bits off as one group; keep the rest.
            group, acc = divmod(acc, 1 << (nbits - gbits))
            digits = []
            for _ in range(ngrps):
                group, idx = divmod(group, count)
                digits.append(wlist[idx])
            digits.reverse()            # most significant word first
            out.extend(digits)
            nbits -= gbits

    # Trailer: padding byte count plus the (0..7) left-over padding bits.
    out.append(wlist[nz + nbits * gbytes])
    return ' '.join(out)


def word2bin(words, wlist=None):
    """Decode a word string produced by ``bin2word`` back into bytes.

    Args:
        words: whitespace-separated words, ``ngrps * k + 1`` of them.
        wlist: optional sorted list of unique words; must be the list that
            was used for encoding.  Defaults to the module-level list.

    Returns:
        The original data (``str`` on Python 2, ``bytes`` on Python 3).

    Raises:
        ValueError: the word count is wrong, a word is not in the list, a
            group value is out of range, or the trailer/padding is
            inconsistent.
        IOError: see ``_resolve``.
    """
    wlist, count, gbits, gbytes = _resolve(wlist)
    tokens = words.split()
    ngroups, extra = divmod(len(tokens), ngrps)
    if ngroups < 1 or extra != 1:
        raise ValueError(_INVALID)
    indexes = [_word_index(wlist, tok) for tok in tokens]

    out = bytearray()
    acc = 0
    nbits = 0
    for i in range(ngroups):
        group = 0
        for idx in indexes[i * ngrps:(i + 1) * ngrps]:
            group = group * count + idx
        if group >> gbits:
            # count**ngrps can exceed 2**gbits; such values are never emitted.
            raise ValueError(_INVALID)
        acc = (acc << gbits) | group
        nbits += gbits
        while nbits >= 8:
            nbits -= 8
            byte, acc = divmod(acc, 1 << nbits)
            out.append(byte)

    # The encoder's left-over bit count is fully determined by the number of
    # groups, so derive it here instead of splitting the trailer with divmod:
    # the trailer's padding count ranges over 1..gbytes inclusive, which makes
    # a divmod by gbytes ambiguous.
    spare = (8 - nbits) % 8
    nz = indexes[-1] - spare * gbytes
    if not 1 <= nz <= gbytes:
        raise ValueError(_INVALID)

    # Length of the padded data the encoder consumed; a trailing partial byte
    # (nbits > 0) was never emitted above but counts towards it.
    padded_len = len(out) + (1 if nbits else 0)
    size = padded_len - nz

    # Everything past the payload must be the encoder's zero padding.
    if acc or any(out[size:]):
        raise ValueError(_INVALID)
    return bytes(out[:size])