#!/usr/bin/python
"""Encode binary data as a sequence of dictionary words, and decode it again.

The input bytes are consumed in chunks of ``bits`` bits.  Each chunk is
written as ``ngrps`` words, i.e. as an ``ngrps``-digit number in base
``wlcnt`` (most significant digit first).  The input is zero-padded so the
last chunk is complete, and one extra trailer word records how much padding
was added so that ``word2bin`` can strip it again.

The code runs unchanged on Python 2.7 and Python 3.
"""

import string
import math
import bisect

# A larger dictionary can be substituted here, e.g. one word per line read
# from /usr/share/dict/web2.
# NOTE(review): the trailer arithmetic appears to round-trip only when `bits`
# is a multiple of 8 (true for this list: 8 words * 8 groups -> 24 bits) --
# confirm before switching to a dictionary whose size gives another value.
wordlist = [ 'Carlin', 'shit', 'piss', 'fuck', 'cunt', 'cocksucker', 'motherfucker', 'tits' ]
wordlist.sort()                            # word2bin looks words up by bisection
wlcnt = len(wordlist)                      # radix of one word "digit"
ngrps = 8                                  # words emitted per chunk
bitspgrp = math.log(wlcnt) / math.log(2)   # bits carried by a single word
bits = int(bitspgrp * ngrps)               # whole bits carried by one chunk


def _word_index(word):
    """Return the position of *word* in ``wordlist``.

    Raises:
        ValueError: if *word* is not in the list.  A bare bisection would
            silently return the index of a neighbouring word instead.
    """
    indx = bisect.bisect(wordlist, word) - 1
    if indx < 0 or wordlist[indx] != word:
        raise ValueError("word not in word list", word)
    return indx


def bin2word(bin):
    """Encode the byte string *bin* as space-separated words.

    Args:
        bin: ``bytes``/``bytearray`` (a Python 2 ``str``).  A text string is
            also accepted and is encoded as latin-1, so each character must
            have a code point below 256.

    Returns:
        A ``str`` of ``ngrps`` words per ``bits``-bit chunk of the padded
        input, followed by one trailer word.
    """
    if isinstance(bin, type(u'')):
        # Text input: map each character to the byte with the same ordinal.
        bin = bin.encode('latin-1')

    # Number of zero bytes needed to complete the last chunk.  This is always
    # at least 1, which is what lets word2bin strip it with a [:-nz] slice.
    nz = (bits - ((len(bin) * 8) % bits) + 7) // 8
    data = bytearray(bin) + bytearray(nz)

    ret = []
    g = 0        # bit accumulator
    nbits = 0    # number of valid bits currently held in g
    for byte in data:
        g = g * 256 + byte
        nbits += 8
        if nbits >= bits:
            # Split off the top `bits` bits as one chunk; keep the rest in r.
            g, r = divmod(g, 2 ** (nbits - bits))
            t = []
            for _ in range(ngrps):
                g, indx = divmod(g, wlcnt)
                t.append(wordlist[indx])
            # Digits were produced least significant first.
            t.reverse()
            ret.extend(t)
            g = r
            nbits -= bits

    # Trailer word: packs the padding byte count and the leftover bit count.
    ret.append(wordlist[nz + nbits * ((bits + 7) // 8 + 1)])
    return ' '.join(ret)


def word2bin(words):
    """Decode a string produced by ``bin2word`` back into bytes.

    Args:
        words: words separated by single spaces; the last word is the trailer.

    Returns:
        The original data as ``bytes`` (a ``str`` on Python 2).

    Raises:
        ValueError: if a word is not in ``wordlist``, if the number of data
            words is not a multiple of ``ngrps``, if a group of words encodes
            a value that does not fit in ``bits`` bits, or if the trailer
            does not match the decoded data.
    """
    words = words.split(' ')
    ngroups, extra = divmod(len(words) - 1, ngrps)
    if extra:
        # Previously the stray words were silently ignored.
        raise ValueError("input isn't valid: incomplete word group", extra)

    ret = bytearray()
    g = 0        # bit accumulator
    nbits = 0    # number of valid bits currently held in g
    for i in range(ngroups):
        r = g
        g = 0
        for word in words[i * ngrps:(i + 1) * ngrps]:
            g = g * wlcnt + _word_index(word)
        if g >> bits:
            # wlcnt ** ngrps may exceed 2 ** bits; such a value can never
            # come from bin2word and would corrupt the bits already held.
            raise ValueError("input isn't valid: word group out of range", i)
        g = r * 2 ** bits + g
        nbits += bits
        # Emit every complete byte, most significant first.
        while nbits >= 8:
            r, g = divmod(g, 2 ** (nbits - 8))
            ret.append(r)
            nbits -= 8

    z = _word_index(words[-1])
    eb, nz = divmod(z, (bits + 7) // 8 + 1)
    if nbits != eb:
        raise ValueError("input isn't valid", nbits, eb, nz)
    if nz < 1 or nz > len(ret):
        # nz == 0 would make the slice below return nothing at all, and an
        # oversized nz would silently discard every decoded byte.
        raise ValueError("input isn't valid", nbits, eb, nz)
    return bytes(ret[:-nz])


if __name__ == '__main__':
    import sys

    # Self-test: round-trip random payloads of every length from 0 to 99.
    with open('/dev/urandom', 'rb') as inp:
        for length in range(100):
            data = inp.read(length)
            encoded = bin2word(data)
            decoded_ok = 0
            try:
                decoded = word2bin(encoded)
                decoded_ok = 1
                if decoded != data:
                    raise ValueError
            except ValueError:
                print('failed at length: %s' % (length,))
                print(repr(data))
                print(encoded)
                if decoded_ok:
                    print(repr(decoded))
                raise
    print('testing completed...')