'===========================================================================
' Subject: WORD COMPRESSIONS                  Date: Unknown Date (00:00)
'  Author: Quinn Tyler Jackson                Code: QB, QBasic, PDS
'    Keys: WORD,COMPRESSIONS                Packet: ALGOR.ABC
'===========================================================================
'
' Demonstration driver: reads TEST.TXT line by line, compresses each line
' with funQompress$, and prints which method was chosen together with the
' number of bytes saved (negative when the record grew).

DECLARE FUNCTION fqjSqueeze$ (InString$, Table$())
DECLARE FUNCTION fqjTableInit% ()
DECLARE FUNCTION funQompress$ (InText$)

DEFINT A-Z
OPTION BASE 1

' Compression types.  The first byte of every string returned by
' funQompress$ is one of these markers, so all four values must differ.
CONST WordType = 255        'Word compression was achieved.
CONST TrigramType = 254     'Trigram compression was achieved.
CONST DigramType = 253      'Digram compression was achieved.
CONST NoType = 252          'No reasonable compression was possible.
                            'The best that could be achieved
                            'follows the NoType byte.

' Short names used by the compression code.  They must be real constants:
' an undeclared TriType or DiType is just an integer variable holding 0.
CONST TriType = TrigramType
CONST DiType = DigramType

' Lookup tables.  An element's index is the character code that replaces
' the element's text in compressed output.  Each table has exactly one slot
' per DATA item in its section; a larger table would make READ run on into
' the DATA of the following section.
DIM SHARED WordTable$(128 TO 228)       '101 items under EnglishWordData
DIM SHARED TriTable$(128 TO 225)        ' 98 items under EnglishTrigramData
DIM SHARED DiTable$(128 TO 227)         '100 items under EnglishDigramData

nul = fqjTableInit          'The returned timing value is not used here.

OPEN "TEST.TXT" FOR INPUT AS #1

' Test EOF before reading so that an empty file is handled cleanly.
DO UNTIL EOF(1)
    LINE INPUT #1, Txt$
    NewText$ = funQompress$(Txt$)

    ' Bytes saved on this line.
    LineSaved = LEN(Txt$) - LEN(NewText$)

    ' Long accumulator: the running total can exceed 32767.
    Saved& = Saved& + LineSaved

    SELECT CASE ASC(NewText$)
        CASE WordType
            PRINT "W: "; LineSaved
        CASE TrigramType
            PRINT "T: "; LineSaved
        CASE DigramType
            PRINT "D: "; LineSaved
        CASE NoType
            PRINT "N: "; LineSaved
        CASE ELSE
            ' Deliberately empty: an unrecognised marker prints nothing.
            ' (QuickBASIC can raise "CASE ELSE expected" when no CASE
            ' matches and this clause is missing.)
    END SELECT
LOOP

PRINT "Saved"; Saved&; "bytes on a file of"; LOF(1); "bytes."
CLOSE
END

EnglishWordData:
' Below are listed the hundred most frequently used words in English.
' Taken from fifteen English authors and many newspapers.  Compiled by
' Frank R. Fraprie.  Taken from Helen Fouche Gaines's _Cryptanalysis_,
' published by Dover Publications since 1956.
' Word list read into the word table (continues the EnglishWordData section).
' NOTE: "time" appears twice (last-but-one item of the ninth row and first
' item of the tenth), so this section holds 101 items, not 100.
DATA the,of,and,to,a,in,that,is,I,it
DATA for,as,with,was,his,he,be,not,by,but
DATA have,you,which,are,on,or,her,had,at,from
DATA this,my,they,all,their,an,she,has,were,me
DATA been,him,one,so,if,will,there,who,no,we
DATA when,what,your,more,would,them,some,than,may,upon
DATA its,out,into,our,these,man,up,do,like,after
DATA shall,great,now,such,should,other,only,any,then,yet
DATA about,those,can,made,well,old,must,us,said,time,two
DATA time,even,new,could,very,much,own,most,might,first

EnglishTrigramData:
' The ninety-eight most frequent English trigrams, combining a count of
' 20,000 trigrams by Fletcher Pratt, in "Secret and Urgent," supposedly not
' including overlaps between words, and 5,000 by Frank R. Fraprie,
' including overlaps.  Also taken from Helen Fouche Gaines's _Cryptanalysis_.
' NOTE: nine rows of ten items plus a final row of eight = 98 items.  The
' item "will" in the last row is four characters long, not three.
DATA the,ing,and,ion,ent,for,tio,ere,her,ate
DATA ver,ter,tha,ati,hat,ers,his,res,ill,are
DATA con,nce,all,eve,ith,ted,ain,est,man,red
DATA thi,ive,rea,wit,ons,ess,ave,per,ect,one
DATA und,int,ant,hou,men,was,oun,pro,sta,ine
DATA whi,ove,tin,ast,der,ous,rom,ven,ard,ear
DATA din,sti,not,ort,tho,day,ore,but,out,ure
DATA str,tic,ame,com,our,wer,ome,een,lar,les
DATA san,set,any,art,nte,rat,rut,ica,ich,nde
DATA pre,enc,has,whe,will,era,lin,tra

EnglishDigramData:
' One hundred digrams from the same book, also compiled by
' Pratt and Fraprie from 20,000 trigrams.
' Digram list (continues the EnglishDigramData section): 100 items.
' NOTE: "es,st,or,nt,hi" is repeated at the start of the third row and "il"
' occurs in both the fifth and the tenth rows.  A repeated item can never
' match, because the earlier copy has already replaced every occurrence.
DATA th,in,er,re,an,he,ar,en,ti,te
DATA at,on,ha,ou,it,es,st,or,nt,hi
DATA es,st,or,nt,hi,ea,ve,co,de,ra
DATA ro,li,ri,io,le,nd,ma,se,al,ic
DATA fo,il,ne,la,ta,el,me,ec,is,di
DATA si,ca,un,ut,nc,wi,ho,tr,be,ce
DATA wh,ll,fi,no,to,pe,as,wa,ur,lo
DATA pa,us,mo,om,ai,pr,we,ac,ee,et
DATA sa,ni,rt,na,ol,ev,ie,mi,ng,pl
DATA iv,po,ch,ei,ad,ss,il,os,ul,em

FUNCTION fqjSqueeze$ (InString$, Table$())
    ' Returns a copy of InString$ in which every occurrence of each Table$
    ' entry has been replaced by the single character whose code equals
    ' that entry's index.  Entries are applied in ascending index order, so
    ' where candidates overlap the lower-indexed entry wins.
    '
    '   InString$  text to compress (not modified)
    '   Table$()   substitution table; its bounds supply the character codes

    s$ = InString$

    FOR ASCII = LBOUND(Table$) TO UBOUND(Table$)
        Pat$ = Table$(ASCII)
        PatLen = LEN(Pat$)

        ' Skip empty entries: INSTR reports a match at position 1 for an
        ' empty search string, which would keep the loop below inserting
        ' characters forever.
        IF PatLen > 0 THEN
            i = INSTR(s$, Pat$)
            DO WHILE i
                s$ = LEFT$(s$, i - 1) + CHR$(ASCII) + MID$(s$, i + PatLen)

                ' Resume just past the inserted code.  Everything before it
                ' has already been searched, and the code itself cannot be
                ' part of a match as long as table entries are plain text.
                i = INSTR(i + 1, s$, Pat$)
            LOOP
        END IF
    NEXT ASCII

    fqjSqueeze$ = s$
END FUNCTION

FUNCTION fqjTableInit
    ' This function initializes all compression tables and returns a value
    ' equal to how many thousandths of a second were required to initialize
    ' the tables.  The result is capped at 32767 so that it always fits the
    ' integer return type.

    StartTime! = TIMER

    ' Words get a trailing space appended, so a word entry only matches
    ' text that is followed by a space.
    RESTORE EnglishWordData
    FOR i = LBOUND(WordTable$) TO UBOUND(WordTable$)
        READ WordTable$(i)
        WordTable$(i) = WordTable$(i) + " "
    NEXT i

    RESTORE EnglishTrigramData
    FOR i = LBOUND(TriTable$) TO UBOUND(TriTable$)
        READ TriTable$(i)
    NEXT i

    RESTORE EnglishDigramData
    FOR i = LBOUND(DiTable$) TO UBOUND(DiTable$)
        READ DiTable$(i)
    NEXT i

    Elapsed! = TIMER - StartTime!

    ' TIMER counts seconds since midnight and restarts at zero, so a run
    ' that spans midnight yields a negative difference.
    IF Elapsed! < 0 THEN Elapsed! = Elapsed! + 86400!

    Millis! = Elapsed! * 1000
    IF Millis! > 32767 THEN Millis! = 32767

    fqjTableInit = Millis!
END FUNCTION

FUNCTION funQompress$ (InText$)
    ' Compresses one line of text with each of the three tables and keeps
    ' the shortest result.  Every returned record has the same layout:
    '
    '   1 byte    method marker (WordType, TriType, DiType or NoType)
    '   2 bytes   MKI$ of the original length
    '   n bytes   compressed text, or the original text for NoType
    '
    ' NOTE: WordType, TriType, DiType and NoType are expected to be
    ' module-level CONSTs.  A name that is not declared is treated here as
    ' an integer variable holding 0, so the marker would silently be
    ' CHR$(0).
    '
    ' The STATIC tallies count how often each method won; nothing in this
    ' file reads them back.

    STATIC WCount, TCount, DiCount

    OrigLen = LEN(InText$)

    ' Blank and whitespace-only lines cannot be compressed.  Store them
    ' verbatim in a complete record so that neither the whitespace nor the
    ' length field is lost.
    IF LTRIM$(RTRIM$(InText$)) = "" THEN
        funQompress$ = CHR$(NoType) + MKI$(OrigLen) + InText$
        EXIT FUNCTION
    END IF

    Word$ = fqjSqueeze$(InText$, WordTable$())
    Tri$ = fqjSqueeze$(InText$, TriTable$())
    Di$ = fqjSqueeze$(InText$, DiTable$())

    WordLen = LEN(Word$)
    TriLen = LEN(Tri$)
    DiLen = LEN(Di$)

    ' Pick the shortest candidate.  Ties go to word compression first and
    ' then to digram compression, because both tests are strict.
    IF TriLen < DiLen AND TriLen < WordLen THEN
        Txt$ = Tri$
        METH$ = CHR$(TriType)
        TCount = TCount + 1
    ELSEIF DiLen < WordLen THEN
        Txt$ = Di$
        METH$ = CHR$(DiType)
        DiCount = DiCount + 1
    ELSE
        Txt$ = Word$
        METH$ = CHR$(WordType)
        WCount = WCount + 1
    END IF

    ' Use the compressed form only if it is actually shorter than the
    ' original text; otherwise fall back to a verbatim NoType record.
    IF OrigLen > LEN(Txt$) THEN
        funQompress$ = METH$ + MKI$(OrigLen) + Txt$
    ELSE
        funQompress$ = CHR$(NoType) + MKI$(OrigLen) + InText$
    END IF
END FUNCTION