'=========================================================================== ' Subject: SORT ANY FILE Date: 10-19-99 (10:42) ' Author: Nigel Traves Code: QB, PDS ' Origin: 998053280@99.humber.ac.uk Packet: ALGOR.ABC '=========================================================================== REM SORTANY.BAS - PD - N. Traves 6/99 REM Use entirely at own risk REM REM compile to stand alone .exe REM load as 'QB SORTANY/AH/L REM REM written to overcome the 64k input filesize limit of DOS's SORT command REM NOTE unlike the DOS SORT command, this is not a filter REM REM invoke as either 'SORTANY InFile' or 'SORTANY InFile OutFile' REM '$INCLUDE: 'QB.BI' '$DYNAMIC REM These 2 TYPEs are used (indirectly) by ItExists% TYPE PathString MyPath AS STRING * 65 END TYPE TYPE DirEntry ReservedByDOS AS STRING * 21 FileAttribute AS STRING * 1 FileTime AS INTEGER FileDate AS INTEGER FileSize AS LONG FileName AS STRING * 13 END TYPE REM This TYPE is Used by HuthSort TYPE StackType Low AS INTEGER High AS INTEGER END TYPE DECLARE SUB SortMerge ( InFile1$, InFile2$, OutFile$ ) DECLARE FUNCTION RandFileName$ () DECLARE FUNCTION MakeNumStr$ ( MyValue& ) DECLARE FUNCTION ItExists% ( Name$ ) DECLARE FUNCTION FindFirstMatchingEntry% ( DirName$, Attribute%, MyEntry AS DirEntry ) DECLARE SUB DosInterruptX ( InReg AS RegTypeX, OutReg AS RegTypeX ) DECLARE FUNCTION GetName% ( Name$, PathName AS PathString ) DECLARE SUB HuthSort ( ArrayToSort$() ) DECLARE SUB Usage () CONST CarryFlag = 1 CONST TRUE% = -1 CONST FALSE% = 0 CONST NORMAL = 0 CONST NoMoreFiles = 18 CONST DOSService% = &H21 CONST GetDTA% = &H2F00 CONST SetDTA% = &H1A00 CONST FindFirst% = &H4E00 REDIM SortThese$(1) MyInput$ = LTRIM$(RTRIM$(COMMAND$)) '| get file(s) to sort IF MyInput$ = "" THEN '| no files! Usage '| that's the way to do it STOP '| Bye END IF Here% = INSTR(MyInput$, " ") '| have we got 1 or 2 filenames? IF Here% = 0 THEN '| 1 only InFile$ = MyInput$ '| so input and output filenames OutFile$ = MyInput$ '| are identical ELSE '| 2 filenames - go get 'em InFile$ = LTRIM$(RTRIM$(LEFT$(MyInput$, Here% - 1))) OutFile$ = LTRIM$(RTRIM$(MID$(MyInput$, Here% + 1))) END IF '| check for backup files IF RIGHT$(InFile$, 4) = ".BAK" OR RIGHT$(OutFile$, 4) = ".BAK" THEN PRINT '| can't use 'em PRINT "ERROR - Cannot safely sort backup ('.BAK') files" PRINT PRINT "Terminating Now!" STOP END IF IF NOT ItExists%(InFile$) THEN '| input file doesn't exist PRINT PRINT "ERROR - File "; InFile$; " does not exist in this directory!" PRINT PRINT "Terminating Now!" STOP END IF IF InFile$ <> OutFile$ THEN '| filenames not identical? IF ItExists%(OutFile$) THEN '| output file already exists! Error! PRINT PRINT "ERROR - File "; OutFile$; " already exists in this directory!" PRINT PRINT "Terminating Now!" STOP END IF ELSE '| they are identical so make input Here% = INSTR(InFile$, ".") '| file a backup IF Here% = 0 THEN '| no existing extender to filename NewName$ = InFile$ + ".BAK" ELSE '| change existing extender to .BAK NewName$ = LEFT$(InFile$, Here%) + "BAK" END IF NAME InFile$ AS NewName$ '| do the actual renaming InFile$ = NewName$ END IF OPEN InFile$ FOR INPUT SHARED AS 1 MySize& = LOF(1) '| get input file size IF MySize& <= 64000 THEN '| its small enough so CLOSE 1 DosString$ = "SORT < " + InFile$ + " > " + OutFile$ SHELL DosString$ '| use the DOS SORT command STOP '| and exit END IF LineCount& = 0 '| otherwise lets find how many DO '| lines of text we've actually got LineCount& = LineCount& + 1 LINE INPUT #1, A$ LOOP UNTIL EOF(1) CLOSE 1 IF LineCount& <= 750 THEN '| We've got very long lines AverageLineLength& = MySize& \ LineCount& LinesToUse& = 30000 \ AverageLineLength& IF LinesToUse& < 5 THEN '| they are probably too long for QB PRINT '| so bomb out PRINT PRINT "ERROR - Lines are too long for QB to handle! PRINT PRINT "EXITING NOW!" STOP END IF TempFiles& = LineCount& \ LinesToUse& '| get number of temporary files REDIM SortThese$(1 TO LinesToUse&) '| to use and re-size array to suit ELSE Power& = 1 '| calc number of temp files to use LinesToUse& = LineCount& DO WHILE LinesToUse& > 750 TempFiles& = CLNG(2^Power&) - 1 Power& = Power& + 1 LinesToUse& = LineCount& \ TempFiles& LOOP REDIM SortThese$(1 TO LinesToUse&) '| and re-size array to suit END IF Remainder& = LineCount& MOD LinesToUse& '| any lines left? MyHalf$ = RandFileName$ '| get guaranteed unused filename component OPEN InFile$ FOR INPUT SHARED AS 1 '| start sorting FOR FileCount& = 1 TO TempFiles& '| for each temporary file FOR Index& = 1 TO LinesToUse& '| get chunk of data LINE INPUT #1, SortThese$(Index&) NEXT Index& HuthSort SortThese$() '| sort it and build temp filename ThisFile$ = MyHalf$ + MakeNumStr$(Filecount&) + "." + MyHalf$ OPEN ThisFile$ FOR OUTPUT AS 2 '| open temp file FOR Index& = 1 TO LinesToUse& '| write sorted (non-blank) lines to it IF SortThese$(Index&) <> "" THEN ValidData% = ((LEN(SortThese$(Index&)) > 1) OR (ASC(SortThese$(Index&)) > 31)) IF ValidData% THEN PRINT #2, SortThese$(Index&) END IF END IF NEXT Index& CLOSE 2 '| close temp file and do next one NEXT FileCount& IF Remainder& > 0 THEN '| so we've got a few lines left REDIM SortThese$(1 TO Remainder&) '| re-size array to suit FOR Index& = 1 TO Remainder& '| get remaining data LINE INPUT #1, SortThese$(Index&) NEXT Index& HuthSort SortThese$() '| sort it ThisFile$ = MyHalf$ + MakeNumStr$(TempFiles& + 1) + "." + MyHalf$ OPEN ThisFile$ FOR OUTPUT AS 2 FOR Index& = 1 TO Remainder& '| save it in temp file IF SortThese$(Index&) <> "" THEN ValidData% = ((LEN(SortThese$(Index&)) > 1) OR (ASC(SortThese$(Index&)) > 31)) IF ValidData% THEN PRINT #2, SortThese$(Index&) END IF END IF NEXT Index& CLOSE 2 StartValue% = 2 '| and set values prior to final sort File2$ = ThisFile$ ELSE '| no remaining lines StartValue% = 3 '| so just set values File2$ = MyHalf$ + "00002." + MyHalf$ END IF CLOSE 1 '| finally close input file File1$ = MyHalf$ + "00001." + MyHalf$ '| lets start merging those File3$ = MyHalf$ + "TEMPA." + MyHalf$ '| temp files together SortMerge File1$, File2$, File3$ KILL File1$ '| and delete temp files as we go KILL File2$ File1$ = File3$ FOR Index& = StartValue% TO TempFiles& '| now merge the rest File2$ = MyHalf$ + MakeNumStr$(Index&) + "." + MyHalf$ IF StartValue% = 2 THEN '| we've got an extra file IF (Index& AND 1) = 0 THEN File3$ = MyHalf$ + "TEMPB." + MyHalf$ ELSE File3$ = MyHalf$ + "TEMPA." + MyHalf$ END IF ELSE '| just TempFiles& temp files IF (Index& AND 1) = 0 THEN File3$ = MyHalf$ + "TEMPA." + MyHalf$ ELSE File3$ = MyHalf$ + "TEMPB." + MyHalf$ END IF END IF SortMerge File1$, File2$, File3$ '| merge and kill current temp files KILL File1$ KILL File2$ File1$ = File3$ NEXT Index& NAME File3$ AS OutFile$ '| finally rename remaining temp file END '| as output file SUB SortMerge ( InFile1$, InFile2$, OutFile$ ) REM Take 2 sorted input files and merge them in such a way as to produce REM a sorted output file. File size is irrelevant. FirstFile% = FREEFILE '| open 2 input and one output file OPEN InFile1$ FOR INPUT SHARED AS FirstFile% SecondFile% = FREEFILE OPEN InFile2$ FOR INPUT SHARED AS SecondFile% ThirdFile% = FREEFILE OPEN OutFile$ FOR OUTPUT AS ThirdFile% DO '| get 1st non-blank line from file 1 LINE INPUT #FirstFile%, A$ FirstDone% = EOF(FirstFile%) LOOP WHILE A$ = "" AND NOT FirstDone% IF FirstDone% THEN '| file 1 is empty! PRINT "ERROR - NO VALID DATA IN "; InFile1$ PRINT "TERMINATING NOW!" STOP END IF DO '| get 1st non-blank line from file 2 LINE INPUT #SecondFile%, B$ SecondDone% = EOF(SecondFile%) LOOP WHILE B$ = "" AND NOT SecondDone% IF SecondDone% THEN '| file 2 is empty! PRINT "ERROR - NO VALID DATA IN "; InFile2$ PRINT "TERMINATING NOW!" STOP END IF DO '| repeat until EOF(1) or EOF(2) IF UCASE$(A$) <= UCASE$(B$) THEN '| get lesser string PRINT #ThirdFile%, A$ '| from file 1 & output it DO '| get next valid line from 1st file LINE INPUT #FirstFile%, A$ FirstDone% = EOF(FirstFile%) LOOP WHILE A$ = "" AND NOT FirstDone% ELSE PRINT #ThirdFile%, B$ '| from file 2 and output DO '| get next valid line from 2nd file LINE INPUT #SecondFile%, B$ SecondDone% = EOF(SecondFile%) LOOP WHILE B$ = "" AND NOT SecondDone% END IF LOOP UNTIL FirstDone% OR SecondDone% IF FirstDone% AND SecondDone% THEN '| this would be a miraculous occurrence CLOSE FirstFile%, SecondFile%, ThirdFile% STOP ELSEIF FirstDone% THEN '| file 1 is now empty so keep going DO '| until we've output its last line IF UCASE$(A$) <= UCASE$(B$) THEN PRINT #ThirdFile%, A$ EXIT DO ELSE PRINT #ThirdFile%, B$ DO LINE INPUT #SecondFile%, B$ SecondDone% = EOF(SecondFile%) LOOP WHILE B$ = "" AND NOT SecondDone% END IF LOOP UNTIL SecondDone% IF SecondDone% THEN '| we've still got 1 line from each '| file to output so decide the IF UCASE$(A$) <= UCASE$(B$) THEN '| order to output them PRINT #ThirdFile%, A$ PRINT #ThirdFile%, B$ ELSE PRINT #ThirdFile%, B$ PRINT #ThirdFile%, A$ END IF ELSE '| no lines left in file 1 but an '| arbitrary number of lines left '| in file 2 so first we output PRINT #ThirdFile%, B$ '| line already read from file 2 DO '| and then append remaining lines DO LINE INPUT #SecondFile%, B$ SecondDone% = EOF(SecondFile%) LOOP WHILE B$ = "" AND NOT SecondDone% IF B$ <> "" THEN PRINT #ThirdFile%, B$ END IF LOOP UNTIL SecondDone% END IF ELSE '| file 2 is now empty so keep going DO '| until we've output its last line IF UCASE$(A$) <= UCASE$(B$) THEN PRINT #ThirdFile%, A$ DO LINE INPUT #FirstFile%, A$ FirstDone% = EOF(FirstFile%) LOOP WHILE A$ = "" AND NOT FirstDone% ELSE PRINT #ThirdFile%, B$ EXIT DO END IF LOOP UNTIL FirstDone% IF FirstDone% THEN '| both files empty - 1 line from each IF UCASE$(A$) <= UCASE$(B$) THEN '| select order to do it in PRINT #ThirdFile%, A$ PRINT #ThirdFile%, B$ ELSE PRINT #ThirdFile%, B$ PRINT #ThirdFile%, A$ END IF ELSE '| no lines left in file 2 but an PRINT #ThirdFile%, A$ '| arbitrary number in file 1 so DO '| output last line read from file 1 DO '| and then append remaining lines LINE INPUT #FirstFile%, A$ FirstDone% = EOF(FirstFile%) LOOP WHILE A$ = "" AND NOT FirstDone% IF A$ <> "" THEN PRINT #ThirdFile%, A$ END IF LOOP UNTIL FirstDone% END IF END IF CLOSE FirstFile%, SecondFile%, ThirdFile% '| we're done so close all files END SUB FUNCTION RandFileName$ REM Produce a filename fragment that can be used to create from 1 to REM (2^20) - 1 unique filenames that do not already exist RANDOMIZE -TIMER DO MyName$ = "" FOR Index% = 1 TO 3 MyName$ = MyName$ + CHR$((RND(1) * 26) + 65) NEXT Index% TestName$ = MyName$ + "*." + MyName$ LOOP WHILE ItExists%(TestName$) RandFileName$ = MyName$ END FUNCTION FUNCTION MakeNumStr$ ( MyValue& ) REM Returns a hex string representation of MyValue& padded with leading REM zeros where necessary MyNum$ = LTRIM$(RTRIM$(HEX$(MyValue&))) MyLen% = LEN(MyNum$) IF MyLen% < 5 THEN MakeNumStr$ = STRING$(5 - MyLen%, "0") + MyNum$ ELSE MakeNumStr$ = MyNum$ END IF END FUNCTION FUNCTION ItExists% ( Name$ ) REM check whether a file with Name$ exists without disturbing the REM current Disk Transfer Area IF Name$ = "" THEN ItExists% = 0 ELSE DIM MyEntry AS DirEntry DIM InReg AS RegTypeX, OutReg AS RegTypeX InReg.AX = GetDTA% DosInterruptX InReg, OutReg Result% = FindFirstMatchingEntry%(Name$, NORMAL, MyEntry) InReg.AX = SetDTA% InReg.DS = OutReg.ES InReg.DX = OutReg.BX DosInterruptX InReg, OutReg ItExists% = Result% END IF END FUNCTION FUNCTION FindFirstMatchingEntry% ( DirName$, Attribute%, MyEntry AS DirEntry ) REM used by ItExists% DIM ThisPath AS PathString DIM InReg AS RegTypeX, OutReg AS RegTypeX IF GetName%(DirName$,ThisPath) THEN InReg.AX = SetDTA% InReg.DS = VARSEG(MyEntry) InReg.DX = VARPTR(MyEntry) DosInterruptX InReg, OutReg InReg.AX = FindFirst% InReg.DS = VARSEG(ThisPath) InReg.DX = VARPTR(ThisPath) InReg.CX = Attribute% DosInterruptX InReg, OutReg IF ((CarryFlag AND OutReg.Flags <> 0) AND (OutReg.AX = NoMoreFiles)) THEN FindFirstMatchingEntry% = FALSE% ELSE FindFirstMatchingEntry% = TRUE% END IF ELSE FindFirstMatchingEntry% = FALSE% END IF END FUNCTION SUB DosInterruptX ( InReg AS RegTypeX, OutReg AS RegTypeX ) REM used indirectly by ItExists% InterruptX DosService%, InReg, OutReg END SUB FUNCTION GetName% ( Name$, PathName AS PathString ) REM used indirectly by ItExists% Name$ = LTRIM$(RTRIM$(Name$)) Length% = LEN(Name$) IF Length% > 64 THEN GetName% = FALSE% ELSE FOR X = 1 TO Length% MID$(PathName.MyPath,X,1) = MID$(Name$,X,1) NEXT X FOR X = (Length% + 1) TO 65 MID$(PathName.MyPath,X,1) = CHR$(0) NEXT X GetName% = TRUE% END IF END FUNCTION SUB HuthSort ( ArrayToSort$() ) ' HUTHSORT.BAS written by Cornel Huth ' Iterative QuickSort Routine ' ' Tweaked by Quinn Tyler Jackson ' ' Cosmetically altered + case insensitised by N. Traves 6/99 DIM MyStack(1 TO 128) AS StackType StackIndex% = 1 MyStack(StackIndex%).Low = LBOUND(ArrayToSort$) MyStack(StackIndex%).High = UBOUND(ArrayToSort$) StackIndex% = StackIndex% + 1 DO StackIndex% = StackIndex% - 1 Lowest% = MyStack(StackIndex%).Low Highest% = MyStack(StackIndex%).High DO Index1% = Lowest% Index2% = Highest% MidPoint% = (Lowest% + Highest%) \ 2 MidPointString$ = UCASE$(ArrayToSort$(MidPoint%)) DO DO WHILE UCASE$(ArrayToSort$(Index1%)) < MidPointString$ Index1% = Index1% + 1 LOOP DO WHILE UCASE$(ArrayToSort$(Index2%)) > MidPointString$ Index2% = Index2% - 1 LOOP IF Index1% <= Index2% THEN SWAP ArrayToSort$(Index1%), ArrayToSort$(Index2%) Index1% = Index1% + 1 Index2% = Index2% - 1 END IF LOOP WHILE Index1% <= Index2% IF (Index2% - Lowest%) < (Highest% - Index1%) THEN IF Index1% < Highest% THEN MyStack(StackIndex%).Low = Index1% MyStack(StackIndex%).High = Highest% StackIndex% = StackIndex% + 1 END IF Highest% = Index2% ELSE IF Lowest% < Index2% THEN MyStack(StackIndex%).Low = Lowest% MyStack(StackIndex%).High = Index2% StackIndex% = StackIndex% + 1 END IF Lowest% = Index1% END IF LOOP WHILE Lowest% < Highest% LOOP WHILE StackIndex% <> 1 END SUB SUB Usage PRINT PRINT PRINT "USAGE - SortAny Infile [OutFile]" PRINT END SUB