'===========================================================================
' Subject: REMOVE DUPLICATES IN *.ABC        Date: 01-23-99 (14:20)       
' Author:  Michael Marquart                  Code: QB, QBasic, PDS        
' Origin:  micm@melbpc.org.au              Packet: ABC.ABC
'===========================================================================
' Here is a program that will remove all duplicate entries from all the *.abc
' files, re-writing the *.abc files - as a consequence it sorts the entries.
'
' It is quick and dirty, uses the MSDOS SORT.EXE which it expects to find
' on the path, and I lied about it being quick - it's dead slow. :P
'
' With SmartDrive loaded, using a Cyrix P200+ on 27.5 Megabytes of files
' it took 25 minutes.
'
' I used the following file as a guide and as a template, so thank you to
' The ABC Programmer for the impetus to go this far.
'
'===========================================================================
' Subject: ABC FILE STRUCTURES               Date: 02-25-96 (10:42)
'  Author: The ABC Programmer                Code: QB, QBasic, PDS
'  Origin: The ABC Reader v1.00            Packet: ABC.ABC
'===========================================================================
'
' Why did I write this you ask?  Many moons ago I installed an ABC packet
' twice, and that's how I got the duplicate entries...
'
' To process only one (or more) files, copy the .ABC/.IDX pair into a
' directory with this program. (EG: GRAPHICS.ABC and GRAPHICS.IDX)
'
'
' All care taken, but I am not to be held responsible for any damage, ill-will
' or loss caused by using or misusing this program.
'
' Tested with MSdos V6.22 and this program is written to run under Qbasic v1.1
'

' Layout of the fixed-size header that precedes each code entry in a *.ABC
' file.  Every field is fixed-length text and the fields add up to 165 bytes
' (31 + 31 + 31 + 23 + 23 + 5 + 9 + 12).  The program below does not declare
' a variable of this type; it reads the header as one 165-byte string.
TYPE ABCFileStructure         ' Structures for *.ABC
  From AS STRING * 31         ' Programmer/Author's name
  Subject AS STRING * 31      ' Subject/Title of code
  Origin AS STRING * 31       ' Origin of code, or keywords
  DateOf AS STRING * 23       ' Date of code release
  CodeOf AS STRING * 23       ' Code of (QB,QBasic,PDS,ASIC,PB,VB,ASM,TEXT)
  NumLines AS STRING * 5      ' Number of lines in code, stored as text
  NumBytes AS STRING * 9      ' Number of bytes when extracted, stored as text
  SaveFile AS STRING * 12     ' Default extraction name
END TYPE

' NOTE:  (Number of bytes) - (Number of lines in code) = (Bytes in Packet)
'
'   Lines are delimited by CHR$(227), therefore reducing [CR,LF] to just one
'   byte.  Note that the structure is NOT similar to the .QWK packets.
'   Please do not use their structure or method of reading the ABC packets.
'
'   For best results, read blocks in 250 bytes.  4096 bytes would be best
'   if you decide to transfer them directly into the memory buffer.
'
' NOTE:  DO NOT read them into a string array, unless you know how to
'   free up LOTS of memory (XMS/EMS) just use file pointers (LONG).

' Buffer for one .IDX record.  78 bytes = 1 (code) + 10 (position)
' + 5 (number of lines) + 31 (subject) + 31 (author), as laid out below.
DIM ABCIndex AS STRING * 78

' For the .IDX files, here's the structure:
'
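' First character is NULL or a "û" for NEW or a "¯" for SPECIAL (Your code)
' (These two characters look mis-encoded: presumably they are CHR$(251) and
'  CHR$(175), which DOS code page 437 shows as a check mark and as ">>".)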
'
' Next 10 bytes are the position for the code in the .ABC counterpart.
' Next  5 bytes are the number of lines in the file
' Next 31 bytes are the Subject/Title of the code
' Last 31 bytes are the Author/Programmer's Name
'
' TYPE ABCIndexStructure
'   Code AS STRING * 1
'   Position AS STRING * 10
'   NumLines AS STRING * 5
'   Subject AS STRING * 31
'   From AS STRING * 31
' END TYPE

' Here's a short demonstration on reading the INDEX and ABC Packet
' Make sure the following packets are in your current directory
' or change the path to compensate.

'============================== Mostly My bits =============================
' Remember when the run began so it can be reported at the very end
Start$ = TIME$
CLS

' Build the work list ======================================================
' DIR writes a bare (/b), unpaged (/-p), name-ordered (/o:n) listing of the
' *.IDX files into a temporary file.  CHR$(62) is the ">" redirection sign.
' Each listed .IDX file is assumed to have a matching .ABC file.
SHELL "dir *.idx /b /-p /o:n" + CHR$(62) + "!_~_!.tmp"

' First pass over the list: count its lines, so that progress can later be
' shown as "file x of count".
count = 0
OPEN "!_~_!.tmp" FOR INPUT AS 255
DO UNTIL EOF(255)
  LINE INPUT #255, Name$
  count = count + 1
LOOP
CLOSE

' Process every .ABC/.IDX pair named in the list ===========================
'
' For each name found in "!_~_!.tmp":
'   1. NAME.IDX is dumped to NAME.ID1 as text, one 78-byte record per line.
'   2. MSDOS SORT orders those lines from column 17 onwards (the subject,
'      since code + position + line count occupy columns 1-16) into NAME.ID2.
'   3. The sorted list is walked.  Blank lines are dropped, and so is any
'      entry whose text from column 12 (line count, subject, author) equals
'      that of the previously kept entry.  Each surviving entry has its
'      header and code copied to NAME.AB1, and a fresh index record carrying
'      the new position is written to NAME.ID1.
'   4. The original .ABC/.IDX are deleted and the rebuilt files renamed.

OPEN "!_~_!.tmp" FOR INPUT AS 255: current = 0

DO WHILE NOT EOF(255)
LINE INPUT #255, Name$
current = current + 1        ' Current keeps track of which file this is up to

' Strip the extension to get the base name.  A list line without a "."
' (a blank line or a message from DIR, say) is not a file name, and LEFT$
' would stop the program with "Illegal function call", so skip such lines.
' A line that starts with "." would give an empty base name; skip it too.
DotPos% = INSTR(Name$, ".")
IF DotPos% < 2 THEN GOTO nextpair
Name$ = LEFT$(Name$, DotPos% - 1)

' Define temporary and final filenames
ABCFile$ = Name$ + ".ABC"    ' Current .ABC file
ABCFil1$ = Name$ + ".AB1"    ' New .ABC file
Indexfile$ = Name$ + ".IDX"  ' Current .IDX file
Indexfil1$ = Name$ + ".ID1"  ' New .IDX file, which at first is a Text
                             ' version of current .IDX file
IndexFil2$ = Name$ + ".ID2"  ' Temporary sorted text version of .IDX file

' Create a sortable text .IDX file =========================================

OPEN Indexfile$ FOR BINARY AS #1    ' Open the .IDX file
OPEN Indexfil1$ FOR OUTPUT AS #2    ' Open the .ID1 file

 ' Loop over the number of whole records rather than testing EOF(1): on a
 ' BINARY file EOF only becomes true after a GET has already run past the
 ' end, which would write one extra, meaningless line to the text file.
 FOR Rec& = 1 TO LOF(1) \ LEN(ABCIndex)
  GET #1, , ABCIndex      ' Get record
  PRINT #2, ABCIndex      ' Save to file as text
 NEXT
CLOSE #1, #2

' Sort the file using MSDOS SORT ===========================================
' CHR$(60) and CHR$(62) are the "<" and ">" redirection signs.

SHELL " sort /+17" + CHR$(60) + Indexfil1$ + CHR$(62) + IndexFil2$

' Main program =============================================================

OPEN ABCFile$ FOR BINARY AS #1               ' Open the .ABC file
ABCLen& = LOF(1)                             ' Size of the original .ABC
OPEN ABCFil1$ FOR BINARY AS #2: Pos1& = 1    ' Open the .AB1 file
OPEN IndexFil2$ FOR INPUT AS #3              ' Open the .ID2 file
OPEN Indexfil1$ FOR OUTPUT AS #4             ' Reuse the .ID1 file

ABCIndexold$ = ""               ' Use to compare, to remove duplicates

' Read entry and write to both new .ABC and .IDX files =====================

DO WHILE NOT EOF(3)
  LINE INPUT #3, ABCIndex$      ' Get record

  ' Drop blank lines in the sorted text index, and drop an entry whose
  ' line count, subject and author repeat those of the last kept entry.
  IF LTRIM$(ABCIndex$) = "" OR MID$(ABCIndex$, 12) = MID$(ABCIndexold$, 12) THEN
    GOTO skip
  END IF

  ' Display the current entry and file
  LOCATE 1, 1: PRINT MID$(ABCIndex$, 17); " Lines = "; MID$(ABCIndex$, 12, 5)
  LOCATE 3, 1: PRINT "Current File is"; current; "of"; count; "- ";
  PRINT Name$; ".ABC        "

  ' Save the last entry for the duplicate compare
  ABCIndexold$ = ABCIndex$

  ' Byte position of this entry in the original .ABC.  The first byte of
  ' the record is the Code character, so the 10 digits start at byte 2.
  Position& = VAL(MID$(ABCIndex$, 2, 10))

  ABCHeader$ = SPACE$(165)     ' Total number of bytes in header is 165

  Lines = VAL(MID$(ABCIndex$, 12, 5))  ' Get # of Lines (also # of CHR$(227))

  ' Write new index file: Code character, the new position left-justified
  ' and padded to 10 characters, then the rest of the record unchanged.
  ' The trailing ";" stops a line break being added after the 78 bytes.
  Pos1$ = LTRIM$(STR$(Pos1&))
  PRINT #4, LEFT$(ABCIndex$, 1) + Pos1$ + SPACE$(10 - LEN(Pos1$));
  PRINT #4, MID$(ABCIndex$, 12);

  ' Copy the 165-byte header
  GET #1, Position&, ABCHeader$     ' Goto Position& and read contents
  PUT #2, Pos1&, ABCHeader$         ' Write new .ABC file
  Pos1& = Pos1& + 165               ' Update pointer to the end of the file
  Position& = Position& + 165       ' Set pointer to the start of the code

  ' Copy the code one byte at a time, one CHR$(227)-terminated line per
  ' pass of the FOR loop.  The copy is bounded by the size of the .ABC:
  ' if the file is truncated or the line count is too high, no further
  ' CHR$(227) would ever be read and the loop would otherwise never end.
  copy$ = " "
  FOR z = 1 TO Lines
    DO
     IF Position& > ABCLen& THEN EXIT DO  ' Ran off the end of the .ABC
     GET #1, Position&, copy$       ' Goto Position& and read code
     PUT #2, Pos1&, copy$           ' Write code to the new file
     Pos1& = Pos1& + 1              ' Increment pointer to the end of file
     Position& = Position& + 1      ' Increment pointer to the next byte
    LOOP UNTIL copy$ = CHR$(227)    ' End of line reached, do NEXT
    IF Position& > ABCLen& THEN EXIT FOR  ' Nothing left to read
  NEXT

skip:           ' Label to jump to when a duplicate is found
  LOOP          ' Loop to next entry in the .ABC file
CLOSE #1, #2, #3, #4

' Kill originals, and Rename new files
KILL ABCFile$
NAME ABCFil1$ AS ABCFile$
KILL Indexfile$
NAME Indexfil1$ AS Indexfile$
KILL IndexFil2$

nextpair:       ' Label to jump to when a list line is not a file name
LOOP            ' Loop to the next .ABC/.IDX pair

' Tidy up: close anything still open and delete the temporary file list
CLOSE
KILL "!_~_!.tmp"

' Report when the run started and when it ended
PRINT
PRINT "Commenced at: "; Start$
PRINT " Finished at: "; TIME$

' Audible alert that the process is finished: five rising sweeps from
' 550 to 660 in steps of 5, each tone lasting i% / 4000 clock ticks
FOR c% = 1 TO 5
  FOR i% = 550 TO 660 STEP 5
    SOUND i%, i% / 4000
  NEXT i%
NEXT c%

' Leave QBasic and return to DOS
SYSTEM