;-------------------------------------------------------------------------------
; ARK -- Executable COM file archive
;
; (c) Copyright 1998 by K. Heidenstrom.
;
; This program is free software.  You may redistribute it and/or
; modify it under the terms of the GNU General Public License as
; published by the Free Software Foundation; either version 2 of
; the License, or (at your option) any later version.
;
; This program is distributed in the hope that it will be useful,
; but WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	In no
; event will the author be liable for any damages of any kind
; related to the use of this program.  See the GNU General Public
; License for more details.
;
; You should have received a copy of the GNU General Public License
; along with this program; if not, write to the Free Software
; Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
;
; If you find any bugs or make significant improvements, please
; consider sending the details to the author so that other users
; may benefit.	Contact details are:
;
; Email:  kheidens@clear.net.nz
; Web:	  http://home.clear.net.nz/pages/kheidens
; Snail:  K. Heidenstrom c/- P.O. Box 27-103, Wellington, New Zealand
;
;-------------------------------------------------------------------------------
;
; Modified:
;
; KH.19980803.001  0.0.0  Started
; KH.19980809.002  More work; converted to COM structure internally
; KH.19980810.003  First usable version
; KH.19980813.004  Miscellaneous optimisations, improved error detection
; KH.19980814.005  Finalised usage syntax
; KH.19980815.006  1.0.0  Various optimisations
; KH.19980817.007  Tidied up one comment!
; KH.19980823.008  Release 1.0.1 - no functional changes

Ver		EQU	1
SubVer		EQU	0
ModVer		EQU	1
VerDate		EQU	"19980823"		; YYYYMMDD format

;-------------------------------------------------------------------------------
;
; This program must be constructed in several stages.  It requires Borland's
; Turbo Assembler (TASM) and TLINK.  I used TASM 3.1 and TLINK 4.0.  Create a
; batch file containing the following lines:

comment * --------------------	snip  snip  snip

@echo off
tasm %1 /ml /t /w2 ark;
if errorlevel 1 goto end
tlink /t ark, ark.bin, nul
if errorlevel 1 goto end
rem binary image 7F00h bytes
rem header 20h bytes
rem final size 7F20h
echo rcx >arkhdr.scr
echo 20 >>arkhdr.scr
echo e100 4D 5A 20 01 40 00 01 00  02 00 80 00 FF FF F0 07 >>arkhdr.scr
echo e110 00 08 00 00 00 00 00 00  1C 00 00 00 03 00 00 00 >>arkhdr.scr
echo n arkhdr.bin >>arkhdr.scr
echo w >>arkhdr.scr
echo q >>arkhdr.scr
debug <arkhdr.scr >NUL
copy /b arkhdr.bin + ark.bin ark.exe >NUL
md $ark
cd $ark
..\ark -r
cd ..
rd $ark
echo ARK make finished
:end
if exist arkhdr.scr del arkhdr.scr
if exist arkhdr.bin del arkhdr.bin
if exist ark.obj del ark.obj
if exist ark.bin del ark.bin

* ----------------------------	snip  snip  snip

;-------------------------------------------------------------------------------

		PAGE	,132
		IDEAL
		%TITLE	"ARK -- Executable COM file archive"
		NAME	ARK

; Documentation ----------------------------------------------------------------
;
; This program manages an archive of executable programs.  The archive is stored
; in the EXE file, which contains a code stub (created from this code) and the
; archive of COM-type programs.  The code stub allows any archived program to
; be executed directly (without extracting it to disk), and provides four simple
; archive maintenance functions:
;
;	Execute a program,
;	List the archive,
;	Extract one or more programs from archive to disk,
;	Rebuild the archive from all COM files in the current directory.
;
; When loaded and running in memory, this program is structured like a COM file
; (single segment, load image starts immediately above the PSP at offset 100h)
; except that the stack is in a separate segment, 2K in size, at the top of
; available memory.  The stack segment is also used to hold some variables and
; the mover code which is used to move code down in memory when an archived
; program is to be executed.  The program archive is not allowed to grow so
; large that it meets the stack segment.
;
; This program is able to modify and create its own image on disk.  Since it is
; an EXE program, this image includes the EXE header.  Some fields in the EXE
; header must be set according to the size of the program, which varies when
; the archive changes.	These fields are updated when the archive is to be
; written to disk.  A dummy EXE header is created by the program, and written
; to disk before the load image is written.
;
; The EXE header is always 32 bytes in size.  It is formatted as follows.
;
; Offset  Description
;    00h  Signature (always "MZ")
;    02h  Last page size (see notes)
;    04h  File pages (see notes)
;    06h  Relocation table entry count (always 1)
;    08h  Header paragraphs (always 2)
;    0Ah  Minimum memory (paragraphs) required above load image (always 80h)
;    0Ch  Maximum memory (paragraphs) to allocate (always 0FFFFh)
;    0Eh  Pre-relocation SS register contents (see notes)
;    10h  Initial SP register value (always 800h for 2K stack size)
;    12h  Negative checksum (always 0)
;    14h  Initial IP register value (always 0)
;    16h  Pre-relocation CS register contents (always 0)
;    18h  Relocation table offset into header (always 1Ch)
;    1Ah  Overlay number (always 0)
;    1Ch  Relocation entry offset (always 3, offset of RelocItem in load image)
;    1Eh  Relocation entry segment (always 0)
;    20h  End of header; start of code
;
; The items which are marked "see notes" have values which vary depending on
; the size of the archive, and are recreated dynamically by this program when
; it needs to update its image on disk (after the archive has been changed).
;
; The Last page size parameter (offset 2) contains the actual number of bytes
; in the final 512-byte page of the file.  DOS combines it with the File pages
; parameter (offset 4) to calculate the amount of data to load.
;
; The File pages parameter (offset 4) contains the total number of 512-byte
; 'pages' required to hold the file, including any final partial page (if the
; Last page size parameter at offset 2 is non-zero).
;
; These two values include the header size.  For example, if the load module
; (which includes the code stub and the archived programs) is 12340h (74,560)
; bytes in size, and the header is 20h bytes, the total file size is 12360h
; (74,592) bytes, which is 145 512-byte pages plus an overflow of 352 bytes.
; This would give values of 146 at header offset 4 (145 full pages plus one
; partial page) and 352 at header offset 2.
;
; The pre-relocation SS register value (offset 0Eh) is equal to the number of
; paragraphs in the load image.  This means that the initial stack segment is
; located immediately above the load image.  The code then moves the stack up
; to the top of memory.
;
; The fixup (relocation) table entry is required for compatibility with some
; versions of DOS which implemented a workaround for the old EXEPACK bug.
; There was a bug in the EXEPACK unpacker which caused it to fail and report
; the message "!Packed file corrupt" if the program is loaded below the 64K
; memory point.  EXEPACK programs can't easily be identified, but they have
; no relocation table entries, which is unusual for an EXE file, so some
; versions of DOS specifically check the number of relocation table entries
; when loading a program, and force it to load at (or above) 64K if there are
; no relocation table entries.	This is a nuisance for programs like this,
; which don't need any relocation table entries.  There is a dummy relocation
; table entry in the EXE header, to avoid this problem.
;
; The programs in the archive are stored in the load image, immediately above
; the code, one after the other.  Each program image is preceded by a 16-byte
; (one-paragraph) header, which is formatted as follows:
;
; Offset   Size      Meaning
;      0   8 bytes   Name, upper case, excl. '.COM', space-padded
;      8   1 word    File time (in directory/DTA format)
;     10   1 word    File date (in directory/DTA format)
;     12   1 word    File size (in bytes, range is 1 to 65278)
;     14   1 word    Image size (paragraphs, range 2 to 4081, includes header)
;     16   n bytes   File contents
;
; The archive entry contains the 16-byte archive entry header followed by the
; contents of the file, padded up to the next multiple of 16 bytes.  In other
; words, each entry header and each data block starts at a segment-paragraph.
;
; The end of the archive is marked by a dummy header which has the first two
; bytes set to zero.
;
; At present I know of no problems in the execution code.  There are a few
; differences between execution via ARK and direct execution:
;
;   1.	The path to the executable is 'NUL' under ARK; with direct execution
;	it is the true path of the executable (e.g. 'C:\UTIL\XTPRO.COM').
;	Under ARK, the executable that is running does not even exist on disk,
;	so it is impossible to give a meaningful path for it.  Some programs
;	store default settings and other parameters in the executable file, and
;	these programs will not be able to update their settings when run under
;	ARK.  To work around this, extract the archive to a temporary directory,
;	run the program directly and allow it to update the settings in its
;	image on disk, then rebuild the archive, so that the updated executable
;	is placed into the ARK archive.  Also, programs which get their own name
;	from this string, to display in usage syntax messages etc, will show
;	their name as 'NUL'.
;
;   2.	The FCBs are pre-initialised with different contents under ARK compared
;	to when the program is executed directly, and the AX register on entry
;	to the program is always zero under ARK (under DOS, it is 1 if the
;	command tail contained wildcards, or something like that).  These
;	differences shouldn't be a practical problem since I doubt that any
;	programs written in the last ten years make use of these 'features'.
;
; Program requirements
;
; CPU:	   8086 or later
; Memory:  As much system memory as possible (archive size is limited by
;	   amount of available memory)
; DOS:	   3.0 or later
;
; The segment register usage changes around quite a lot.  To find all major
; segment register usage changes, search for the string '; CS'.
;
; This program is fairly carefully optimised for size.	Please be careful when
; modifying it!
;
; ------------------------------------------------------------------------------

; Equates

AppName		EQU	"ARK"

PAD_CMD_TAIL	=	0		; 1 = Zero remaining command tail
					;     Enabling this wastes 9 bytes

SHOW_PROGRESS	=	1		; 1 = Show progress via dots and stars
					;     Enabling this uses 22 bytes

ExeHdrParas	=	2		; !! If you change this, changes are
					;    required in the debug script at
					;    the start of this file.

StackSize	=	2048		; !! If you change this, changes are
					;    required in the debug script at
					;    the start of this file.

MaxCOMSize	=	65274		; Maximum size of a COM file
					; is 65536 - 256 - 6 (the six comes
					; from two bytes for the zero 'return
					; address' word, plus two words that
					; the mover code uses).

WriteParas	=	0FF0h		; Must be 0FF0h or less

; Offsets into PSP

PSP_Envir	=	WORD PSP:2Ch	; Paragraph of environment block
PSP_DTA		=	PSP:80h		; Default DTA starts at PSP:80h
PSP_CmdLen	=	BYTE PSP:80h	; Length of command tail
PSP_CmdTail	=	BYTE PSP:81h	; Start of commnad tail contents
PSP_End		=	PSP:100h	; End of PSP; start of load image
COM_Image	=	100h		; Start of load image

; Offsets into memory allocation arena header

ARNA_Type	=	0		; Byte - 'M' (not last) or 'Z' (last)
ARNA_Para	=	1		; Word - paragraph of controlled block
ARNA_Size	=	3		; Word - number of paragraphs in block
ARNA_ID		=	8		; Eight-character owner identifier
ARNA_End	=	16		; Past end of arena header

; Offsets into DTA

DTA_ATTRIB	=	21		; Attribute of file found
DTA_TIME	=	22		; Time stamp word
DTA_DATE	=	24		; Date stamp word
DTA_FSIZEL	=	26		; File size loword
DTA_FSIZEH	=	28		; File size hiword
DTA_FNAME	=	30		; Filename, ASCIIZ, up to 13 characters

; Offsets into archive entry

ARK_FNAME	=	0		; Name - eight chars, space-padded
ARK_FTIME	=	8		; File time
ARK_FDATE	=	10		; File date
ARK_FSIZE	=	12		; File size in bytes
ARK_PARAS	=	14		; Image size in paragraphs
ARK_CONT	=	16		; Start of contents
ARK_LENGTH	=	16		; Length of ARK header

;-------------------------------------------------------------------------------

; The following block of variables is stored in the bottom of the stack
; segment and is addressed using offsets from BP.  Important!  CmdName
; MUST start at the start of the segment (several XOR reg,reg instructions
; are used, on this assumption).

CmdName		=	BYTE ss:bp+0	; Cmd name, space-padded, up to 9 chars
					; Don't move CmdName (see note above).
ArkPara		=	WORD ss:bp+10	; Paragraph of start of archive data
BlockParas	=	WORD ss:bp+12	; Paragraphs in program's memory block
ProgName	=	WORD ss:bp+14	; Offset into env. of prog name  MUST BE
EnvirSeg	=	WORD ss:bp+16	; Environment block paragraph	ADJACENT

; The load image starts here

		SEGMENT	PSP
		ASSUME	cs:PSP,ds:nothing,es:nothing,ss:nothing

		ORG	COM_Image	; This code works like a COM file
					;   after the startup stub has
					;   adjusted the segmentation.

PROC	Main0		near
		jmp	Main1		; Get outta here
RelocItem	DW	0		; Relocation is applied to this word
ENDP	Main0

Signature	DB	13,AppName," V",Ver+"0",".",SubVer+"0","."
		DB	ModVer+"0",", ",VerDate,"; "
;---------------
;!! Change the following line if you create a derivative version of this program
		DB	"Original"
;---------------
		DB	9,9,9,26		; Tabs and Ctrl-Z

; Transient messages -----------------------------------------------------------

DOSVersMsg	DB	AppName,": Requires DOS 3.0 or later",13,10,"$"
MemAllocEM	DB	144,AppName,": Memory allocation error",13,10,0
UsageEM		DB	255,13,10,AppName," --",9,"Executable COM file archive  Version ",Ver+"0",".",SubVer+"0",".",ModVer+"0",", ",VerDate
		DB	13,10,9,"(c) Copyright 1998 by K. Heidenstrom (kheidens@clear.net.nz)"
		DB	13,10,10,"Usage:",9,AppName," Command [Parameters]",9,"- Run specified command"
		DB	13,10,9,AppName," -E Filespec",9,9,9,"- Extract file(s) to disk"
		DB	13,10,9,AppName," -L",9,9,9,9,"- List archive"
		DB	13,10,9,AppName," -R",9,9,9,9,"- Rebuild archive from *.COM"
		DB	13,10,0

NotFoundEM	DB	9,AppName,": Command not found",13,10,0
ExtractEM	DB	32,13,10,AppName,": Error during extraction",13,10,0
LineBuf		DB	"filename COM  bytes  yyyymmdd  hh:mm",13,10
LineBufLen	=	$ - LineBuf
StarDotCom	DB	"*.COM",0
RebuildFailEM	DB	61,13,10,AppName,": Error during archive rebuild",13,10,0

Switches	DB	"ELR"		; Switch letters
NumSwitches	=	$ - Switches
SwitchHnd	DW	SwitchE,SwitchL,SwitchR ; Pointers to switch handlers

; Transient tables -------------------------------------------------------------

ExeHeader	DB	"MZ"
LastPgSize	DW	0		; Bytes in last page (512-byte) of file
FilePages	DW	0		; Number of pages required to hold file
		DW	1		; Number of relocation table entries
		DW	ExeHdrParas	; Header paragraphs
		DW	StackSize/16	; Min mem (paras) req'd above load image
		DW	0FFFFh		; Max memory (paras) to allocate
StackSegReg	DW	0		; Pre-relocation SS register contents
		DW	StackSize	; Initial SP register value
		DW	0		; Negative checksum
		DW	0		; Initial IP register value
		DW	0		; Pre-relocation CS register contents
		DW	RelocTable - ExeHeader ; Reloc. table offset into header
		DW	0		; Overlay number
RelocTable	DW	3		; Dummy relocation table entry - offset
		DW	0		; Dummy relocation table entry - segment
ExeHdrLen	=	$ - ExeHeader
		IF	ExeHdrLen NE (ExeHdrParas SHL 4)
		 ERR	"EXE header is wrong length!"
		ENDIF

; Transient code ===============================================================

PROC	Main1		near		; Initially the image is at offset zero
		mov	di,cs
		sub	di,10h
		push	di
		mov	cx,OFFSET Main2	; Offset into COM-type memory model
		push	cx
		retf			; Adjust segmentation, 100h image base

		ASSUME	ds:PSP

Main2:		mov	ah,30h		; CS,DS,ES = PSP; SS = startup stack
		int	21h
		cmp	al,3		; Expect DOS 3.0 or later
		jae	DOS_Ok
		mov	dx,OFFSET DOSVersMsg
		mov	ah,9
		int	21h
		int	20h

; Errexit code - aborts the program with a specified message and errorlevel.
; On entry, BX points to a control string within the current code segment,
; which consists of a one-byte errorlevel followed by a null-terminated error
; message which may be blank.  This code assumes DOS version 2.0 handle
; functions are supported.  After writing the error message to StdErr, DOS
; function 4Ch (terminate with return code) is used to terminate the program.
; This code assumes only that CS is equal to the current code segment.

ErrExit:	push	[WORD cs:bx]	; Errorlevel onto stack
		inc	bx		; Point to error message
		call	ErrWriteMsg	; Display
		pop	ax		; Errorlevel
ErrExitQ:	mov	ah,4Ch
		int	21h		; Terminate with errorlevel in AL

ErrWriteMsg:	mov	cx,2		; Handle of standard error device
WriteMsg:	push	cs
		pop	ds		; Make DS valid
		mov	dx,bx		; Point DX for later
Err_Parse1:	inc	bx		; Bump pointer
		cmp	[BYTE ds:bx-1],0 ; Hit null terminator yet?
		jnz	Err_Parse1	; If not, loop
		xchg	cx,bx		; Get address of null terminator
		sub	cx,dx		; Subtract offset of start of segment
		dec	cx		; Adjust to correct number of chars
		jz	Err_Parse2	; If no text present
		mov	ah,40h		; Function to write to file or device
		int	21h		; Write error message to StdErr
Err_Parse2:	ret			; Return to exit code or whoever

MemAllocErr:	mov	bx,OFFSET MemAllocEM
		jmp	ErrExit		; Check PSP is directly below program

; Check PSP was placed (by DOS) directly below image (i.e. now at offset 0)

DOS_Ok:		cld			; Upwards string direction
		mov	cx,ds		; Get PSP segment to CX
		cmp	cx,di		; PSP should be image segment minus 10h
		jne	MemAllocErr

; Determine size of memory block allocated to this program

		dec	cx
		mov	es,cx		; CS,DS = PSP; ES = arena block
		cmp	di,[es:ARNA_Para] ; Check block paragraph in arena hdr
		jne	MemAllocErr	; If not an arena header - error
		mov	bx,[es:ARNA_Size] ; Get block size (in paragraphs)

; Calculate new stack segment at the top of the allocated block

		lea	cx,[bx+di-(StackSize/16)]
		mov	ss,cx		; Move stack - leave SP at StackSize
					; Note - CX is used later as the byte
					; limit for the environment scan
					; CS,DS = PSP; SS = high stack segment
		xor	bp,bp		; Will use SS:BP to address data vars
		mov	[BlockParas],dx	; Store memory block size

; Calculate paragraph of start of archive

		DB	8Dh,55h,ArkParas ; LEA DX,[DI+ArkParas]
		mov	[ArkPara],dx	; Calculate paragraph of ark data

; Find program name at end of environment block

		mov	es,[PSP_Envir]	; Get environment segment-paragraph
		mov	[EnvirSeg],es	; CS,DS = PSP; ES = environment
		xor	ax,ax		; Note - CX was set earlier, to the
		xor	di,di		;   stack paragraph.  Good enough.
		mov	[ProgName],di	; Prepare for no program name
FindEnvEnd:	repne	scasb		; Find null-terminator
		jne	DoneEnvir	; If corrupted environment
		scasb			; Was that the last environment string?
		jne	FindEnvEnd	; If not, keep scanning
		inc	di
		inc	di
		mov	[ProgName],di	; Store offset of program name

; Parse command tail -----------------------------------------------------------

DoneEnvir:	push	cs
		pop	es		; CS,DS,ES = PSP

		mov	ah,37h		; Note - AL was zero from earlier!
		int	21h		; Get switchar to DL

		mov	si,81h
		call	SkipWhite	; Skip whitespace; BadUsage if early C/R
		cmp	al,"-"
		je	GotSwitch	; If a switch
		cmp	al,dl
		jne	DoCommand	; If not a switch - must be a command

GotSwitch:	lodsb			; Get switch letter
		call	ToUpper

		mov	di,OFFSET Switches ; Point to option switch table
		mov	cx,NumSwitches	; Get number of switches to scan
		repne	scasb		; Look for switch
		jne	BadUsage	; If not found
		shl	di,1		; Shift for word sized pointers
OffSwitches	=	OFFSET (Switches)
		jmp	[SwitchHnd+di-OffSwitches-OffSwitches-2] ; Go to handler

BadUsage:	mov	bx,OFFSET UsageEM ; Incorrect usage
GoErrExit:	jmp	ErrExit

; Execute command --------------------------------------------------------------

DoCommand:	dec	si		; Back up to first character of name
		call	ParseCmdName	; Parse name of command to execute

; Move the program's command tail down

		dec	si
		mov	di,OFFSET PSP_CmdTail ; New command tail start
MoveTail:	lodsb			; Get character
		stosb			; Move it down
		cmp	al,13		; End of tail?
		jne	MoveTail	; If not, loop
		lea	ax,[di-(OFFSET PSP_CmdTail)-1] ; Calculate length
		mov	[PSP_CmdLen],al	; Store it
		IF	PAD_CMD_TAIL
		 xor	ax,ax
PadTail:	 stosb			; Null-pad
		 cmp	di,OFFSET PSP_End
		 jb	PadTail		; Loop
		ENDIF

; Find command in archive

		mov	bx,[ArkPara]	; Get segment of start of archive
FindCommand:	mov	es,bx		; CS,DS = PSP; ES = entry
		xor	si,si		; Assumes CmdName = SS:0000
		xor	di,di		; Assumes ARK_FNAME is 0
		mov	cx,4
		repe	cmps [WORD ss:si],[di] ; Compare names
		je	FoundCommand	; If match
		add	bx,[es:ARK_PARAS] ; Prepare to check next entry
		cmp	bp,[es:ARK_FNAME] ; Is there a name at this entry?
		jnz	FindCommand	; If so, keep looking
		mov	bx,OFFSET NotFoundEM ; Command not found in archive
		jmp	ErrExit

FoundCommand:	push	es		; Keep paragraph for later

; Change program name in environment to 'NUL'

		les bx,	[DWORD ProgName] ; CS,DS = PSP; ES = environment
		test	bx,bx		; Check that we found a valid progname
		jz	NoEnvir1
		mov	[WORD es:bx+0],"UN"
		mov	[WORD es:bx+2],"L"

; Change identification string in arena header

NoEnvir1:	mov	ax,cs
		dec	ax		; Point to arena header again
		mov	es,ax		; CS,DS = PSP; ES = arena header
		xor	si,si		; Assumes CmdName = SS:0000
		mov	di,OFFSET ARNA_ID
		mov	cx,4		; We will copy all eight characters
		DB	243,54,165 ;! TASM bug - won't allow 'rep movs [WORD ss:si],[es:di]'
NullArenaID:	dec	di
		cmp	[BYTE es:di]," " ; Skip spaces backwards
		je	NullArenaID
		inc	di		; Point just past last character
		cmp	di,OFFSET ARNA_End ; Is it the full 8 characters?
		jae	NoNullArena	; If so, don't null-terminate it
		mov	[BYTE es:di],cl	; Null-terminate name at end

; Get BlockParas before it is overwritten by the mover code

		mov	ax,[BlockParas]	; Get size of this block, in paragraphs

; Copy mover code into bottom of current stack segment

NoNullArena:	mov	si,OFFSET MoverCode
		push	ss
		pop	es		; CS,DS = PSP; ES,SS = high stack
		xor	di,di		; Zero offset into stack segment
		mov	cx,(MoverSize+1)/2 ; Move as words
		rep	movsw		; Copy mover code

; Set up parameters for mover code

		cmp	ax,1000h	; Is there at least 64K?
		jae	GotStack
		mov	cl,4
		shl	ax,cl		; Calculate offset past top of block
		xchg	ax,bp

; Leave space for a single word on the stack.  BP points just past the end
; of the stack (it was previously zero in the main code of this program; if
; there was less than 64K available to the COM program it has just been set
; to contain the offset just past the end of memory available to the program).

GotStack:	dec	bp
		dec	bp		; Start stack with one word on it

		pop	ds		; Get source segment
		ASSUME	ds:nothing	; CS = PSP; DS = entry; ES=SS
		mov	si,ARK_CONT	; Source offset is file contents

		push	cs
		pop	es		; Destination segment is here
					; CS,ES = PSP; DS = entry
		mov	di,COM_Image	; Destination offset

		mov	cx,[ds:ARK_FSIZE] ; Get length
		inc	cx		; One more byte
		shr	cx,1		; Convert to word count

; Jump to mover code at bottom of current stack segment

		push	ss		; Segment of mover code
		xor	ax,ax
		xor	bx,bx
		xor	dx,dx		; Zero some registers
		push	ax		; Offset - zero

		retf			; Jump to mover code

; Mover code image -------------------------------------------------------------

MoverCode	= $			; CS,SS = high stack; DS = arc. entry
		rep	movsw		; Move program image down to 100h
		push	es
		pop	ds		; Set DS to target segment
		push	es		; CS,SS = high stack; DS,ES = PSP
		cli
		pop	ss		; CS = high stack; DS,ES,SS = PSP
		mov	sp,bp		; Set up stack in new segment
		sti
		mov	[ss:bp],ax	; Place the termination address word
		push	es
		mov	bp,COM_Image	; Start offset is COM_Image
		push	bp
		xor	si,si		; Note - AX was already zero
		xor	di,di		; Note - CX was zeroed by the REP MOVSW
		xor	bp,bp
		retf			; Start executing COM file at 100h
MoverSize	= $ - MoverCode

ENDP	Main1

; Extract specified file(s) from archive to disk -------------------------------

		ASSUME	ds:PSP		; CS,DS,ES = PSP; SS = high stack

SwitchE:	call	ParseCmdName	; Parse name of command to execute

		mov	bx,[ArkPara]	; Get segment of start of archive

Extract1:	mov	ds,bx
		mov	es,bx
		ASSUME	ds:nothing	; CS = PSP; DS,ES = entry
		cmp	bp,[ds:ARK_FNAME] ; Is there a name?
		jz	Terminate	; If not - we're done.

		xor	si,si		; Assumes CmdName = SS:0000
		xor	di,di		; Assumes ARK_FNAME is 0
CompName1:	lods	[BYTE ss:si]	; Get char from search string
		scasb			; Compare to name in entry header
		je	Matches		; If char matches
		cmp	al,"*"		; Is source a star?
		je	Matched		; If so, the rest of the name matches
		cmp	al,"?"		; Is source a single-character wildcard?
		jne	SkipExtract	; If no match, don't extract this one
Matches:	cmp	di,8		; Done all characters?
		jb	CompName1	; If not, loop

Matched:	IF	SHOW_PROGRESS
		 call	ShowDot		; Progress indication
		ENDIF
		push	[WORD ds:ARK_FTIME] ; Keep time
		push	[WORD ds:ARK_FDATE] ;	and date
		push	[WORD ds:ARK_FSIZE] ;	and size
		mov	bx,8		; Start past end of file
Extract2:	dec	bx
		cmp	[BYTE ds:bx]," " ; Is it a padding space?
		je	Extract2
		mov	[WORD ds:bx+1],"C."
		mov	[WORD ds:bx+3],"MO"
		mov	[ds:bx+5],bp	; Add ".COM\0" to filename
		xor	dx,dx		; Point to filename
		mov	cx,00100000b	; Archive bit set, others clear
		mov	ah,3Ch
		int	21h		; Create file for write
		jc	CreateFailed
		xchg	ax,bx		; Handle to BX
		pop	cx
		mov	dx,ARK_CONT	; Point to contents
		mov	ah,40h
		int	21h		; Write
		jc	ExtractError	; If there was a write error
		cmp	ax,cx		; Did we write all bytes?
		jne	ExtractError	; If not, error
		pop	dx
		pop	cx		; Get file date and time
		mov	ax,5701h
		call	DosFCloseFile	; Set timestamp, don't check for error

SkipExtract:	mov	bx,ds
		add	bx,[ds:ARK_PARAS] ; Skip contents - go to next entry
		jmp	SHORT Extract1

ExtractError:	call	CloseFile
CreateFailed:	mov	bx,OFFSET ExtractEM
		jmp	ErrExit

Terminate:	mov	ax,4C00h
		int	21h

; List archive -----------------------------------------------------------------

		ASSUME	ds:PSP		; CS,DS,ES = PSP; SS = high stack

SwitchL:	mov	bx,[ArkPara]	; Get segment of start of archive
List1:		mov	ds,bx
		ASSUME	ds:nothing	; CS,ES = PSP; DS = entry
		cmp	bp,[ds:ARK_FNAME] ; Is there a name?
		jz	Terminate	; If not - done
		xor	si,si		; Assumes ARK_FNAME is 0
		mov	di,OFFSET LineBuf
		push	di		; For later
		mov	cx,4
		rep	movsw		; Copy name
		mov	di,OFFSET LineBuf+14
		mov	ax,"  "
		stosw
		stosw
		mov	ax,[ds:ARK_FSIZE]
		call	ToDecimal
		mov	di,OFFSET LineBuf+24
		mov	ax,[ds:ARK_FDATE]
		push	ax
		shr	ax,1
		mov	al,ah
		cbw
		add	ax,1980
		call	ToDecimal
		mov	di,OFFSET LineBuf+25
		pop	ax
		push	ax
		mov	cl,5
		shr	ax,cl
		and	al,00001111b
		call	TwoDigits6	; Month (4 bits)
		pop	ax
		call	TwoDigits5	; Day of month (5 bits)
		mov	di,OFFSET LineBuf+31
		mov	ax,[ds:ARK_FTIME]
		push	ax
		rol	ax,cl
		call	TwoDigits5	; Hours (5 bits)
		inc	di
		pop	ax
		shr	ax,cl
		call	TwoDigits6	; Minutes (6 bits)
		pop	dx		; Point to LineBuf
		push	ds		; Keep archive entry paragraph
		push	cs
		pop	ds		; CS,DS,ES = PSP
		mov	cx,LineBufLen
		mov	bx,1		; STDOUT
		mov	ah,40h
		int	21h
		pop	bx
		mov	ds,bx		; CS,ES = PSP; DS = entry
		add	bx,[ds:ARK_PARAS] ; Skip contents - go to next entry
		jmp	List1

; Rebuild archive --------------------------------------------------------------

; The following code performs the rather messy archive rebuild operation.
; The archive is created in memory, from scratch, by finding all files that
; match '*.COM' and for each file, creating a header and loading the file
; contents.  Once the archive is created in memory, the disk image of the
; ARK file is recreated.  The EXE header values are calculated and the EXE
; header is written, then the load image, which consists of the ARK stub and
; the archived files, is written in chunks of almost 64K.
; There are two error points.  RebErr1 is reached if there is a problem
; rebuilding the archive in memory, or in opening the ARK file for write.
; RebErr2 is reached if an error occurs during writing, while the ARK file
; is open.

		ASSUME	ds:PSP		; CS,DS,ES = PSP; SS = high stack

RebErr1:	mov	bx,OFFSET RebuildFailEM
		jmp	ErrExit

SwitchR:	mov	dx,OFFSET StarDotCom ; Use all files matching "*.COM"
		xor	cx,cx		; Standard attributes only
		mov	ah,4Eh
		int	21h
		mov	si,[ArkPara]	; Start at start of archive area
		jnc	RebuildLoop
		jmp	RebuildDone
RebuildLoop:	push	cs
		pop	ds		; CS,DS,ES = PSP
		IF	SHOW_PROGRESS
		 call	ShowDot		; Progress indication
		ENDIF
		cmp	bp,[WORD PSP_DTA+DTA_FSIZEH] ; Check hiword of size
		jnz	RebErr1		; If file too big - just abort
		mov	di,[PSP_DTA+DTA_FSIZEL]
		dec	di
		cmp	di,MaxCOMSize	; Check size
		jae	RebErr1		; If zero, or greater than MaxCOMSize

		push	si		; Keep paragraph to use for this file

		mov	cl,4		; Now calculate paragraphs needed
		shr	di,cl		;   and check for sufficient space
		inc	di		; (Size - 1) SHR 4 is paragraphs minus 1
		inc	di		; Add one paragraph for entry header
		IF	ARK_LENGTH NE 16
		 ERR	Change 'inc di' -- ARK_LENGTH is not 16
		ENDIF
		add	si,di		; Calculate next paragraph
		mov	cx,ss		; Get paragraph of top of usable memory
		cmp	si,cx		; Would we exceed it?
		jae	RebErr1		; If so

		mov	dx,OFFSET PSP_DTA+DTA_FNAME
		mov	ax,3D00h
		int	21h		; Open file
		jc	RebErr1
		xchg	ax,bx		; Handle to BX

		pop	ds		; Get DS pointing to current entry
		ASSUME	ds:nothing	; CS,ES = PSP; DS = entry

		mov	[ds:ARK_PARAS],di ; Store number of paragraphs

		mov	di,-1
CopyName:	inc	di
		mov	al,[cs:PSP_DTA+DTA_FNAME+di] ; Char from filename
		call	ToUpper		;; Probably not required!
		mov	[ds:ARK_FNAME+di],al ; Copy into entry header
		test	al,al
		jz	CopiedName
		cmp	al,"."
		jne	CopyName
CopiedName:	mov	[BYTE ds:ARK_FNAME+di]," "
		inc	di
		cmp	di,8
		jb	CopiedName	; Pad with trailing spaces

		mov	ax,[cs:PSP_DTA+DTA_TIME]
		mov	[ds:ARK_FTIME],ax
		mov	ax,[cs:PSP_DTA+DTA_DATE]
		mov	[ds:ARK_FDATE],ax
		mov	cx,[WORD cs:PSP_DTA+DTA_FSIZEL] ; File size is < 64K
		mov	[ds:ARK_FSIZE],cx

		mov	dx,ARK_CONT
		mov	ah,3Fh
		int	21h		; Read file contents into contents area
		pushf
		cmp	[WORD ds:ARK_CONT],"ZM" ; Check for actually an EXE file
		pushf
		xchg	ax,dx		; Keep count of bytes read
		call	CloseFile
		popf
		je	GoRebErr1	; If was an EXE file
		popf
		jc	GoRebErr1	; If error on read
		cmp	dx,cx
		jne	GoRebErr1	; Expect to have read all bytes

		mov	ah,4Fh
		int	21h		; Find next file
		jc	RebuildDone
		jmp	RebuildLoop	; If there are more

GoRebErr1:	jmp	RebErr1		; Report an error and terminate

RebuildDone:	mov	di,-1		; Initialise attempt counter

; Now recreate the executable.	Here, SI contains the segment-paragraph just
; past the last paragraph of the last file image, i.e. the paragraph where the
; dummy (zero) header will be set up.
; DI is used as an attempt counter.  Initially DI is 0FFFFh.  If the write
; operation is successful, the program terminates.  If an error occurs during
; the write operation, DI is incremented.  If this results in DI being zero
; (i.e. the first time an error occurs), then we reset SI to ArkPara (the
; paragraph of the first entry location) and return here, to write the stub
; with no archive files.  If the increment resulted in DI being non-zero,
; this means we have tried writing the stub only, and it failed too, so we
; terminate with an error message.
; After a successful write, we also check DI to see whether the first attempt
; to write a full archive failed, and if so, exit with an error message, even
; though the write of the stub (with no archive files) succeeded.

WriteExe:	push	cs
		pop	ds

		ASSUME	ds:PSP		; CS,DS,ES = PSP; SS = high stack

		mov	es,si
		mov	[es:ARK_FNAME],bp ; Mark entry as 'end of archive'

		lea	ax,[si-0Fh]	; Subtract 10h for PSP, add 1 for
		mov	cx,cs		;   final dummy header
		sub	ax,cx		; AX = paragraphs in load image

		mov	[StackSegReg],ax ; Store as startup stack paragraph
		IF	ExeHdrParas LT 3
		 REPT	ExeHdrParas
		  inc	ax
		 ENDM
		ELSE
		 add	ax,ExeHdrParas	; Add size of EXE header
		ENDIF
		push	ax
		add	ax,31
		mov	cl,5
		shr	ax,cl		; Get number of 512-byte pages in file
		mov	[FilePages],ax
		pop	ax
		dec	cx
		shl	ax,cl
		and	ax,511		; Get number of remaining bytes
		mov	[LastPgSize],ax

		lds dx,	[DWORD ProgName] ; Get program name
		ASSUME	ds:nothing	; CS,ES = PSP; DS = environment
		mov	ax,3D01h
		int	21h		; Open executable for writing
		push	cs		; Back to base segment
		pop	ds
		ASSUME	ds:PSP		; CS,DS,ES = PSP
		jc	GoRebErr1	; If error
		xchg	ax,bx		; Handle to BX

		mov	dx,OFFSET ExeHeader
		mov	cx,ExeHdrLen
		mov	ah,40h
		int	21h		; Write EXE header

		xor	si,si
		mov	[RelocItem],si	; Zero out some data fields
		mov	[LastPgSize],si
		mov	[FilePages],si
		xchg	si,[StackSegReg] ; Count of paragraphs to write

		ASSUME	ds:nothing	; CS,ES = PSP; DS = block being written

WriteLoop:	mov	ax,si		; Get paras remaining to write
		cmp	ax,WriteParas	; Write full block(s)?
		jb	DoWrite
		mov	ax,WriteParas	; Do a block
DoWrite:	sub	si,ax		; Subtract paras being written
		push	ax		; Keep number of paras to be written
		mov	cl,4
		shl	ax,cl
		xchg	ax,cx		; Count of bytes to CX
		IF	SHOW_PROGRESS
		 test	di,di
		 jns	NoStar		; Don't show asterisk if in failure fix
		 mov	dl,"*"
		 call	ShowChar
		ENDIF
NoStar:		mov	dx,COM_Image	; Offset
		mov	ah,40h
		int	21h		; Write block
		pop	dx		; Get number of paras in block
		jc	RebErr2		; If a write error
		cmp	ax,cx		; Did we write all bytes?
		jne	RebErr2		; If not, error
		mov	ax,ds
		add	ax,dx		; Bump segment up
		mov	ds,ax
		test	si,si		; More data to write?
		jnz	WriteLoop

		xor	cx,cx
		mov	ah,40h		; Write zero bytes - set file length
		call	DosFCloseFile	; Set length and close the file
		inc	di		; Did we have any problems?
		jnz	GoRebErr1B	; If so, report an error
		jmp	Terminate

RebErr2:	call	CloseFile	; Error - close file now
		mov	si,[ArkPara]	; Point to first entry location
		inc	di		; Count down attempts
		jnz	GoRebErr1B	; If no more attempts
		jmp	WriteExe	; If more, try writing stub only
GoRebErr1B:	jmp	RebErr1		; Report an error and terminate

; Utility functions ------------------------------------------------------------

		ASSUME	ds:nothing

PROC	DosFCloseFile			; CS = PSP; other regs undefined
		int	21h		; Call a DOS function and fall through
PROC	CloseFile	near
		mov	ah,3Eh
		int	21h		; Close file
		ret
ENDP	CloseFile
ENDP	DosFCloseFile

		ASSUME	ds:PSP

PROC	ParseCmdName	near		; CS,DS,ES = PSP
		call	SkipWhite
		xor	di,di
ParseCmd1:	call	ToUpper
		mov	[CmdName+di],al
		inc	di
		cmp	di,8
		ja	GoBadUsage	; If command name too long
		lodsb			; Next char
		cmp	al,13
		je	PadName
		cmp	al,"/"		; Allow option immediately after name
		je	PadName
		cmp	al," "
		ja	ParseCmd1	; Loop
PadName:	mov	[CmdName+di]," "
		inc	di
		cmp	di,8
		jb	PadName
		ret
ENDP	ParseCmdName

GoBadUsage:	jmp	BadUsage

PROC	SkipWhite	near		; CS,DS,ES = PSP
		lodsb			; Get char from command tail
		cmp	al,13
		je	GoBadUsage	; If early end of command tail
		cmp	al," "
		jbe	SkipWhite	; Loop if whitespace
		ret
ENDP	SkipWhite

		ASSUME	ds:nothing

PROC	ToDecimal	near		; CS,ES = PSP; DS undefined
		xor	dx,dx		; Clear high word of DX|AX
		mov	cx,10
		div	cx
		add	dl,"0"		; DL is remainder; convert to ASCII
		mov	[PSP:di],dl	; Store character
		dec	di		; Back up
		test	ax,ax		; Any more digits left?
		jnz	ToDecimal	; If so, loop
		ret
ENDP	ToDecimal

PROC	TwoDigits5	near		; CS,ES = PSP; DS undefined
		and	al,00011111b
PROC	TwoDigits6	near
		and	ax,0000000000111111b
		mov	ch,10
		div	ch
		add	ax,"00"
		stosw			; To PSP:di
		ret
ENDP	TwoDigits6
ENDP	TwoDigits5

		IF	SHOW_PROGRESS

PROC	ShowDot		near		; CS = PSP; DS and ES are undefined
		mov	dl,"."
PROC	ShowChar	near
		mov	ah,2
		int	21h
		ret
ENDP	ShowChar
ENDP	ShowDot

		ENDIF

; The command name and the archived file names are stored in upper case.
; I originally wanted to use lower case, but the process name stored in the
; last eight bytes of the memory arena header might need to be stored in
; upper case, and this would have required another conversion.	That is the
; only reason for choosing upper case for all file names.

PROC	ToUpper		near		; CS = PSP; DS and ES are undefined
		cmp	al,"a"
		jb	GotUpper
		cmp	al,"z"
		ja	GotUpper
		sub	al,"a"-"A"
GotUpper:	ret
ENDP	ToUpper

; Archived program image storage past end of stub ------------------------------

		MASM

prexp		MACRO	Text1,Exp,Text2		; Invoke in MASM mode only
		%OUT	Text1 &Exp Text2
		ENDM

		prexp	<Code size:> %($ - @curseg - 100h) < bytes>

		ORG	($ - @curseg + 15) AND 0FFF0h ; Align to 16 bytes

ArkParas	=	($ - @curseg) SHR 4
		prexp	<Stub size:> %(((ArkParas + ExeHdrParas) SHL 4) - COM_Image + 16) < bytes>
		IDEAL

ArkStart	DW	8 DUP(0)	; No initial entry - all blank

		ORG	8000h - 1
		DB	0		; Load image size is 7F00h bytes

		ENDS	PSP
		END	Main0

;-------------------------------------------------------------------------------
