	PAGE 60,132
	TITLE 'FILTX v1.2 - Filter ASCII text files for unwanted characters'
;
; Originally written in 8080 code for CP/M by Keith Petersen
; 05/02/85 Translated to 8086 code for MS-DOS by Dave Hardy
; 01/02/87 Fixed partial-sector read bug in READLP routine.  -DJH
;
; To assemble:
;	MASM FILTX2;			<--- Assemble it
;	LINK FILTX2;			<--- Link the object file
;	EXE2BIN FILTX2 FILTX2.COM	<--- Convert from .EXE to .COM format
;
; This program copies any ASCII file and filters out all embedded control
; characters except CR, LF and TAB.  When a CR is encountered, a LF
; is automatically appended to make the file compatible with CP/M editors.
; If an "orphan" LF is encountered, a CR and LF are written to the output.
; The program also resets the high order bit of all characters to zero so
; that files created with WordStar or other text processing programs can
; be read properly by normal routines.  The end of the output file is padded
; with continuous EOF (CTL-Z) characters to finish the sector.
;
; FILTX is also useful for fixing files which contain only CR's at the
; end of each line.
;
;
; To use:
;
;   	B>FILTX HELLO.TXT                 (1)
;   	B>FILTX HELLO.TXT NUNAME.DOC      (2)
;   	B>FILTX A:HELLO.TXT               (3)
;   	B>FILTX A:HELLO.TXT B:NUNAME.DOC  (4)
;
;   (1)  Cleans up a file named HELLO.TXT.  When finished the original
;	   file is named HELLO.BAK and the new file has the original name.
;   (2)  The original file keeps the original name.  The new file is now
;	   named NUNAME.DOC.
;   (3)  Just shows you can use two disks.  The backup file will be on
;	   the same disk as the original and assumes the original name.
;   (4)  The original file remains intact, the new file is on another
;	   drive and is named NUNAME.DOC.
;
; 
; Size of the output (write) buffer, in bytes
BSIZE	EQU	16*1024		;16k
;
;
; Character and record-size constants
TAB	EQU	9		;horizontal tab
LF	EQU	10		;line feed
CR	EQU	13		;carriage return
EOF	EQU	26		;end of file mark - ^Z
SPACE	EQU	' '		;space character
RLEN	EQU	80H		;record length in bytes
;
;
; DOS equates - all but WBOOT are INT 21H function numbers for AH
WBOOT	EQU	00H		;warm boot entry address (CP/M heritage,
				;  not referenced in the code shown)
WRCON	EQU	02H		;write character in DL to console
PRINT	EQU	09H		;print string at DS:DX until '$'
OPEN	EQU	0FH		;open disk file (FCB)
CLOSE	EQU	10H		;close disk file (FCB)
DELET	EQU	13H		;delete file (FCB)
READ	EQU	14H		;read sequential record
WRITE	EQU	15H		;write sequential record
MAKE	EQU	16H		;make (create) file
RENAME	EQU	17H		;rename a file
STDMA	EQU	1AH		;set dma (disk transfer) address
;
;
; Program starts here.  One segment holds code and data; all four
; segment registers are assumed to address it.
CODE	SEGMENT
;
	ASSUME	CS:CODE,DS:CODE,ES:CODE,SS:CODE
;
;
; Define FCBs - these labels name areas below offset 100H that the
; program reads and writes.  The ORGs fix the addresses; nothing here
; is initialized by the assembler.
	ORG	5CH
FCB	DB	10H DUP(?)	;default file control block (opened as the source file)
FCB2	DB	10H DUP(?)	;second default file control block (optional destination name)
FCB32	DB	4 DUP(?)	;7CH = FCB+32; OPENIT zeroes it as the current record byte
;
 	ORG	80H
TBUF	DB	10H DUP(?)	;default buffer address; READLP reads each source record here
;
	ORG	100H		;We're going to make a .COM file, not an .EXE
;	
;
START:	CALL	ILPRT		;sign-on banner:
	DB	CR,LF,'FILTX v1.2 (MSDOS version) - '
	DB	'ASCII file filter utility',CR,LF,CR,LF,0
	CMP	BYTE PTR FCB+1,SPACE	;first filename character blank?
	JE	NOSTRT		;yes, no source file was named
	JMP	START1		;no, go set up the files
NOSTRT:	CALL	EXIT		;print the usage text below, then exit
	DB	'   To use: B>FILTX A:HELLO.TXT              1)',CR,LF
	DB	'           B>FILTX HELLO.TXT A:NAME.NEW     2)',CR,LF,CR,LF
	DB	'   1) uses original name for new file, '
	DB	'original now named .BAK',CR,LF
	DB	'   2) original file stays intact, new '
	DB	'file has new name','$'
;
;
; Build the destination 'FCB', then open the source file
;
START1:	MOV	DX,OFFSET DEST	;both cases copy into the destination 'FCB'
	MOV	BX,OFFSET FCB2	;assume a destination file was named:
	MOV	CH,16		;  copy the first 16 bytes of the second FCB
	CMP	BYTE PTR FCB2+1,SPACE	;was one named?
	JNE	START2		;yes, copy it as set up
	MOV	BX,OFFSET FCB	;no, copy only the first 9 bytes of the
	MOV	CH,9		;  source FCB; DEST keeps .$$$ for now
START2:	CALL	MOVE
;
OPENIT:	MOV	BYTE PTR FCB32,0	;zero current record byte, source
	MOV	BYTE PTR DEST+32,0	;  and destination
	MOV	AH,OPEN
	MOV	DX,OFFSET FCB
	INT	21H
	CMP	AL,0FFH		;0FFH back in AL means the open failed
	JE	SNFERR
	JMP	MM07		;no error, continue
;
SNFERR:	CALL	EXIT
	DB	'++ SOURCE FILE NOT FOUND ++$'
;
;
; Output a character to the new file buffer.
;
;   In:   AL = character to store
;   Out:  AX and the caller's flags are preserved
;   Uses: BX, DX
;
; If the buffer pointer has reached OTPSIZ, the buffer is first written
; to DEST one RLEN-byte record at a time and the pointer is reset to
; zero; the character then goes into the emptied buffer.
;
; A failed write prints the 'DISK FULL' message, closes and erases the
; incomplete output file (the same clean-up ERXIT performs for a read
; error) and terminates the program.
;
OUTCHR:	PUSHF			;everything below disturbs the flags
	PUSH	AX		;store the character for now
	MOV	BX,OTPPTR	;get the buffer pointer
	CMP	BX,OTPSIZ	;room in the buffer?
	JAE	NORM		;no, write the buffer out first
;
; Store the character.  BX = buffer pointer on entry to MM06.
;
MM06:	POP	AX		;get the character back
	ADD	BX,OTPADR	;BX -> next free byte of the buffer
	MOV	[BX],AL		;store the character
	INC	WORD PTR OTPPTR	;advance the buffer pointer
	POPF
	RET
;
; Buffer full - write OTPSIZ bytes to disk, one record per pass
;
NORM:	XOR	BX,BX		;start with the first record
MM2:	MOV	OTPPTR,BX	;remember how far we have got
	CMP	BX,OTPSIZ	;whole buffer written?
	JAE	MM05		;yes, go store the character
	MOV	DX,OTPADR
	ADD	DX,BX		;DX -> record to write
	MOV	AH,STDMA
	INT	21H
	MOV	DX,OFFSET DEST
	MOV	AH,WRITE
	INT	21H
	OR	AL,AL		;write ok?
	JNZ	MM03		;no, give up
	MOV	BX,OTPPTR
	ADD	BX,RLEN		;step to the next record
	JMP	MM2
;
MM05:	MOV	DX,OFFSET TBUF	;put the dma address back on the read buffer
	MOV	AH,STDMA
	INT	21H
	XOR	BX,BX		;the buffer is empty again
	MOV	OTPPTR,BX
	JMP	MM06
;
; Write failed.  The stack is not unwound because the program ends here.
;
MM03:	MOV	AH,PRINT
	MOV	DX,OFFSET MM04
	INT	21H
	MOV	AH,CLOSE	;close the output file so that it can
	MOV	DX,OFFSET DEST	;  be erased
	INT	21H
	MOV	AH,DELET	;erase the incomplete output file
	MOV	DX,OFFSET DEST
	INT	21H
	JMP	EXIT1
;
MM04	DB	CR,LF
	DB	'DISK FULL: OUTPUT'
	DB	'$'
;
;
MM07:	MOV	BYTE PTR DEST+12,0	;Init destination FCB: byte 12
	MOV	BYTE PTR DEST+32,0	;  and the current record byte
	MOV	WORD PTR OTPSIZ,BSIZE	;Output size word = full buffer size
	MOV	WORD PTR OTPPTR,0	;Output pointer = start of buffer
	MOV	DX,OFFSET DEST	;Create the new output file (and automatically
	MOV	AH,MAKE		;delete any old one of the same name)
	INT	21H
	CMP	AL,0FFH		;0FFH back in AL means the create failed
	JNE	MM09		;Jump if able to create file, else error
	MOV	DX,OFFSET MM08
	MOV	AH,PRINT
	INT	21H
	JMP	EXIT1
;
MM08	DB	CR,LF
	DB	'NO DIR SPACE: OUTPUT'
	DB	'$'
;
MM09:	CALL	ILPRT		;print:
	DB	'Input and output files open',CR,LF,CR,LF,0
;
;
; Read the next record of the source file into TBUF
;
; INT 21H function 14H (read sequential) returns in AL:
;	0 = record read successfully
;	1 = no data read (end of file)
;	2 = transfer segment too small (can't happen here)
;	3 = partial record read
;
READLP:	MOV	AH,STDMA	;point the dma address at the read buffer
	MOV	DX,OFFSET TBUF
	INT	21H
	MOV	AH,READ
	MOV	DX,OFFSET FCB
	INT	21H
	CMP	AL,1		;end-of-file?
	JNE	RDCHK
	JMP	TDONE		;yes, transfer done, close, exit
RDCHK:	OR	AL,AL		;full record read?
	JZ	WRDISK		;yes, send it to output
	CMP	AL,3		;partial record read?
	JE	WRDISK		;yes, send it to output
;
NTDONE:	CALL	ERXIT		;anything else is a read error
	DB	'++ SOURCE FILE READ ERROR ++$'
;
;
; Filter the record in the read buffer into the output file (with
; buffering).
;
;   BX walks the record starting at TBUF (80H).  Only BL is incremented,
;   so the record is finished when BL wraps round to zero.
;
;   A raw ^Z ends the transfer.  DEL and all control characters except
;   CR, LF and TAB are dropped and counted.  The high-order bit is
;   stripped from everything else.
;
;   A CR always writes CR,LF.  The LF that follows a CR is dropped,
;   because it has already been written.  Any other LF is an "orphan" and
;   also writes CR,LF.  LSTCHR records whether the previous line-end
;   character was a CR or a LF, so a run of bare LFs (blank lines in a
;   file that uses LF alone) keeps every line; previously every LF that
;   followed a LF was discarded.
;
WRDISK:	MOV	BX,OFFSET TBUF	;read buffer address
;
WRDLOP:	MOV	AL,BYTE PTR [BX]	;get byte from read buffer
	CMP	AL,EOF		;end of file marker ?
	JNE	NTDON2
	JMP	TDONE		;transfer done, close, exit
NTDON2:	TEST	AL,80H		;hi-bit turned on?
	JZ	WRDLP2		;no, don't count it
	INC	WORD PTR HCOUNT	;count one more hi-bit zeroed
;
WRDLP2:	AND	AL,7FH		;strip parity bit
	CMP	AL,7FH		;del (rubout) ?
	JE	IGNORE		;yes, ignore it
	CMP	AL,SPACE	;space or above?
	JAE	PUTCHR		;yes go write it
	CMP	AL,TAB		;tab character ?
	JE	PUTCHR		;yes, go write it
	CMP	AL,CR		;carriage return ?
	JE	NTSPAC		;yes, count the line and write cr,lf
	CMP	AL,LF		;line feed ?
	JE	WRLF		;yes process it
;
;
; Ignore character and add one to ignore count
;
IGNORE:	INC	WORD PTR DCOUNT	;one more character deleted
	JMP	SHORT TSTEND
;
;
; Carriage return - count the input line, remember the CR so that the
; LF which normally follows it is not written twice
;
NTSPAC:	INC	WORD PTR ICOUNT
	MOV	BYTE PTR LSTCHR,CR
	JMP	SHORT WRCRLF
;
;
; Line feed - if the previous line-end character was a CR this LF has
; already been written.  Otherwise it is an "orphan" LF and a CR,LF pair
; is written for it.  Either way LSTCHR becomes LF, so the next LF in a
; row is treated as an orphan.  (MOV leaves the flags from CMP intact.)
;
WRLF:	CMP	BYTE PTR LSTCHR,CR	;did a cr come just before?
	MOV	BYTE PTR LSTCHR,LF
	JE	TSTEND		;yes, ignore this one
	INC	WORD PTR OLFCNT	;no, count the orphan lf
;
;
; Write a CR and LF to the file.
;
WRCRLF:	INC	WORD PTR OCOUNT	;one more output line
	PUSH	BX		;save input buffer address
	MOV	AL,CR
	CALL	OUTCHR		;write cr to file
	MOV	AL,LF
	CALL	OUTCHR		;write lf to file
	POP	BX		;get input buffer address back
	JMP	SHORT TSTEND
;
;
; Write character to output buffer
;
PUTCHR:	MOV	LSTCHR,AL	;save char for later
	PUSH	BX		;save input buffer address
	CALL	OUTCHR
	POP	BX		;get input buffer address back
;
TSTEND:	INC	BL		;done with sector?
	JZ	DNSEC
	JMP	WRDLOP		;no, get another byte
DNSEC:
	JMP	READLP		;go get another sector
;
;
; Transfer is done - pad the last record with EOF marks, write out what
; is left in the buffer, close the destination file and print the counts
;
TDONE:	MOV	BX,OTPPTR
	TEST	BL,RLEN-1	;buffer pointer on a record boundary?
	JZ	MM10		;yes, ready to write the buffer out
	MOV	AL,EOF		;no, pad with one more ^Z
	CALL	OUTCHR
	JMP	TDONE		;  and look again
;
MM10:	MOV	OTPSIZ,BX	;make the buffer size equal to the pointer so
	MOV	AL,EOF		;  that OUTCHR finds the buffer full and
	CALL	OUTCHR		;  writes it to disk before storing this ^Z
XTDONE:	MOV	DX,OFFSET DEST
	MOV	AH,CLOSE
	INT	21H
	CMP	AL,0FFH		;0FFH back in AL means the close failed
	JNE	MM12
	MOV	DX,OFFSET MM11
	MOV	AH,PRINT
	INT	21H
	JMP	MM12
;
MM11	DB	CR,LF
	DB	'CANNOT CLOSE OUTPUT'
	DB	'$'
;
; Report the counters, each as a decimal number followed by its caption
;
MM12:	CALL	ILPRT		;print:
	DB	'Function complete.',CR,LF,CR,LF,0
	MOV	BX,ICOUNT	;input lines
	CALL	DECOUT
	CALL	ILPRT
	DB	' input lines read'
	DB	CR,LF,0
	MOV	BX,OCOUNT	;output lines
	CALL	DECOUT
	CALL	ILPRT
	DB	' output lines written with:'
	DB	CR,LF,TAB,0
	MOV	BX,DCOUNT	;deleted characters
	CALL	DECOUT
	CALL	ILPRT
	DB	' CTL-characters deleted'
	DB	CR,LF,TAB,0
	MOV	BX,HCOUNT	;hi-order bits zeroed
	CALL	DECOUT
	CALL	ILPRT
	DB	' high-order bits deleted'
	DB	CR,LF,TAB,0
	MOV	BX,OLFCNT	;orphan line feeds
	CALL	DECOUT
	CALL	ILPRT
	DB	' orphan LFs fixed',0
;
;
; If no destination file name was specified (DEST still has its .$$$
; extension) rename both files: the source becomes the .BAK file and the
; new file takes over the source's name
;
NUNAME:	CMP	BYTE PTR DEST+9,'$'	;extension still starts with '$'?
	JE	NODEST		;yes, do the renaming
	JMP	EXIT1		;no, the new file already has its own name
;
NODEST:	MOV	CH,16		;ONAME = first 16 bytes of the source FCB
	MOV	DX,OFFSET ONAME
	MOV	BX,OFFSET FCB
	CALL	MOVE
	MOV	CH,8		;new name at ONAME+17 = same 8-character
	MOV	DX,OFFSET ONAME+17	;  name, with the BAK already there
	MOV	BX,OFFSET FCB+1
	CALL	MOVE
	MOV	CH,9		;SNAME = first 9 bytes of the source FCB,
	MOV	DX,OFFSET SNAME	;  followed by its own BAK
	MOV	BX,OFFSET FCB
	CALL	MOVE
	MOV	AH,DELET	;erase any earlier .BAK file
	MOV	DX,OFFSET SNAME
	INT	21H
	MOV	AH,RENAME	;source file becomes the .BAK file
	MOV	DX,OFFSET ONAME
	INT	21H
;
;
; Now rename the new file
;
	MOV	CH,16		;new name at DEST+17 = source name and
	MOV	DX,OFFSET DEST+17	;  extension (16 bytes are copied)
	MOV	BX,OFFSET FCB+1
	CALL	MOVE
	MOV	AH,RENAME
	MOV	DX,OFFSET DEST
	INT	21H
	JMP	EXIT1
;
;
; Erase the incomplete output file, then exit
;
; Entered by a CALL that is followed by a '$'-terminated message.  The
; return address is left on the stack for EXIT, which this code runs
; into.  The buffer is padded with EOF marks and written out, as in
; TDONE, and the file is closed before it is erased.
;
ERXIT:	MOV	BX,OTPPTR
	TEST	BL,RLEN-1	;buffer pointer on a record boundary?
	JZ	MM13		;yes, ready to write the buffer out
	MOV	AL,EOF		;no, pad with one more ^Z
	CALL	OUTCHR
	JMP	ERXIT		;  and look again
;
MM13:	MOV	OTPSIZ,BX	;make the buffer size equal to the pointer so
	MOV	AL,EOF		;  that OUTCHR finds the buffer full and
	CALL	OUTCHR		;  writes it to disk before storing this ^Z
	MOV	DX,OFFSET DEST
	MOV	AH,CLOSE
	INT	21H
	CMP	AL,0FFH		;0FFH back in AL means the close failed
	JNE	MM15
	MOV	DX,OFFSET MM14
	MOV	AH,PRINT
	INT	21H
	JMP	MM15
;
MM14	DB	CR,LF
	DB	'CANNOT CLOSE OUTPUT'
	DB	'$'
;
MM15:	MOV	DX,OFFSET DEST	;erase the output file
	MOV	AH,DELET
	INT	21H
;
;
; Print the '$'-terminated message that follows the caller's CALL
; instruction, then exit to DOS.  EXIT1 exits without a message.
;
EXIT:	MOV	AH,PRINT	;print message
	POP	DX		;return address = message address
	INT	21H
;
EXIT1:	INT	20H		;terminate the program
;
;
; Inline print routine - prints the zero-terminated string that follows
; the caller's CALL instruction, then returns to the instruction after
; the terminator.
;
;   Uses: AX, BX
;
; Each byte is tested before it is printed, so an empty string (a zero
; directly after the CALL) prints nothing.  Previously the first byte was
; printed unconditionally and the scan then ran on past the terminator.
;
ILPRT:	POP	BX		;BX -> first byte of the string
ILPLP:	MOV	AL,BYTE PTR [BX]	;get char
	INC	BX		;step past it
	OR	AL,AL		;end of string?
	JZ	ILPDN		;yes, BX is now the return address
	CALL	TYPIT		;no, output it (BX is preserved)
	JMP	ILPLP
ILPDN:	PUSH	BX
	RET			;return past the end of the message
;
;
; Copy CH bytes from [BX] to [DX]
;
;   In:   BX = source address, DX = destination address, CH = byte count
;   Out:  BX and DX point just past the bytes copied, CH = 0
;   Uses: AL
;
MOVE:	PUSH	DI		;DI serves as the destination pointer
	MOV	DI,DX
MOVLP:	MOV	AL,BYTE PTR [BX]
	MOV	[DI],AL
	INC	BX
	INC	DI
	DEC	CH		;decrement byte count
	JNZ	MOVLP		;if not zero, move another byte
	MOV	DX,DI		;hand the advanced pointer back in DX
	POP	DI
	RET
;
;
; Send the character in AL to the console
;
;   BX, CX and DX are saved and restored; AX is not.
;
TYPIT:	PUSH	BX
	PUSH	CX
	PUSH	DX
	MOV	AH,WRCON	;write to console
	MOV	DL,AL		;DOS takes the character in DL
	INT	21H
	POP	DX
	POP	CX
	POP	BX
	RET
;
;
; Decimal output - print BX as an unsigned decimal number with leading
; zero suppression (a value of zero prints as a single '0').
;
;   In:   BX = value to print
;   Out:  BX, CX and DX are preserved
;   Uses: AX
;
; The value is divided by ten; the routine calls itself to print the
; quotient first, if there is one, and then prints the remainder as the
; final digit.  This replaces a repeated-subtraction loop and the
; RCR SI,1 / RCL SI,1 pairs left over from the 8080 translation.
;
DECOUT:	PUSH	CX
	PUSH	DX
	PUSH	BX
	MOV	AX,BX		;DX:AX = value to divide
	XOR	DX,DX
	MOV	CX,10
	DIV	CX		;AX = value / 10, DX = value MOD 10
	OR	AX,AX		;any higher-order digits?
	JZ	NTDC2		;no, this is the leading digit
	MOV	BX,AX
	CALL	DECOUT		;yes, print them first (DX is preserved)
NTDC2:	MOV	AL,DL
	ADD	AL,'0'		;make the digit ASCII
	CALL	TYPIT
	POP	BX
	POP	DX
	POP	CX
	RET
;
;
; 'Declare' output file
;
; DEST is the 33-byte FCB of the output file: byte 0, an 8-character
; name and a 3-character extension preset to '$$$', then bytes 12-32.
; START1 copies a name in; NUNAME tests DEST+9 for '$' and puts the new
; name for the final rename at DEST+17.
;
; ONAME is the block used to rename the source file: NODEST copies the
; source FCB to ONAME and the 8-character name to ONAME+17, in front of
; the 'BAK' already there.
;
; SNAME receives the first 9 bytes of the source FCB in front of its
; own 'BAK' and is used to delete an earlier .BAK file.
;
DEST	DB	0,'        $$$',0,0,0,0,0,0,0
	DB	0,0,0,0,0,0,0,0,0,0,0,0,0,0
ONAME	DB	0,'           ',0,0,0,0,0,'        BAK',0,0,0,0
SNAME	DB	0,'        BAK',0,0,0,0,0
;
OTPADR	DW	BUFFER		;Output buffer address
OTPSIZ	DW	BSIZE		;Output buffer size (TDONE and ERXIT lower it to force the final write)
OTPPTR	DW	0		;Output buffer pointer (offset of the next free byte)
;
LSTCHR	DB	0		;last character reminder
ICOUNT	DW	0		;input line counter
OCOUNT	DW	0		;output line counter
DCOUNT	DW	0		;deleted character counter
OLFCNT	DW	0		;orphan line feed counter
HCOUNT	DW	0		;hi-order bits zeroed counter
;
;
; Set write buffer to even page boundary
;
; ($-START) is the number of bytes assembled since START.  Adding 256 and
; subtracting the remainder MOD 256 rounds that up to the next multiple
; of 256; the final +256 adds back the 100H at which START is located.
;
	ORG	($-START)+256-(($-START) MOD 256)+256
;
BUFFER	EQU	$		;write buffer starts here
STACK	EQU	BUFFER-2	;not referenced in the code shown
;
CODE	ENDS
	END	START		;START is the program entry point
