451 lines
11 KiB
C
451 lines
11 KiB
C
/* FILECOMP.C
|
||
|
||
file compare program, similar to unix diff
|
||
|
||
Copyright (c) Borland International 1987
|
||
All Rights Reserved.
|
||
*/
|
||
|
||
/**********************************************************************
|
||
|
||
The FILECOMP program implements Paul Heckel's algorithm from the
|
||
Communications of the Association for Computing Machinery,
|
||
April 1978 for detecting the differences between two files. This
|
||
algorithm has the advantage over more commonly used compare
|
||
algorithms that it is fast and can detect differences of an
|
||
arbitrary number of lines. It has the defect that it reads both
|
||
files twice if there are differences.
|
||
|
||
It uses getopt to parse command line options.
|
||
|
||
Compile with Turbo C,
|
||
|
||
tcc -O -Z filecomp getopt
|
||
|
||
You may also use the -p (pascal) option.
|
||
|
||
The command format is: FILECOMP [options] filespec1 filespec2
|
||
|
||
Options:
|
||
/f show full lines.
|
||
/t expand tabs before comparing.
|
||
/b ignore trailing blanks.
|
||
/w ignore spaces and tabs.
|
||
/y case insensitive compare.
|
||
|
||
Defaults are:
|
||
Brief show first 34 characters of lines.
|
||
No tabs don't expand tabs.
|
||
No trim don't ignore trailing blanks.
|
||
White compare spaces and tabs.
|
||
Case case sensitive compare.
|
||
|
||
Arbitrary path qualification is allowed in the filespecs. In addition,
|
||
the output can be redirected to the printer or a file with normal DOS
|
||
redirection conventions (e. g. >PRN).
|
||
|
||
By default (Brief), output is displayed with the two files side-by-side.
|
||
This is felt to make it easier to spot the location of differences, even
|
||
though only 34 characters of each record can be shown. The Full option
|
||
causes the whole record to be shown. Along with each record displayed is
|
||
its line number in the file it came from.
|
||
|
||
If either file contains tabs, they will not be expanded before
|
||
comparison unless the Tabs option is used. If Tabs is not used, any tabs
|
||
in displayed records will probably make the output appear strange, since
|
||
they will be expanded by DOS.
|
||
|
||
A design limitation of the program is that only the first 256
|
||
characters of each record are compared.
|
||
|
||
**********************************************************************/
|
||
|
||
|
||
#include <stdio.h>
|
||
#include <ctype.h> /* for toupper */
|
||
#include <string.h> /* for strcpy, strupr */
|
||
|
||
#define MAXLINES 5000
|
||
#define FULL 0x80
|
||
#define TABS 0x40
|
||
#define TRIM 0x20
|
||
#define WHITE 0x10
|
||
#define CASE 0x08
|
||
#define BLANK 0x04
|
||
|
||
#define TRUE 1
|
||
#define FALSE 0
|
||
|
||
long hash1[MAXLINES], hash2[MAXLINES];
|
||
unsigned char occ1[8192], occ2[8192];
|
||
int n1, n2;
|
||
FILE *file1, *file2;
|
||
char name[80], *s;
|
||
int different = 0;
|
||
|
||
unsigned char flag1 = WHITE;
|
||
|
||
void givehelp(void)
|
||
{
|
||
printf("Usage is: FILECOMP [options] filespec1 filespec2\n");
|
||
printf("Options:\n");
|
||
printf(" /f\tshow full lines.\n");
|
||
printf(" /t\texpand tabs before comparing.\n");
|
||
printf(" /b\tignore trailing blanks.\n");
|
||
printf(" /w\tignore spaces and tabs.\n");
|
||
printf(" /y\tcase insensitive compare.\n");
|
||
printf("filespec2 can be a drive or directory name.\n");
|
||
}
|
||
|
||
/* expand tabs */
|
||
void tabex(unsigned char *s1, unsigned char *s2)
|
||
{
|
||
int i;
|
||
unsigned char j;
|
||
for (i=j=0; s1[i]; i++) {
|
||
if (s1[i] != '\t') {
|
||
s2[j++] = s1[i];
|
||
continue;
|
||
}
|
||
do s2[j++] = ' '; while(j%8 != 0);
|
||
}
|
||
s2[j] = 0;
|
||
}
|
||
|
||
/* zap white space */
|
||
void zapwhite(unsigned char *s1, unsigned char *s2)
|
||
{
|
||
int i, j;
|
||
for (i=j=0; s1[i]; i++) {
|
||
if (s1[i] != ' ' && s1[i] != '\t')
|
||
s2[j++] = s1[i];
|
||
}
|
||
s2[j] = 0;
|
||
}
|
||
|
||
/* extract bits from the occurrence vector */
|
||
unsigned char getbits(unsigned char *array, unsigned long indx)
|
||
{
|
||
unsigned i, j;
|
||
indx &= 32767;
|
||
i = indx>>2;
|
||
j = indx - (i<<2);
|
||
return ((array[i]>>((3-j)<<1)) & 0x03);
|
||
}
|
||
|
||
/* store bits in the occurrence array */
|
||
void setbits(unsigned char *array, unsigned long indx, unsigned char x)
|
||
{
|
||
unsigned i, j, shift;
|
||
|
||
indx &= 32767;
|
||
i = indx>>2;
|
||
j = indx - (i<<2);
|
||
shift = (3-j)<<1;
|
||
array[i] &= ~(0x03<<shift);
|
||
array[i] |= x<<shift;
|
||
}
|
||
|
||
/* read in file, build hash & occurrence tables */
|
||
int input(FILE *file, long hashvect[], unsigned char occ[])
|
||
{
|
||
int i, j;
|
||
long h;
|
||
unsigned char bits, buffer[256], temp[256];
|
||
long hash();
|
||
|
||
for (i=0; i<MAXLINES; i++) {
|
||
if (flag1&WHITE) {
|
||
if (fgets(temp,256,file)==0) return i;
|
||
zapwhite(temp,buffer);
|
||
} else if ((flag1&TABS)==0) {
|
||
if (fgets(buffer,256,file)==0) return i;
|
||
} else {
|
||
if (fgets(temp,256,file)==0) return i;
|
||
tabex(temp,buffer);
|
||
}
|
||
if (flag1&CASE) strupr(buffer);
|
||
if (flag1&TRIM) {
|
||
for (j=0; j<256 && j>=0 && buffer[j] && buffer[j]!='\n'; j++);
|
||
for (j=j-1; j>=0 && buffer[j]==' ' ; j--);
|
||
buffer[j+1] = 0;
|
||
}
|
||
h = hash(buffer);
|
||
if (h<0)
|
||
hashvect[i] = h;
|
||
else
|
||
hashvect[i] = -h;
|
||
bits = getbits(occ,-hashvect[i]);
|
||
if (bits==0)
|
||
setbits(occ,-hashvect[i],1);
|
||
else if (bits==1)
|
||
setbits(occ,-hashvect[i],2);
|
||
}
|
||
printf("File truncated at %d lines.\n",MAXLINES);
|
||
return i-1;
|
||
}
|
||
|
||
/* hash a character string */
|
||
long hash(unsigned char *s)
|
||
{
|
||
long h=0, h1;
|
||
while (*s) {
|
||
h1 = h;
|
||
h = h<<1;
|
||
if (h1<0) h |= 1;
|
||
h ^= *s++;
|
||
}
|
||
if (h==0) h = 1;
|
||
return h;
|
||
}
|
||
|
||
/* display the results of comparison */
|
||
void output(int l1,int l2)
|
||
{
|
||
static int cl1 = 0, cl2 = 0;
|
||
char line[81];
|
||
unsigned int bi1=0, bi2=0;
|
||
unsigned char end1=1, end2=1;
|
||
int i;
|
||
char buffer1[256], buffer2[256], temp[256];
|
||
|
||
different = 1;
|
||
|
||
for (i=0; i<80; i++) line[i] = ' ';
|
||
line[80] = 0;
|
||
if (l1>=0) {
|
||
for (i=cl1; i<=l1; i++)
|
||
if((flag1&TABS)==0) fgets(buffer1,256,file1);
|
||
else {
|
||
fgets(temp,256,file1);
|
||
tabex(temp,buffer1);
|
||
}
|
||
cl1 = l1 + 1;
|
||
sprintf(line,"%4d ",l1+1);
|
||
line[5] = ' ';
|
||
for (i=0; buffer1[i+bi1] && buffer1[i+bi1]!='\n' && i<34; i++)
|
||
line[i+5] = buffer1[i+bi1];
|
||
if (i==34) {
|
||
bi1 += 34;
|
||
end1 = 0;
|
||
}
|
||
}
|
||
if (l2>=0) {
|
||
for (i=cl2; i<=l2; i++)
|
||
if((flag1&TABS)==0) fgets(buffer2,256,file2);
|
||
else {
|
||
fgets(temp,256,file2);
|
||
tabex(temp,buffer2);
|
||
}
|
||
cl2 = l2 + 1;
|
||
sprintf(line+40,"%4d ",l2+1);
|
||
line[45] = ' ';
|
||
for (i=0; buffer2[i+bi2] && buffer2[i+bi2]!='\n' && i<34; i++)
|
||
line[i+45] = buffer2[i+bi2];
|
||
if (i==34) {
|
||
bi2 += 34;
|
||
end2 = 0;
|
||
}
|
||
}
|
||
line[45+i] = '\n';
|
||
line[46+i] = 0;
|
||
fwrite(line,1,46+i,stdout);
|
||
if (flag1 & FULL) while (!end1 || !end2) {
|
||
for (i=0; i<80; i++) line[i] = ' ';
|
||
if (!end1) {
|
||
for (i=0; buffer1[i+bi1] && buffer1[i+bi1]!='\n' && i<34; i++)
|
||
line[i+5] = buffer1[i+bi1];
|
||
if (i==34) bi1 += 34; else end1 = 1;
|
||
}
|
||
if (!end2) {
|
||
for (i=0; buffer2[i+bi2] && buffer2[i+bi2]!='\n' && i<34; i++)
|
||
line[i+45] = buffer2[i+bi2];
|
||
if (i==34) bi2 += 34; else end2 = 1;
|
||
}
|
||
line[45+i] = '\n';
|
||
line[46+i] = 0;
|
||
fwrite(line,1,46+i,stdout);
|
||
}
|
||
}
|
||
|
||
/* match strings with specified minimum */
|
||
int match(unsigned char *s1, unsigned char *s2, unsigned char min)
|
||
{
|
||
unsigned int i;
|
||
for (i=0; *s1 && *s2; i++)
|
||
if (toupper(*s1++) != *s2++) return 0;
|
||
if (*s1==0) return i>=min;
|
||
return 0;
|
||
}
|
||
|
||
/* main program */
|
||
int cdecl main(int argc, char *argv[])
|
||
{
|
||
int i,j,k,opt,ifile;
|
||
unsigned char linked;
|
||
|
||
extern int getopt(int argc, char *argv[], char *optionS);
|
||
extern int opterr, optind;
|
||
|
||
if (argc<3) {
|
||
givehelp();
|
||
return 0;
|
||
}
|
||
|
||
/* get options */
|
||
|
||
opterr = FALSE; /* handle errors ourself */
|
||
while ((opt = getopt(argc, argv, "ftbwy")) != EOF) {
|
||
switch (opt) {
|
||
case '?':
|
||
printf("Invalid command line option\n");
|
||
givehelp();
|
||
return(1);
|
||
case 'f':
|
||
flag1 |= FULL; break;
|
||
case 't':
|
||
flag1 |= TABS; break;
|
||
case 'b':
|
||
flag1 |= TRIM; break;
|
||
case 'w':
|
||
flag1 &= ~WHITE; break;
|
||
case 'y':
|
||
flag1 |= CASE; break;
|
||
}
|
||
}
|
||
|
||
ifile = optind; /* index of first file parm */
|
||
|
||
/* step 1: read first file and hash it */
|
||
file1 = fopen(argv[ifile],"r");
|
||
if (file1==0) {
|
||
printf("Unable to open file '%s'\n",argv[ifile]);
|
||
return 2;
|
||
}
|
||
|
||
printf("Reading file '%s'.\n",argv[ifile]);
|
||
n1 = input(file1,hash1,occ1);
|
||
fseek(file1,0L,0);
|
||
|
||
/* get the file name, with dir name stripped off */
|
||
for (i=j=0; (k=argv[ifile][i]) != 0; ++i)
|
||
if (k == ':' || k == '\\' || k == '/') j = i + 1;
|
||
s = argv[1] + j;
|
||
|
||
/* if argv[ifile+1] ends in : or \, tack on 1st file name */
|
||
for (i=j=0; (k=argv[ifile+1][i]) != 0; ++i)
|
||
if (k == ':' || k == '\\' || k == '/') j = i + 1;
|
||
strcpy(name,argv[ifile+1]);
|
||
if (j == i) strcpy(name+j,s);
|
||
|
||
/* step 2: read second file and hash it */
|
||
file2 = fopen(name,"r");
|
||
if (file2==0) {
|
||
/* maybe argv[ifile] was a directory, so try again */
|
||
if (j != i) {
|
||
name[i] = '\\';
|
||
strcpy(name+i+1,s);
|
||
file2 = fopen(name,"r");
|
||
}
|
||
}
|
||
|
||
if (file2==0) {
|
||
printf("Unable to open file '%s'.\n",name);
|
||
return 2;
|
||
}
|
||
|
||
printf("Reading file '%s'.\n",name);
|
||
n2 = input(file2,hash2,occ2);
|
||
fseek(file2,0L,0);
|
||
|
||
/* step 3: identify lines that are unique in both files */
|
||
for (i=0; i<8192; i++) occ1[i] &= occ2[i];
|
||
|
||
/* step 4: link together matching unique lines */
|
||
for (i=0; i<n1; i++) {
|
||
if (getbits(occ1,-hash1[i])!=1) continue;
|
||
for (j=0; i+j<n2 || i-j>=0; j++) {
|
||
if (i+j<n2) if (hash2[i+j]==hash1[i]) {
|
||
hash1[i] = i+j;
|
||
hash2[i+j] = i;
|
||
break;
|
||
}
|
||
if (i-j>=0) if (hash2[i-j]==hash1[i]) {
|
||
hash1[i] = i-j;
|
||
hash2[i-j] = i;
|
||
break;
|
||
}
|
||
}
|
||
}
|
||
|
||
/* step 5: link the first and last lines, if possible */
|
||
if (hash1[0]<0 && hash1[0]==hash2[0]) hash1[0] = hash2[0] = 0;
|
||
if (hash1[n1-1]<0 && hash1[n1-1]==hash2[n2-1]) {
|
||
hash1[n1-1] = n2-1;
|
||
hash2[n2-1] = n1-1;
|
||
}
|
||
|
||
/* step 6: starting from linked lines, link following lines that match */
|
||
linked = 0;
|
||
for (i=0; i<n1; i++) {
|
||
if (hash1[i]>=0) linked = 1;
|
||
else if (linked==1) {
|
||
if (hash1[i]==hash2[hash1[i-1]+1]) {
|
||
hash1[i] = hash1[i-1]+1;
|
||
hash2[hash1[i]] = i;
|
||
}
|
||
else linked = 0;
|
||
}
|
||
}
|
||
|
||
/* step 7: link matching lines that precede linked lines */
|
||
linked = 0;
|
||
for (i=n1-1; i>=0; i--) {
|
||
if (hash1[i]>=0) linked = 1;
|
||
else if (linked==1) {
|
||
if (hash1[i]==hash2[hash1[i+1]-1]) {
|
||
hash1[i] = hash1[i+1] - 1;
|
||
hash2[hash1[i]] = i;
|
||
} else linked = 0;
|
||
}
|
||
}
|
||
|
||
/* step 8: display the results */
|
||
for (i=j=0; i<n1 && j<n2;) {
|
||
if (hash1[i]<j && hash2[j]<i) {
|
||
output(i++,j++);
|
||
continue;
|
||
}
|
||
if (hash1[i]<j) {
|
||
output(i++,-1);
|
||
continue;
|
||
}
|
||
if (hash2[j]<i) {
|
||
output(-1,j++);
|
||
continue;
|
||
}
|
||
if (hash1[i]==j) {
|
||
for (k=1; i+k<=n1 && j+k<=n2 && hash1[i+k]==j+k; k++);
|
||
printf("\n*** %d line(s) match. ***\n\n",k);
|
||
i += k;
|
||
j += k;
|
||
continue;
|
||
}
|
||
if (hash1[i]-j <= hash2[j]-i) {
|
||
for (k=j; k<hash1[i]; k++) output(-1,k);
|
||
j = hash1[i];
|
||
continue;
|
||
} else {
|
||
for (k=i; k<hash2[j]; k++) output(k,-1);
|
||
i = hash2[j];
|
||
continue;
|
||
}
|
||
}
|
||
if (i<n1) for (k=i; k<n1; k++) output(k,-1);
|
||
if (j<n2) for (k=j; k<n2; k++) output(-1,k);
|
||
fclose(file1);
|
||
fclose(file2);
|
||
return different;
|
||
}
|
||
/* end of program */
|
||
|