/* FILECOMP.C file compare program, similar to unix diff Copyright (c) Borland International 1987 All Rights Reserved. */ /********************************************************************** The FILECOMP program implements Paul Heckel's algorithm from the Communications of the Association for Computing Machinery, April 1978 for detecting the differences between two files. This algorithm has the advantage over more commonly used compare algorithms that it is fast and can detect differences of an arbitrary number of lines. It has the defect that it reads both files twice if there are differences. It uses getopt to parse command line options. Compile with Turbo C, tcc -O -Z filecomp getopt You may also use the -p (pascal) option. The command format is: FILECOMP [options] filespec1 filespec2 Options: /f show full lines. /t expand tabs before comparing. /b ignore trailing blanks. /w ignore spaces and tabs. /y case insensitive compare. Defaults are: Brief show first 34 characters of lines. No tabs don't expand tabs. No trim don't ignore trailing blanks. White compare spaces and tabs. Case case sensitive compare. Arbitrary path qualification is allowed in the filespecs. In addition, the output can be redirected to the printer or a file with normal DOS redirection conventions (e. g. >PRN). By default (Brief), output is displayed with the two files side-by-side. This is felt to make it easier to spot the location of differences, even though only 34 characters of each record can be shown. The Full option causes the whole record to be shown. Along with each record displayed is its line number in the file it came from. If either file contains tabs, they will not be expanded before comparison unless the Tabs option is used. If Tabs is not used, any tabs in displayed records will probably make the output appear strange, since they will be expanded by DOS. A design limitation of the program is that only the first 256 characters of each record are compared. **********************************************************************/ #include #include /* for toupper */ #include /* for strcpy, strupr */ #define MAXLINES 5000 #define FULL 0x80 #define TABS 0x40 #define TRIM 0x20 #define WHITE 0x10 #define CASE 0x08 #define BLANK 0x04 #define TRUE 1 #define FALSE 0 long hash1[MAXLINES], hash2[MAXLINES]; unsigned char occ1[8192], occ2[8192]; int n1, n2; FILE *file1, *file2; char name[80], *s; int different = 0; unsigned char flag1 = WHITE; void givehelp(void) { printf("Usage is: FILECOMP [options] filespec1 filespec2\n"); printf("Options:\n"); printf(" /f\tshow full lines.\n"); printf(" /t\texpand tabs before comparing.\n"); printf(" /b\tignore trailing blanks.\n"); printf(" /w\tignore spaces and tabs.\n"); printf(" /y\tcase insensitive compare.\n"); printf("filespec2 can be a drive or directory name.\n"); } /* expand tabs */ void tabex(unsigned char *s1, unsigned char *s2) { int i; unsigned char j; for (i=j=0; s1[i]; i++) { if (s1[i] != '\t') { s2[j++] = s1[i]; continue; } do s2[j++] = ' '; while(j%8 != 0); } s2[j] = 0; } /* zap white space */ void zapwhite(unsigned char *s1, unsigned char *s2) { int i, j; for (i=j=0; s1[i]; i++) { if (s1[i] != ' ' && s1[i] != '\t') s2[j++] = s1[i]; } s2[j] = 0; } /* extract bits from the occurrence vector */ unsigned char getbits(unsigned char *array, unsigned long indx) { unsigned i, j; indx &= 32767; i = indx>>2; j = indx - (i<<2); return ((array[i]>>((3-j)<<1)) & 0x03); } /* store bits in the occurrence array */ void setbits(unsigned char *array, unsigned long indx, unsigned char x) { unsigned i, j, shift; indx &= 32767; i = indx>>2; j = indx - (i<<2); shift = (3-j)<<1; array[i] &= ~(0x03<=0 && buffer[j] && buffer[j]!='\n'; j++); for (j=j-1; j>=0 && buffer[j]==' ' ; j--); buffer[j+1] = 0; } h = hash(buffer); if (h<0) hashvect[i] = h; else hashvect[i] = -h; bits = getbits(occ,-hashvect[i]); if (bits==0) setbits(occ,-hashvect[i],1); else if (bits==1) setbits(occ,-hashvect[i],2); } printf("File truncated at %d lines.\n",MAXLINES); return i-1; } /* hash a character string */ long hash(unsigned char *s) { long h=0, h1; while (*s) { h1 = h; h = h<<1; if (h1<0) h |= 1; h ^= *s++; } if (h==0) h = 1; return h; } /* display the results of comparison */ void output(int l1,int l2) { static int cl1 = 0, cl2 = 0; char line[81]; unsigned int bi1=0, bi2=0; unsigned char end1=1, end2=1; int i; char buffer1[256], buffer2[256], temp[256]; different = 1; for (i=0; i<80; i++) line[i] = ' '; line[80] = 0; if (l1>=0) { for (i=cl1; i<=l1; i++) if((flag1&TABS)==0) fgets(buffer1,256,file1); else { fgets(temp,256,file1); tabex(temp,buffer1); } cl1 = l1 + 1; sprintf(line,"%4d ",l1+1); line[5] = ' '; for (i=0; buffer1[i+bi1] && buffer1[i+bi1]!='\n' && i<34; i++) line[i+5] = buffer1[i+bi1]; if (i==34) { bi1 += 34; end1 = 0; } } if (l2>=0) { for (i=cl2; i<=l2; i++) if((flag1&TABS)==0) fgets(buffer2,256,file2); else { fgets(temp,256,file2); tabex(temp,buffer2); } cl2 = l2 + 1; sprintf(line+40,"%4d ",l2+1); line[45] = ' '; for (i=0; buffer2[i+bi2] && buffer2[i+bi2]!='\n' && i<34; i++) line[i+45] = buffer2[i+bi2]; if (i==34) { bi2 += 34; end2 = 0; } } line[45+i] = '\n'; line[46+i] = 0; fwrite(line,1,46+i,stdout); if (flag1 & FULL) while (!end1 || !end2) { for (i=0; i<80; i++) line[i] = ' '; if (!end1) { for (i=0; buffer1[i+bi1] && buffer1[i+bi1]!='\n' && i<34; i++) line[i+5] = buffer1[i+bi1]; if (i==34) bi1 += 34; else end1 = 1; } if (!end2) { for (i=0; buffer2[i+bi2] && buffer2[i+bi2]!='\n' && i<34; i++) line[i+45] = buffer2[i+bi2]; if (i==34) bi2 += 34; else end2 = 1; } line[45+i] = '\n'; line[46+i] = 0; fwrite(line,1,46+i,stdout); } } /* match strings with specified minimum */ int match(unsigned char *s1, unsigned char *s2, unsigned char min) { unsigned int i; for (i=0; *s1 && *s2; i++) if (toupper(*s1++) != *s2++) return 0; if (*s1==0) return i>=min; return 0; } /* main program */ int cdecl main(int argc, char *argv[]) { int i,j,k,opt,ifile; unsigned char linked; extern int getopt(int argc, char *argv[], char *optionS); extern int opterr, optind; if (argc<3) { givehelp(); return 0; } /* get options */ opterr = FALSE; /* handle errors ourself */ while ((opt = getopt(argc, argv, "ftbwy")) != EOF) { switch (opt) { case '?': printf("Invalid command line option\n"); givehelp(); return(1); case 'f': flag1 |= FULL; break; case 't': flag1 |= TABS; break; case 'b': flag1 |= TRIM; break; case 'w': flag1 &= ~WHITE; break; case 'y': flag1 |= CASE; break; } } ifile = optind; /* index of first file parm */ /* step 1: read first file and hash it */ file1 = fopen(argv[ifile],"r"); if (file1==0) { printf("Unable to open file '%s'\n",argv[ifile]); return 2; } printf("Reading file '%s'.\n",argv[ifile]); n1 = input(file1,hash1,occ1); fseek(file1,0L,0); /* get the file name, with dir name stripped off */ for (i=j=0; (k=argv[ifile][i]) != 0; ++i) if (k == ':' || k == '\\' || k == '/') j = i + 1; s = argv[1] + j; /* if argv[ifile+1] ends in : or \, tack on 1st file name */ for (i=j=0; (k=argv[ifile+1][i]) != 0; ++i) if (k == ':' || k == '\\' || k == '/') j = i + 1; strcpy(name,argv[ifile+1]); if (j == i) strcpy(name+j,s); /* step 2: read second file and hash it */ file2 = fopen(name,"r"); if (file2==0) { /* maybe argv[ifile] was a directory, so try again */ if (j != i) { name[i] = '\\'; strcpy(name+i+1,s); file2 = fopen(name,"r"); } } if (file2==0) { printf("Unable to open file '%s'.\n",name); return 2; } printf("Reading file '%s'.\n",name); n2 = input(file2,hash2,occ2); fseek(file2,0L,0); /* step 3: identify lines that are unique in both files */ for (i=0; i<8192; i++) occ1[i] &= occ2[i]; /* step 4: link together matching unique lines */ for (i=0; i=0; j++) { if (i+j=0) if (hash2[i-j]==hash1[i]) { hash1[i] = i-j; hash2[i-j] = i; break; } } } /* step 5: link the first and last lines, if possible */ if (hash1[0]<0 && hash1[0]==hash2[0]) hash1[0] = hash2[0] = 0; if (hash1[n1-1]<0 && hash1[n1-1]==hash2[n2-1]) { hash1[n1-1] = n2-1; hash2[n2-1] = n1-1; } /* step 6: starting from linked lines, link following lines that match */ linked = 0; for (i=0; i=0) linked = 1; else if (linked==1) { if (hash1[i]==hash2[hash1[i-1]+1]) { hash1[i] = hash1[i-1]+1; hash2[hash1[i]] = i; } else linked = 0; } } /* step 7: link matching lines that precede linked lines */ linked = 0; for (i=n1-1; i>=0; i--) { if (hash1[i]>=0) linked = 1; else if (linked==1) { if (hash1[i]==hash2[hash1[i+1]-1]) { hash1[i] = hash1[i+1] - 1; hash2[hash1[i]] = i; } else linked = 0; } } /* step 8: display the results */ for (i=j=0; i