// Converts a plain file to HTML, substituting special characters.
//
// Usage: prog subs_file input_file [output_file]
//   param1  file containing the substitution data
//   param2  the input file to be translated
//   param3  the output file (standard output when omitted)
//
// The param1 file has the following format:
//   line 1       a character string to be put at the start of every line.
//                It is used as a printf format and receives the line count,
//                so it may hold at most one integer conversion (e.g. %d);
//                write %% for a literal percent sign.
//   line 2 etc   substitution data (see below)
//   line n       **** marks the end of substitution data
//   line n+1 etc is then copied to the output
//
// Substitution data lines:
//   key:value    every occurrence of key is replaced by value; one space
//                before and one space after the separator are ignored
//   :c           a line that starts with the separator and does not contain
//                it again makes c the new separator
//   <TAB>n       a tab found before input column n is padded with spaces up
//                to column n

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define BUFFSZ 100000
#define NKEYS 1000

char *big_buff;                 // line buffer shared by all three input files
int verbose = 1;
char sp50[] = "          " "          " "          " "          " "          ";  // 50 spaces

// Growable, always NUL-terminated output line.
struct outbuf
{
    char *data;
    size_t len;                 // characters stored, excluding the terminator
    size_t cap;                 // bytes allocated
};

// Reports a fatal error and terminates with a failure status.
static void die(const char *msg)
{
    fprintf(stderr, "%s\n", msg);
    exit(1);
}

// malloc that never returns NULL.
static void *xmalloc(size_t n)
{
    void *p = malloc(n != 0 ? n : 1);

    if (p == NULL)
        die("Out of memory");
    return p;
}

// Heap copy of a string (strdup is not declared under strict ISO C).
static char *xstrdup(const char *s)
{
    size_t n = strlen(s) + 1;
    char *copy = xmalloc(n);

    memcpy(copy, s, n);
    return copy;
}

// Makes room for `extra` more characters plus the terminator.
static void out_reserve(struct outbuf *ob, size_t extra)
{
    size_t need = ob->len + extra + 1;
    size_t cap = ob->cap != 0 ? ob->cap : 256;
    char *grown;

    if (need <= ob->cap)
        return;
    while (cap < need)
        cap *= 2;
    grown = realloc(ob->data, cap);
    if (grown == NULL)
        die("Out of memory");
    ob->data = grown;
    ob->cap = cap;
}

// Appends n characters of s.
static void out_append(struct outbuf *ob, const char *s, size_t n)
{
    out_reserve(ob, n);
    memcpy(ob->data + ob->len, s, n);
    ob->len += n;
    ob->data[ob->len] = '\0';
}

// Appends n spaces, taken from sp50 in chunks so that n may exceed its size.
static void out_append_spaces(struct outbuf *ob, size_t n)
{
    const size_t chunk_max = sizeof sp50 - 1;

    while (n > 0)
    {
        size_t chunk = n < chunk_max ? n : chunk_max;

        out_append(ob, sp50, chunk);
        n -= chunk;
    }
}

// Appends the start-of-line string, formatted with the line count.
// fmt must already have been accepted by linestart_is_safe().
static void out_append_linestart(struct outbuf *ob, const char *fmt, int count)
{
    int n = snprintf(NULL, 0, fmt, count);

    if (n < 0)
        die("Cannot format the start-of-line string");
    out_reserve(ob, (size_t)n);
    snprintf(ob->data + ob->len, (size_t)n + 1, fmt, count);
    ob->len += (size_t)n;
}

// Returns 1 when fmt holds only literal text, %% and at most one integer
// conversion, i.e. when it can safely be handed to printf with one int.
static int linestart_is_safe(const char *fmt)
{
    int conversions = 0;
    const char *p;

    for ( p = fmt; *p != '\0'; p++ )
    {
        if ( *p != '%' )
            continue;
        p++;
        if ( *p == '%' )
            continue;
        p += strspn(p, "-+ #0");            // flags
        p += strspn(p, "0123456789");       // width
        if ( *p == '.' )
            p += 1 + strspn(p + 1, "0123456789");   // precision
        if ( *p == '\0' || strchr("diuxXo", *p) == NULL )
            return 0;
        if ( ++conversions > 1 )
            return 0;
    }
    return 1;
}

// Cuts s after its last character whose unsigned value is above `limit`.
// Never looks in front of s and treats bytes >= 0x80 as ordinary text.
static void chop_trailing(char *s, unsigned char limit)
{
    size_t n = strlen(s);

    while ( n > 0 && (unsigned char)s[n - 1] <= limit )
        n--;
    s[n] = '\0';
}

// Returns the index of the longest key that text starts with, or -1.
// Keys sharing an initial character occupy [first[c], limit[c]) sorted by
// increasing length, so the range is scanned from the top downwards.
static int find_key(const char *text, char **key, const size_t *len,
                    const int *first, const int *limit)
{
    unsigned char c = (unsigned char)*text;
    int j;

    for ( j = limit[c] - 1; j >= first[c]; j-- )
        if ( strncmp(text, key[j], len[j]) == 0 )   // stops at the end of text
            return j;
    return -1;
}

// Reads the substitution file, sorts the keys, copies the header text to the
// output and then writes every input line with its keys replaced.
// Returns 0 on success; terminates with status 1 on any error.
int main(int argc, char **argv)
{
    FILE *fin = stdin;
    FILE *fsubs = NULL;
    FILE *fout = stdout;
    char **key, **value;
    size_t *len;                // length values for each key
    int key_count = 0;
    int subs_count = 0;
    int linecount = 0;
    int i, j;
    int tabpos = 0;
    char sepchar = ':';         // separator character between key and substitution string
    int kc[256] = {0};          // one past the highest key index for each initial character
    int ks[256] = {0};          // lowest key index for each initial character
    char *linestart;
    struct outbuf out = { NULL, 0, 0 };

    if ( argc < 3 )
    {
        fprintf(stderr, "Usage: %s subs_file input_file [output_file]\n", *argv);
        exit(1);
    }

    big_buff = xmalloc(BUFFSZ);
    key = xmalloc(NKEYS * sizeof *key);
    value = xmalloc(NKEYS * sizeof *value);

    fsubs = fopen(argv[1], "r");
    if ( fsubs == NULL )
    {
        perror(argv[1]);
        exit(1);
    }

    // line 1: the start-of-line string, minus its trailing CRLF
    if ( fgets(big_buff, BUFFSZ, fsubs) == NULL )
    {
        fprintf(stderr, "%s: no start-of-line string found\n", argv[1]);
        exit(1);
    }
    chop_trailing(big_buff, ' ' - 1);       // control characters only, spaces are kept
    linestart = xstrdup(big_buff);
    if ( !linestart_is_safe(linestart) )
    {
        fprintf(stderr, "%s: start-of-line string may hold at most one integer conversion\n", argv[1]);
        exit(1);
    }

    // line 2 etc: substitution data up to the **** marker
    while ( fgets(big_buff, BUFFSZ, fsubs) != NULL
            && strncmp(big_buff, "****", 4) != 0 )
    {
        char *line = big_buff;
        char *sep = line;

        // The search starts at the second character so that a line which
        // begins with the separator can be used to redefine it.
        if ( *sep != '\0' )
            do
                sep++;
            while ( *sep != '\0' && *sep != sepchar );

        if ( *sep != '\0' )                 // found the separator before end of string
        {
            char *val = sep + 1;

            *sep = '\0';                    // terminate the string that is the key
            if ( sep[-1] == ' ' )           // ignore a space before the separator
                sep[-1] = '\0';
            if ( *val == ' ' )              // ignore a space after the separator
                val++;
            chop_trailing(val, ' ');        // strip trailing space and the newline

            if ( *line == '\0' )
            {
                // an empty key would match everywhere without consuming input
                fprintf(stderr, "Ignoring empty key\n");
                continue;
            }
            if ( key_count >= NKEYS )
            {
                fprintf(stderr, "%s: more than %d keys\n", argv[1], NKEYS);
                exit(1);
            }
            key[key_count] = xstrdup(line);
            value[key_count] = xstrdup(val);
            key_count++;
        }
        else if ( *line == sepchar )        // redefine sepchar
        {
            if ( line[1] != '\0' )
                sepchar = line[1];
        }
        else if ( *line == '\t' )           // tab position specifier
        {
            long t = strtol(line + 1, NULL, 10);

            if ( t < 0 )
                t = 0;
            if ( t > BUFFSZ )               // no input column can be further right
                t = BUFFSZ;
            tabpos = (int)t;                // just remember where first tab should be
            fprintf(stderr, "Tabs at %d\n", tabpos);
        }
    }

    // Sort the keys on initial character, then on length, so that a downward
    // scan through one initial character tries the longest key first.
    for ( j = key_count - 1; j >= 1; j-- )
        for ( i = j - 1; i >= 0; i-- )
        {
            const char *kj = key[j];
            const char *ki = key[i];

            if ( (unsigned char)*kj < (unsigned char)*ki
                 || ( *kj == *ki && strlen(kj) < strlen(ki) ) )
            {
                char *ws;

                ws = key[j];   key[j] = key[i];     key[i] = ws;
                ws = value[j]; value[j] = value[i]; value[i] = ws;
            }
        }
    fprintf(stderr, "Sorted %d keys\n", key_count);

    // NOTE(review): the statements that filled len[] and kc[] were lost from
    // the source text; they are rebuilt here from the way both are used.
    len = xmalloc((size_t)key_count * sizeof *len);
    for ( i = 0; i < key_count; i++ )
    {
        len[i] = strlen(key[i]);
        kc[(unsigned char)key[i][0]] = i + 1;   // ends up as the highest index + 1
    }
    for ( i = key_count - 1; i >= 0; i-- )
        ks[(unsigned char)key[i][0]] = i;       // record the lowest index for each initial character

    // Open the input first so that a bad input name cannot truncate the output.
    if ( (fin = fopen(argv[2], "r")) == NULL )
    {
        perror(argv[2]);
        fclose(fsubs);
        exit(1);
    }
    if ( argc >= 4 && (fout = fopen(argv[3], "w")) == NULL )
    {
        perror(argv[3]);
        fclose(fsubs);
        fclose(fin);
        exit(1);
    }

    // header text: everything after the **** marker is copied unchanged
    while ( fgets(big_buff, BUFFSZ, fsubs) != NULL )
        fputs(big_buff, fout);
    fclose(fsubs);
    fprintf(stderr, "Files opened OK\n");

    while ( fgets(big_buff, BUFFSZ, fin) != NULL )
    {
        const char *line = big_buff;
        const char *pending = line;         // start of the text not yet copied out
        const char *p = line;               // scan position

        chop_trailing(big_buff, ' ');       // chop trailing white space
        if ( *line == '\0' )                // blank line
        {
            fputc('\n', fout);
            continue;
        }

        // NOTE(review): the count is used before it is incremented, so the
        // first non-blank line is numbered 0 -- kept as found, confirm intent.
        out.len = 0;
        out_append_linestart(&out, linestart, linecount);
        linecount++;                        // only count non-blank lines as in Walgol

        while ( *p != '\0' )                // scan along the line
        {
            int pad;
            int hit;

            if ( *p == '\t' && (pad = tabpos - (int)(p - line)) > 0 )
            {
                out_append(&out, pending, (size_t)(p - pending));   // copy up to the tab
                out_append_spaces(&out, (size_t)pad);   // ... and substitute spaces to implement the tab
                pending = ++p;              // move on past the tab character
                continue;
            }

            hit = find_key(p, key, len, ks, kc);
            if ( hit < 0 )
            {
                p++;
                continue;
            }
            out_append(&out, pending, (size_t)(p - pending));   // copy up to the start of recognition
            out_append(&out, value[hit], strlen(value[hit]));   // ... and substitute the new string
            p += len[hit];                  // move on past the recognised key
            pending = p;
            subs_count++;                   // keep a tally of number of changes
        }
        out_append(&out, pending, (size_t)(p - pending));       // copy rest of the line
        fprintf(fout, "%s\n", out.data);
    }

    fclose(fin);
    if ( ferror(fout) || (fout != stdout ? fclose(fout) : fflush(fout)) != 0 )
    {
        perror(argc >= 4 ? argv[3] : "stdout");
        exit(1);
    }
    fprintf(stderr, "%d substitutions made\n", subs_count);

    free(out.data);
    return 0;
}