I hate it when people double post, but this feels like it belongs in a new post. CKing, I blame you for putting the idea in my head.
This was tested locally and worked alright - it's not 100% secure or anything like that, but it seems to be a good start. Basically, it loads the -d file into a linked list in memory, then for each line of the -l file it traverses the list looking for a match, and writes every line that has no match to -o.
Code:
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
/*
 * Write the usage banner to standard output and terminate the process
 * with exit status -1. This function never returns to its caller.
 */
void help(void) {
    static const char usage_banner[] =
        " \n"
        " Dictionary Filter v0.01 \n"
        " \n"
        " Usage: ./filter -d <dictionary_source> -l <wordlist> -o <out_file> \n"
        " \n"
        " Warning: -o WILL overwrite whatever file you tell it to \n";

    /* The banner contains no conversion specifiers, so it is emitted verbatim. */
    fputs(usage_banner, stdout);
    exit(-1);
}
/*
 * One entry of a singly linked list of words.
 *   word - NUL-terminated text of the entry (fixed 8192-byte buffer)
 *   next - following entry, or NULL at the end of the list
 */
struct linked_list {
    char word[8192];
    struct linked_list *next;
};

/* Short alias used throughout the program for a list entry. */
typedef struct linked_list node;
/*
 * Dictionary filter entry point.
 *
 * Loads every line of the -d file into an in-memory singly linked list,
 * then copies each line of the -l file to the -o file unless the same word
 * is present in the list. Words are compared without their trailing
 * newline, so a final line that lacks one still matches.
 *
 * Returns 0 on success, 1 when the -d file contains no lines, and -1 when a
 * file cannot be opened, memory runs out, or writing the output fails.
 * Invalid command lines are handled by help(), which does not return.
 */
int main(int argc, char *argv[])
{
    FILE *fshort = NULL, *flong = NULL, *foutput = NULL;
    /* Start as NULL so the sanity check below can detect a missing option. */
    char *dshort = NULL, *llong = NULL, *oout = NULL;
    char line[8192];
    node *head = NULL;
    int status = 0;
    int c;

    opterr = 0; /* we print our own diagnostics for bad options */

    /* Exactly three option/argument pairs are expected. */
    if (argc != 7) {
        help();
    }

    /*
     * Get command line arguments
     */
    while ((c = getopt(argc, argv, "d:l:o:h")) != -1) {
        switch (c) {
        case 'd':
            dshort = optarg;
            break;
        case 'l':
            llong = optarg;
            break;
        case 'o':
            oout = optarg;
            break;
        case 'h':
            help();
            break;
        case '?':
            if (optopt == 'd' || optopt == 'l' || optopt == 'o') {
                printf("Option %c requires an argument.\n", optopt);
            } else if (isprint((unsigned char)optopt)) {
                /* <ctype.h> functions require an unsigned char value */
                printf("Unknown option -%c\n", optopt);
            }
            help();
            return 1; /* not reached: help() exits */
        default:
            help();
        }
    }

    /*
     * Sanity checks: a repeated option can satisfy the argc test while
     * leaving one of the three file names unset.
     */
    if (dshort == NULL || llong == NULL || oout == NULL) {
        help();
    }

    if ((fshort = fopen(dshort, "r")) == NULL) {
        fprintf(stderr, "Error: Can not read %s\n", dshort);
        status = -1;
        goto cleanup;
    }
    if ((flong = fopen(llong, "r")) == NULL) {
        fprintf(stderr, "Error: Can not read %s\n", llong);
        status = -1;
        goto cleanup;
    }
    if ((foutput = fopen(oout, "w")) == NULL) {
        fprintf(stderr, "Error: Can not open %s for writing\n", oout);
        status = -1;
        goto cleanup;
    }

    /*
     * Load the dictionary, one list entry per line. New entries are pushed
     * onto the front of the list; order does not matter for lookups.
     *
     * TODO: Rework logic to remove duplicates from input file
     */
    while (fgets(line, sizeof line, fshort) != NULL) {
        node *entry = malloc(sizeof *entry);

        if (entry == NULL) {
            fprintf(stderr, "Error: Out of memory while reading %s\n", dshort);
            status = -1;
            goto cleanup;
        }
        /* Store the word without its newline so the last line compares equal. */
        line[strcspn(line, "\n")] = '\0';
        snprintf(entry->word, sizeof entry->word, "%s", line);
        entry->next = head;
        head = entry;
    }
    fclose(fshort);
    fshort = NULL;

    /*
     * No point if the input library is empty
     */
    if (head == NULL) {
        fprintf(stderr, "[-] Filter library is empty (-d), stopping processing\n");
        status = 1;
        goto cleanup;
    }

    /* Copy every word-list line that is not found in the dictionary. */
    while (fgets(line, sizeof line, flong) != NULL) {
        size_t len = strcspn(line, "\n");
        int had_newline = (line[len] == '\n');
        node *cur = head;

        line[len] = '\0';
        while (cur != NULL && strcmp(cur->word, line) != 0) {
            cur = cur->next;
        }

        if (cur == NULL) {
            /* Put the newline back so the output keeps the input's bytes. */
            if (had_newline) {
                line[len] = '\n';
            }
            if (fputs(line, foutput) == EOF) {
                fprintf(stderr, "Error: Can not write to %s\n", oout);
                status = -1;
                goto cleanup;
            }
        }
    }

cleanup:
    while (head != NULL) {
        node *next = head->next;

        free(head);
        head = next;
    }
    /* fclose(NULL) is undefined, so only close streams that were opened. */
    if (fshort != NULL) {
        fclose(fshort);
    }
    if (flong != NULL) {
        fclose(flong);
    }
    /* fclose flushes buffered output; a failure here means data was lost. */
    if (foutput != NULL && fclose(foutput) != 0 && status == 0) {
        fprintf(stderr, "Error: Can not write to %s\n", oout);
        status = -1;
    }
    return status;
}
Usage with your reference files above is:
./filter -d list1 -l list2 -o list3