A while back, the HR department at my work asked me to investigate a company laptop that they suspected had been used inappropriately. Part of what I needed to do was dig into a particular user's browsing history, and I was surprised to find that there was next to nothing in the way of free tools for extracting data from Firefox's cache.

In the end, I was forced to use some VB app that comes with the CAINE live distro and is run under Wine (yuck!).

After doing a bit of research to understand how FF's cache format works, I wrote this little program. It's my own code, and still pretty much a beta, but I've lost interest in doing much more with it, so I'm donating it to the BT team for their consideration.


Code:
/* outfox.c
 * Version:     0.1b
 * Date:        20101215
 * Author:      Written for BackTrack Linux by c0rruption
 *
 * References:  http://www.securityfocus.com/infocus/1832 (factually inaccurate
 *                                here and there, but hugely helpful anyway)
 *
 *              mozilla/netwerk/cache/src/ from the mozilla source tree :)
 *
 * Usage:       outfox <input dir> <output dir>
 *
 * Extracts a mozilla cache to the given output directory.  
 */
 
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>

#include <string.h>

#include <limits.h>
#include <endian.h>

#include <errno.h>

#include <time.h>
#include <sys/stat.h>

#include <arpa/inet.h> // ntohl()
#include <libgen.h>    // basename()

/* Smaller of a and b.  Function-like macro: the chosen argument is evaluated
 * twice, so do not pass expressions with side effects. */
#define min(a,b)                 (((a) < (b))? (a) : (b))

/* Length of the evictionRank[] and bucketUsage[] tables in mapHeader (32). */
#define NBUCKETS                 (1 << 5)

/*
 * Accessors for the fields packed into a 32-bit "location" word
 * (mapRecord.dataLocation / mapRecord.metaLocation):
 *
 *   bit  31      initialized flag
 *   bits 28-29   file selector: 0 means the record lives in its own file,
 *                1..3 pick one of the _CACHE_00N_ block files (see openRecord)
 *   bits 0-7     generation number, used to name the separate file
 *   bits 0-23    block number inside a block file
 *
 * The generation and block-number fields overlap; which one is meaningful
 * depends on the file selector.
 */
#define loc_initialized(n)       ((n) & 0x80000000)
#define loc_generation(n)        ((n) & 0x000000ff)
#define loc_fileSelector(n)     (((n) & 0x30000000) >> 28)

#define loc_blockNumber(n)       ((n) & 0x00ffffff)
/* Block size for selectors 1, 2, 3 is 256, 1024, 4096 bytes.  Selector 0
 * would shift by a negative amount (undefined), so only use this -- and
 * loc_recordOffset -- when loc_fileSelector(n) is nonzero. */
#define loc_blockSize(n)         (256 << (2 * (loc_fileSelector(n) - 1)))
/* Byte offset of the record inside its block file.  The constant 4096 is
 * presumably the size of a header/bitmap at the start of each block file --
 * TODO confirm against the Mozilla cache sources. */
#define loc_recordOffset(n)      (loc_blockNumber(n) * loc_blockSize(n) + 4096)


const int V_MAJOR = 0;
const int V_MINOR = 1;


/*
 * Header record: five 32-bit fields followed by two NBUCKETS-entry tables.
 * Nothing in the visible part of this file reads it; presumably it mirrors
 * the header at the start of the cache map file -- TODO confirm against the
 * code that loads it.  If it is read straight from disk, field order and
 * widths must not be changed.
 */
typedef struct {
    uint32_t version;
    uint32_t dataSize;
    uint32_t entryCount;
    uint32_t isDirty;
    uint32_t recordCount;

    /* One slot per bucket (NBUCKETS of them). */
    uint32_t evictionRank[NBUCKETS];
    uint32_t bucketUsage[NBUCKETS];
} mapHeader;

/*
 * One cache entry as consumed by readRecord().
 */
typedef struct {
    uint32_t hashNumber;    // printed as %08X to build input and output file names
    uint32_t evictionRank;  // not used by the visible code
    uint32_t dataLocation;  // packed location word of the content (see loc_* macros)
    uint32_t metaLocation;  // packed location word of the metadata record
} mapRecord;

/*
 * Fixed-size header at the start of a metadata record.  readRecord() fread()s
 * it directly from the cache file and then passes every 32-bit word through
 * ntohl(), so the field order and widths here must match the on-disk layout
 * exactly.  In the file the header is followed by keySize bytes of key and
 * then by the metadata itself.
 */
typedef struct {
    uint32_t headerVersion;
    uint32_t metaLocation;
    int32_t  fetchCount;
    uint32_t lastFetched;   // passed to localtime(), i.e. treated as seconds since the epoch
    uint32_t lastModified;  // passed to localtime(), i.e. treated as seconds since the epoch
    uint32_t expirationTime;
    uint32_t dataSize;      // number of content bytes readRecord() copies out
    uint32_t keySize;       // includes terminating null byte
    uint32_t metaSize;      // includes terminating null byte
} metaDataRecordHeader;



/*
 * copyFile - copy up to `size` bytes from the current position of `infile`
 * into the file named `ofname`, which is created or truncated.
 *
 * Returns the number of bytes actually copied.  This is smaller than `size`
 * when `infile` reaches EOF (or hits a read error) early, so callers can
 * detect truncated records by comparing the result against `size`.
 * Returns 0 if the output file cannot be opened and (size_t) -1 if writing
 * to or closing the output file fails.
 *
 * `infile` is left open; closing it is the caller's responsibility.
 */
size_t copyFile(FILE *infile, const char *ofname, size_t size) {
    FILE *outfile;
    char buf[1024];
    size_t remaining = size;

    if ((outfile = fopen(ofname, "wb")) == NULL) {
        perror(ofname);
        return 0;
    }

    while (remaining > 0) {
        size_t want = (remaining < sizeof(buf)) ? remaining : sizeof(buf);
        size_t got = fread(buf, 1, want, infile);

        if (got == 0) {
            break;  /* EOF or read error: the short count is reported below */
        }

        if (fwrite(buf, 1, got, outfile) != got) {
            fprintf(stderr, "%s: write error\n", ofname);
            fclose(outfile);  /* don't leak the stream on the error path */
            return (size_t) -1;
        }

        remaining -= got;
    }

    /* fclose() flushes buffered data, so a failure here is a write failure. */
    if (fclose(outfile) != 0) {
        fprintf(stderr, "%s: write error\n", ofname);
        return (size_t) -1;
    }

    return size - remaining;
}



/*
 * openRecord - open the cache file that holds the record described by `loc`
 * and leave the stream positioned at the start of that record.
 *
 * A file selector of 0 means the record is stored in a file of its own,
 * named from `hashNumber` and the generation number; any other selector
 * picks a _CACHE_NNN_ block file, in which case the stream is seeked to
 * loc_recordOffset(loc).
 *
 * Returns an open read-only stream that the caller must fclose(), or NULL
 * after printing a diagnostic to stderr if the file cannot be opened or the
 * seek fails.
 */
FILE *openRecord(uint32_t hashNumber, uint32_t loc, const char *inputdir) {
    char path[PATH_MAX];
    FILE *fp;
    const int selector = loc_fileSelector(loc);

    if (selector != 0) {
        snprintf(path, sizeof(path), "%s/_CACHE_%03d_", inputdir, selector);
    } else {
        /* TODO: on-disk meta records may need "%08Xm%02d" instead. */
        snprintf(path, sizeof(path), "%s/%08Xd%02d", inputdir,
                 hashNumber, loc_generation(loc));
    }

    fp = fopen(path, "rb");
    if (fp == NULL) {
        perror(path);
        return NULL;
    }

    /* A stand-alone record file is read from its beginning. */
    if (selector == 0) {
        return fp;
    }

    if (fseek(fp, loc_recordOffset(loc), SEEK_SET) == 0) {
        return fp;
    }

    fprintf(stderr, "%s: couldn't seek to offset %lu: %s\n",
            path, (unsigned long) loc_recordOffset(loc), strerror(errno));
    fclose(fp);
    return NULL;
}


/*
 * getExtension - guess a file extension from a URL (or cache key).
 *
 * The query string ('?...') and fragment ('#...') are ignored.  If the text
 * contains "://", everything up to the first '/' after it is treated as the
 * host; a URL with no path at all ("http://foo.com") therefore has no
 * extension instead of being reported as ".com".  The extension is the part
 * of the last path component starting at its last '.', and is only accepted
 * if it is shorter than 12 characters including the dot.
 *
 * Returns a freshly malloc()ed string such as ".html" that the caller must
 * free(), or NULL if there is no usable extension, `url` is NULL, memory is
 * exhausted, or the file was built with NO_EXTENSIONS.
 *
 * TODO: wide characters in URLs?
 */
char *getExtension(const char *url) {
    char *ret = NULL;
#ifndef NO_EXTENSIONS
    // .torrent is the longest extension I can think of.
    // 12 chars seems a good max length.
    enum { MAX_EXT_LEN = 12 };

    const char *end, *base, *scheme, *dot, *p;

    if (url == NULL) {
        return NULL;
    }

    /* The interesting part stops at the first '?' or '#'. */
    end = url + strcspn(url, "?#");
    base = url;

    scheme = strstr(url, "://");
    if (scheme != NULL && scheme < end) {
        /* Skip the host: without a '/' after it there is no path. */
        const char *slash = NULL;

        for (p = scheme + 3; p < end; ++p) {
            if (*p == '/') {
                slash = p;
                break;
            }
        }

        if (slash == NULL) {
            return NULL;
        }
        base = slash;
    }

    /* Advance to just past the last '/' to isolate the final component. */
    for (p = base; p < end; ++p) {
        if (*p == '/') {
            base = p + 1;
        }
    }

    dot = NULL;
    for (p = base; p < end; ++p) {
        if (*p == '.') {
            dot = p;
        }
    }

    if (dot != NULL) {
        size_t len = (size_t) (end - dot);  /* includes the dot */

        if (len < MAX_EXT_LEN && (ret = malloc(len + 1)) != NULL) {
            memcpy(ret, dot, len);
            ret[len] = '\0';
        }
    }
#endif
    return ret;
}

/*
 * Format a 32-bit seconds-since-epoch value as local time "ccyymmddhhmmss"
 * into buf; writes "(unknown)" if the time cannot be converted.
 */
static void formatTimestamp(uint32_t seconds, char *buf, size_t bufsize) {
    /* Copy into a real time_t first: time_t may be wider than 32 bits, so
     * handing localtime() a pointer to the uint32_t itself would make it
     * read past the field. */
    time_t when = (time_t) seconds;
    struct tm *tmptr = localtime(&when);

    if (tmptr == NULL ||
        strftime(buf, bufsize, "%Y%m%d%H%M%S", tmptr) == 0) {
        snprintf(buf, bufsize, "%s", "(unknown)");
    }
}

/*
 * readRecord - extract one cache entry into outputdir.
 *
 * Reads the metadata record referenced by maprec->metaLocation, optionally
 * appends a line to indexFile
 *     <fetched>, <fetch count>, <modified>, <output name>, <key>
 * and writes two files named after the entry's hash number:
 *     <outputdir>/<HASH><ext>.meta   the metadata, minus its trailing NUL
 *     <outputdir>/<HASH><ext>        the content (metarec.dataSize bytes)
 * where <ext> is whatever getExtension() derives from the key, or empty.
 *
 * maprec     entry to extract
 * indexFile  open stream for the index, or NULL to skip the index line
 * inputdir   directory holding the cache files
 * outputdir  directory receiving the extracted files
 *
 * Returns 1 on success and 0 on any failure.  A short metadata copy is
 * reported but does not stop the content from being extracted.
 */
int readRecord(mapRecord *maprec,
               FILE *indexFile,
               const char *inputdir,
               const char *outputdir) {

    FILE *infile;
    char metaName[PATH_MAX];
    char dataName[PATH_MAX];
    metaDataRecordHeader metarec;

    int retstatus = 1;

    char *ext;  // file extension
    static char *ext_default = "";

    infile = openRecord(maprec->hashNumber, maprec->metaLocation, inputdir);

    if (infile == NULL) {
        fprintf(stderr, "couldn't open metarecord: %08x\n", maprec->hashNumber);
        return 0;
    }

    if (fread(&metarec, sizeof(metarec), 1, infile) != 1) {
        fprintf(stderr, "%08X: couldn't read metadata header\n",
                maprec->hashNumber);
        fclose(infile);
        return 0;
    }

    /* Every header field is a 32-bit word read with ntohl(), i.e. stored
     * big-endian.  ntohl() is the identity on big-endian hosts, so no
     * byte-order #if is needed around this loop. */
    {
        uint32_t *words = (uint32_t *) &metarec;
        size_t i;

        for (i = 0; i < sizeof(metarec) / sizeof(uint32_t); ++i) {
            words[i] = ntohl(words[i]);
        }
    }

    /* Read the key.  keySize claims to include the terminating NUL, but it
     * comes from the file, so reserve one extra byte and terminate the
     * buffer ourselves before any string function touches it. */
    size_t keySize = metarec.keySize;
    char *key = (keySize < SIZE_MAX) ? malloc(keySize + 1) : NULL;

    if (key == NULL) {
        perror("malloc");   /* report before fclose() can disturb errno */
        fclose(infile);
        return 0;
    }

    if (keySize != 0 && fread(key, keySize, 1, infile) != 1) {
        fprintf(stderr, "%08X: couldn't read key\n", maprec->hashNumber);
        free(key);
        fclose(infile);
        return 0;
    }
    key[keySize] = '\0';

    if ((ext = getExtension(key)) == NULL) {
        ext = ext_default;
    }

    if (indexFile) {
        char dateFetched[16];  // ccyymmddhhmmss
        char dateModified[16]; // ccyymmddhhmmss

        formatTimestamp(metarec.lastFetched, dateFetched, sizeof(dateFetched));
        formatTimestamp(metarec.lastModified, dateModified, sizeof(dateModified));

        /* fetchCount is a signed 32-bit field, hence %d. */
        fprintf(indexFile, "%s, %d, %s, %08X%s, %s\n", dateFetched,
                                                       (int) metarec.fetchCount,
                                                       dateModified,
                                                       maprec->hashNumber,
                                                       ext,
                                                       key);
    }

    /* Build both output names now so the key and extension can be released
     * before any of the remaining exit paths. */
    snprintf(metaName, sizeof(metaName), "%s/%08X%s.meta", outputdir,
                                                           maprec->hashNumber,
                                                           ext);
    snprintf(dataName, sizeof(dataName), "%s/%08X%s", outputdir,
                                                      maprec->hashNumber,
                                                      ext);

    free(key);
    if (ext != ext_default) {
        free(ext);
    }

    /* metaSize includes a trailing NUL that is not copied; guard against a
     * zero metaSize so the subtraction cannot wrap around. */
    size_t metaBytes = (metarec.metaSize != 0) ? (size_t) metarec.metaSize - 1
                                               : 0;

    size_t copied = copyFile(infile, metaName, metaBytes);
    fclose(infile);

    if (copied != metaBytes) {
        fprintf(stderr,
                "%08X: tried to copy %zu bytes metadata, copied %zu instead\n",
                maprec->hashNumber,
                metaBytes,
                copied);

        retstatus = 0;  // nonfatal. return an error, but proceed for now
    }

    /* OK, that's the metadata, now on to the datadata :) */

    infile = openRecord(maprec->hashNumber, maprec->dataLocation, inputdir);

    if (infile == NULL) {
        fprintf(stderr, "couldn't open datarecord: %08x\n", maprec->hashNumber);
        return 0;
    }

    size_t dataBytes = metarec.dataSize;

    copied = copyFile(infile, dataName, dataBytes);
    fclose(infile);

    if (copied != dataBytes) {
        fprintf(stderr,
                "%08X: tried to copy %zu bytes data, copied %zu instead\n",
                maprec->hashNumber,
                dataBytes,
                copied);
        return 0;
    }

    return retstatus;
}