Skip to content

Commit

Permalink
email address extraction circuit + inputs setup but not passing
Browse files Browse the repository at this point in the history
  • Loading branch information
jp4g committed Oct 18, 2024
1 parent acfb3a4 commit 137a805
Show file tree
Hide file tree
Showing 20 changed files with 351 additions and 147 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@ target
dist
.tsbuildinfo
zk-email-verify

!*/email-*.eml
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
[package]
name = "recipient_search"
name = "email_mask"
type = "bin"
authors = ["Mach 34"]
compiler_version = ">=0.34.0"
compiler_version = ">=0.35.0"

[dependencies]
zkemail = { path = "../../lib"}
60 changes: 60 additions & 0 deletions examples/email_mask/src/main.nr
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
use dep::zkemail::{
KEY_LIMBS_2048, dkim::RSAPubkey, headers::body_hash::get_body_hash,
standard_outputs, Sequence, masking::mask_text
};
use dep::std::{collections::bounded_vec::BoundedVec, hash::sha256_var};

global MAX_EMAIL_HEADER_LENGTH: u32 = 512;
global MAX_EMAIL_BODY_LENGTH: u32 = 1024;

/**
 * Verify an arbitrary email signed by a 2048-bit RSA DKIM signature and return masked
 * copies of the header and body alongside the standard outputs.
 *
 * Steps performed, in order:
 *   1. assert the header and body lengths do not exceed their maximums
 *   2. verify the DKIM signature over the header
 *   3. read the signed body hash out of the header via `get_body_hash`
 *   4. SHA256 the supplied body and assert it equals the signed body hash
 *   5. apply `mask_text` to the header and body with the supplied masks
 *
 * @param header - The email header, 0-padded at end to the MAX_EMAIL_HEADER_LENGTH
 * @param body - The email body, 0-padded at end to the MAX_EMAIL_BODY_LENGTH
 * @param pubkey - The DKIM RSA Public Key modulus and reduction parameter
 * @param signature - The DKIM RSA Signature
 * @param body_hash_index - Index forwarded to `get_body_hash` together with the header
 *        (presumably the position of the body hash value inside the header - TODO confirm
 *        against the library; the previous wording referred to a "partial hash array",
 *        which this circuit does not use)
 * @param dkim_header_sequence - The index and length of the DKIM header field
 * @param header_mask - The mask for the header, one flag per header byte slot
 *        (NOTE(review): whether `true` means "reveal" or "hide" is defined by `mask_text` - confirm)
 * @param body_mask - The mask for the body, one flag per body byte slot (same convention as header_mask)
 * @return - a public tuple of three elements:
 *         0: the two-element standard outputs array
 *            0: Pedersen hash of DKIM public key (root of trust)
 *            1: Pedersen hash of DKIM signature (email nullifier)
 *         1: the masked header produced by `mask_text`, MAX_EMAIL_HEADER_LENGTH bytes
 *         2: the masked body produced by `mask_text`, MAX_EMAIL_BODY_LENGTH bytes
 */
fn main(
header: BoundedVec<u8, MAX_EMAIL_HEADER_LENGTH>,
body: BoundedVec<u8, MAX_EMAIL_BODY_LENGTH>,
pubkey: RSAPubkey<KEY_LIMBS_2048>,
signature: [Field; KEY_LIMBS_2048],
body_hash_index: u32,
dkim_header_sequence: Sequence,
header_mask: [bool; MAX_EMAIL_HEADER_LENGTH],
body_mask: [bool; MAX_EMAIL_BODY_LENGTH]
) -> pub ([Field; 2], [u8; MAX_EMAIL_HEADER_LENGTH], [u8; MAX_EMAIL_BODY_LENGTH]) {
// check the body and header lengths are within bounds
assert(header.len() <= MAX_EMAIL_HEADER_LENGTH);
assert(body.len() <= MAX_EMAIL_BODY_LENGTH);

// verify the dkim signature over the header
pubkey.verify_dkim_signature(header, signature);

// extract the body hash from the header
// (the header was signature-checked above, so this is the hash the signer committed to)
let signed_body_hash = get_body_hash(header, dkim_header_sequence, body_hash_index);

// hash the asserted body
// only the first body.len() bytes of the backing storage are hashed
let computed_body_hash: [u8; 32] = sha256_var(body.storage, body.len() as u64);

// compare the body hashes
// this is what binds the private body input to the DKIM-signed header
assert(
signed_body_hash == computed_body_hash, "SHA256 hash computed over body does not match body hash found in DKIM-signed header"
);

// mask the header and body
// the masked copies are part of the public return value
let masked_header = mask_text(header, header_mask);
let masked_body = mask_text(body, body_mask);

// hash the pubkey and signature for the standard outputs
let standard_out = standard_outputs(pubkey.modulus, signature);
(standard_out, masked_header, masked_body)
}
8 changes: 8 additions & 0 deletions examples/extract_addresses/Nargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
[package]
name = "extract_addresses"
type = "bin"
authors = ["Mach 34"]
compiler_version = ">=0.35.0"

[dependencies]
zkemail = { path = "../../lib"}
55 changes: 55 additions & 0 deletions examples/extract_addresses/src/main.nr
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
use dep::zkemail::{
KEY_LIMBS_2048, dkim::RSAPubkey,
headers::{body_hash::get_body_hash, email_address::get_email_address}, standard_outputs, Sequence,
MAX_EMAIL_ADDRESS_LENGTH
};
use dep::std::{collections::bounded_vec::BoundedVec, hash::sha256_var};

global MAX_EMAIL_HEADER_LENGTH: u32 = 512;
global MAX_EMAIL_BODY_LENGTH: u32 = 1024;

/**
 * Verify an arbitrary email signed by a 2048-bit RSA DKIM signature and extract sender and recipient addresses
 * @dev example of only constraining access to the header too
 *      (no body input is taken and no body hash check is performed in this circuit)
 *
 * @param header - The email header, 0-padded at end to the MAX_EMAIL_HEADER_LENGTH
 * @param pubkey - The DKIM RSA Public Key modulus and reduction parameter
 * @param signature - The DKIM RSA Signature
 * @param from_header_sequence - The index and length of the "From" header field
 * @param from_address_sequence - The index and length of the "From" email address
 * @param to_header_sequence - The index and length of the "To" header field
 * @param to_address_sequence - The index and length of the "To" email address
 * @return - a public tuple of three elements:
 *         0: the two-element standard outputs array
 *            0: Pedersen hash of DKIM public key (root of trust)
 *            1: Pedersen hash of DKIM signature (email nullifier)
 *         1: the "From" address returned by `get_email_address`, as a BoundedVec of up to MAX_EMAIL_ADDRESS_LENGTH bytes
 *         2: the "To" address returned by `get_email_address`, as a BoundedVec of up to MAX_EMAIL_ADDRESS_LENGTH bytes
 */
fn main(
header: BoundedVec<u8, MAX_EMAIL_HEADER_LENGTH>,
pubkey: RSAPubkey<KEY_LIMBS_2048>,
signature: [Field; KEY_LIMBS_2048],
from_header_sequence: Sequence,
from_address_sequence: Sequence,
to_header_sequence: Sequence,
to_address_sequence: Sequence
) -> pub ([Field; 2], BoundedVec<u8, MAX_EMAIL_ADDRESS_LENGTH>, BoundedVec<u8, MAX_EMAIL_ADDRESS_LENGTH>) {
// check the header length is within bounds (this circuit has no body input)
assert(header.len() <= MAX_EMAIL_HEADER_LENGTH);

// verify the dkim signature over the header
pubkey.verify_dkim_signature(header, signature);

// extract to and from email addresses
// the field names are converted to byte arrays at compile time and passed to get_email_address
// NOTE(review): lowercase names are used here - presumably the header is expected in
// canonicalized (lowercased field name) form; confirm against get_email_address
let from = comptime {
"from".as_bytes()
};
let to = comptime {
"to".as_bytes()
};
// TODO: 16k gate cost? has to be able to be brought down
let from_address = get_email_address(header, from_header_sequence, from_address_sequence, from);
let to_address = get_email_address(header, to_header_sequence, to_address_sequence, to);

// hash the pubkey and signature for the standard outputs
let standard_out = standard_outputs(pubkey.modulus, signature);
(standard_out, from_address, to_address)
}
2 changes: 1 addition & 1 deletion examples/partial_hash/Nargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
name = "partial_hash"
type = "bin"
authors = ["Mach 34"]
compiler_version = ">=0.34.0"
compiler_version = ">=0.35.0"

[dependencies]
zkemail = { path = "../../lib"}
2 changes: 1 addition & 1 deletion examples/partial_hash/src/main.nr
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use dep::zkemail::{
KEY_LIMBS_2048, dkim::RSAPubkey, headers::get_body_hash,
KEY_LIMBS_2048, dkim::RSAPubkey, headers::body_hash::get_body_hash,
partial_hash::partial_sha256_var_end, standard_outputs, Sequence
};

Expand Down
46 changes: 0 additions & 46 deletions examples/recipient_search/src/main.nr

This file was deleted.

2 changes: 1 addition & 1 deletion examples/verify_email_1024_bit_dkim/Nargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
name = "verify_email_1024_bit_dkim"
type = "bin"
authors = ["Mach 34"]
compiler_version = ">=0.34.0"
compiler_version = ">=0.35.0"

[dependencies]
zkemail = { path = "../../lib"}
2 changes: 1 addition & 1 deletion examples/verify_email_1024_bit_dkim/src/main.nr
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use dep::zkemail::{
KEY_LIMBS_1024, dkim::RSAPubkey, headers::get_body_hash,
KEY_LIMBS_1024, dkim::RSAPubkey, headers::body_hash::get_body_hash,
standard_outputs, Sequence
};
use dep::std::{collections::bounded_vec::BoundedVec, hash::sha256_var};
Expand Down
2 changes: 1 addition & 1 deletion examples/verify_email_2048_bit_dkim/Nargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
name = "verify_email_2048_bit_dkim"
type = "bin"
authors = ["Mach 34"]
compiler_version = ">=0.34.0"
compiler_version = ">=0.35.0"

[dependencies]
zkemail = { path = "../../lib"}
2 changes: 1 addition & 1 deletion examples/verify_email_2048_bit_dkim/src/main.nr
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use dep::zkemail::{
KEY_LIMBS_2048, dkim::RSAPubkey, headers::get_body_hash,
KEY_LIMBS_2048, dkim::RSAPubkey, headers::body_hash::get_body_hash,
standard_outputs, Sequence
};
use dep::std::{collections::bounded_vec::BoundedVec, hash::sha256_var};
Expand Down
30 changes: 27 additions & 3 deletions js/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ import {
verifyDKIMSignature,
} from "@zk-email/helpers/dist/dkim";
import * as NoirBignum from "@mach-34/noir-bignum-paramgen";
import { u8ToU32, getHeaderSequence } from "./utils";
import { u8ToU32, getHeaderSequence, getAddressHeaderSequence } from "./utils";
export { verifyDKIMSignature } from "@zk-email/helpers/dist/dkim";

// This file is essentially https://github.com/zkemail/zk-email-verify/blob/main/packages/helpers/src/input-generators.ts
Expand All @@ -28,19 +28,28 @@ export type BoundedVec = {
};

export type CircuitInput = {
// required inputs for all zkemail verifications
header: BoundedVec;
pubkey: {
modulus: string[];
redc: string[];
};
signature: string[];
dkim_header_sequence: Sequence;
// inputs used for verifying full or partial hash
body?: BoundedVec;
body_hash_index?: string;
// inputs used for only partial hash
partial_body_real_length?: string;
partial_body_hash?: string[];
header_mask: string[];
body_mask: string[];
// inputs used for only masking
header_mask?: string[];
body_mask?: string[];
// inputs used for address extraction
from_header_sequence?: Sequence;
from_address_sequence?: Sequence;
to_header_sequence?: Sequence;
to_address_sequence?: Sequence;
};

export type InputGenerationArgs = {
Expand All @@ -51,6 +60,9 @@ export type InputGenerationArgs = {
removeSoftLineBreaks?: boolean;
headerMask?: number[];
bodyMask?: number[];
// todo: probably move these out into a separate extended type?
extractFrom?: boolean;
extractTo?: boolean;
};

// copied without modification, but not publicly exported in original
Expand Down Expand Up @@ -193,6 +205,18 @@ export function generateEmailVerifierInputsFromDKIMResult(
circuitInputs.header_mask = params.headerMask.map((x) => x.toString());
if (params.bodyMask)
circuitInputs.body_mask = params.bodyMask.map((x) => x.toString());

// address extraction
if (params.extractFrom) {
const fromSequences = getAddressHeaderSequence(headers, "from");
circuitInputs.from_header_sequence = fromSequences[0];
circuitInputs.from_address_sequence = fromSequences[1];
}
if (params.extractTo) {
const toSequences = getAddressHeaderSequence(headers, "to");
circuitInputs.to_header_sequence = toSequences[0];
circuitInputs.to_address_sequence = toSequences[1];
}
}

return circuitInputs;
Expand Down
38 changes: 35 additions & 3 deletions js/src/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,38 @@ export function getHeaderSequence(
return { index: match.index!.toString(), length: match[0].length.toString() };
}

/**
 * Get the index and length of a header field as well as the address in the field
 * @dev only works for to, from. Not set up for cc
 *
 * The field name is matched only at the start of a header line, so searching for
 * "to" cannot land inside "reply-to:" or "in-reply-to:". The address is taken from
 * inside angle brackets when present, otherwise the first bare `local@domain.tld`
 * token on the field's line is used.
 *
 * NOTE(review): indices are JavaScript string offsets of `header.toString()`; they equal
 * byte offsets only while the header is pure ASCII - confirm this holds for all callers.
 *
 * @param header - the header to search for the field in
 * @param headerField - the field name to search for
 * @returns - a two element array:
 *            0: index and length of the match starting at the field name and ending at
 *               the address (including the closing ">" when the address is bracketed)
 *            1: index and length of the address itself, located inside that match
 * @throws Error if no header line starts with the field name, or if the field
 *         is present but no address can be found in it
 */
export function getAddressHeaderSequence(
  header: Buffer,
  headerField: string
) {
  const regexPrefix = `[${headerField[0].toUpperCase()}${headerField[0].toLowerCase()}]${headerField
    .slice(1)
    .toLowerCase()}`;
  const headerStr = header.toString();

  // "m" makes ^ match at every line start, anchoring the field name to a real header line
  const fieldRegex = new RegExp(`^${regexPrefix}:`, "m");
  if (!fieldRegex.test(headerStr))
    throw new Error(`Field "${headerField}" not found in header`);

  // "\\." is required: inside a template literal a bare "\." collapses to "." and
  // would let any character stand in for the dot before the TLD
  const addressRegex = new RegExp(
    `^${regexPrefix}:.*?<([^>]+)>|^${regexPrefix}:.*?([a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,})`,
    "m"
  );
  const match = headerStr.match(addressRegex);
  // unmatched capture groups are `undefined` (not `null`), so test for falsiness
  const address = match === null ? undefined : match[1] || match[2];
  if (match === null || !address)
    throw new Error(`Address not found in "${headerField}" field`);

  // locate the address inside the matched field rather than searching the whole header:
  // the same address may appear earlier in an unrelated header line
  const addressIndex = match.index! + match[0].lastIndexOf(address);
  return [
    { index: match.index!.toString(), length: match[0].length.toString() },
    { index: addressIndex.toString(), length: address.length.toString() },
  ];
}

/**
* Build a ROM table for allowable email characters
*/
Expand All @@ -92,13 +124,13 @@ export function makeEmailAddressCharTable(): string {
for (let i = 0; i < procedingChars.length; i++) {
table[procedingChars.charCodeAt(i)] = 3;
}
let tableStr = `global EMAIL_ADDRESS_CHAR_TABLE: [u8; ${tableLength}] = [\n`
console.log()
let tableStr = `global EMAIL_ADDRESS_CHAR_TABLE: [u8; ${tableLength}] = [\n`;
console.log();
for (let i = 0; i < table.length; i += 10) {
const end = i + 10 < table.length ? i + 10 : table.length;
tableStr += ` ${table.slice(i, end).join(", ")},\n`;
}
return tableStr += "];";
return (tableStr += "];");
}

// export function computeStandardOutputs(email: Buffer): Promise<[bigint, bigint]> {
Expand Down
Loading

0 comments on commit 137a805

Please sign in to comment.