#include typedef struct { uint64_t hi; uint64_t lo; } uint128_t; uint64_t mulhu(uint64_t, uint64_t); void umul128(uint128_t *, uint128_t *); // Henry Warren, "Hacker's Delight", 8-2 High-Order Half of 64-Bit Product uint64_t mulhu(uint64_t u, uint64_t v) { uint64_t u0, u1, v0, v1, w0, w1, w2, t; u0 = u & 0xFFFFFFFF; u1 = u >> 32; v0 = v & 0xFFFFFFFF; v1 = v >> 32; w0 = u0 * v0; t = u1 * v0 + (w0 >> 32); w1 = t & 0xFFFFFFFF; w2 = t >> 32; w1 = u0 * v1 + w1; return u1 * v1 + w2 + (w1 >> 32); } // Algorithm: Peter Norton, "Advanced Assembly Language", 1991, pp. 229-230 void umul128(uint128_t *y, uint128_t *z) { uint128_t a, b, c, d; a.hi = mulhu(z->hi, y->hi); a.lo = z->hi * y->hi; // a = Z1 * Y1 b.hi = mulhu(z->lo, y->hi); b.lo = z->lo * y->hi; // b = Z0 * Y1 c.hi = mulhu(z->hi, y->lo); c.lo = z->hi * y->lo; // c = Z1 * Y0 d.hi = mulhu(z->lo, y->lo); d.lo = z->lo * y->lo; // d = Z0 * Y0 d.hi += b.lo; // C = (Z0 * Y0).hi + (Z0 * Y1).lo if (d.hi < b.lo) b.hi++; // b.hi > 0; -1 * -1 = FFFFFFFFFFFFFFFE0000000000000001 b.hi += c.hi; // B = (Z0 * Y1).hi + (Z1 * Y0).hi if (b.hi < c.hi) a.hi++; // A = (Z1 * Y1).hi d.hi += c.lo; // C = (Z0 * Y0).hi + (Z0 * Y1).lo + (Z1 * Y0).lo if (d.hi < c.lo && !++b.hi) a.hi++; b.hi += a.lo; // B = (Z0 * Y1).hi + (Z1 * Y0).hi + (Z1 * Y1).lo if (b.hi < a.lo) a.hi++; y->hi = a.hi; // A y->lo = b.hi; // B z->hi = d.hi; // C z->lo = d.lo; // D }