; umul128 -- unsigned 128-bit x 128-bit -> 256-bit multiplication.
;
; Register interface (specific to this routine):
;   in   RCX:RDX = Y1:Y0   first operand,  RCX = high qword, RDX = low qword
;   in   RSI:RDI = Z1:Z0   second operand, RSI = high qword, RDI = low qword
;   out  RCX:RDX:RSI:RDI = A:B:C:D   256-bit product, RCX most significant
;
; Overwritten as scratch: RAX, R8, R9, R10, R11 and the arithmetic flags.
; The routine uses MULX, so it needs a CPU with the BMI2 extension.
;
; Algorithm: Peter Norton, "Advanced Assembly Language", 1991, pp. 229-230.
;
; How the four 64x64 partial products are combined:
;   D = Low (Z0*Y0)
;   C = High(Z0*Y0) + Low (Z0*Y1) + Low(Z1*Y0)
;   B = High(Z0*Y1) + High(Z1*Y0) + Low(Z1*Y1) + carries out of C
;   A = High(Z1*Y1)                            + carries out of B

option prologue:none, epilogue:none

.code

umul128 proc

    ; ---- Partial products by Y0 (RDX is MULX's implicit multiplicand) ----
    mulx    r9,  rax, rsi       ; R9:RAX = Z1 * Y0
    mov     r10, rdi            ; save Z0, because RDI is about to receive D
    mulx    r8,  rdi, rdi       ; R8:RDI = Z0 * Y0   (RDI now holds D)

    ; ---- Partial products by Y1 ----
    mov     rdx, rcx            ; RDX = Y1
    mulx    rcx, r11, rsi       ; RCX:R11 = Z1 * Y1
    mulx    rdx, rsi, r10       ; RDX:RSI = Z0 * Y1  (must be last: replaces Y1 and Z1)

    ; ---- First carry chain: fold in High(Z0*Y0) and High(Z1*Y0) ----
    ; MULX leaves the flags untouched, so the carry chains start cleanly here.
    add     rsi, r8             ; C  = Low (Z0*Y1) + High(Z0*Y0)
    adc     rdx, r9             ; B  = High(Z0*Y1) + High(Z1*Y0) + carry
    adc     rcx, 0              ; A  = High(Z1*Y1) + carry

    ; ---- Second carry chain: fold in Low(Z1*Y0) and Low(Z1*Y1) ----
    add     rsi, rax            ; C += Low(Z1*Y0)
    adc     rdx, r11            ; B += Low(Z1*Y1) + carry
    adc     rcx, 0              ; A += carry

    ret

umul128 endp

end