; udiv256 / _udiv256 -- unsigned 256-bit by 128-bit division, done one bit at
; a time by shift-and-subtract.
;
; Inputs (custom register convention, see the register shuffling below):
;   RCX:RDX:RSI:RDI  256-bit dividend; RCX is the most significant qword,
;                    RDI the least significant
;   R9:R8            128-bit divisor; R9 is the high qword
;   [RSP+8]          qword just above the return address: address where the
;                    128-bit remainder is stored
; Outputs:
;   RDX:RAX          128-bit quotient
;   [ptr], [ptr+8]   low and high qword of the remainder
;
; Besides the result registers, RCX, RSI, RDI and the flags are overwritten.
; The divisor is never tested for zero.
; NOTE(review): only 128 quotient bits are produced, so this presumably
; expects the upper 128 bits of the dividend to be smaller than the divisor
; -- confirm against callers.

option prologue:none, epilogue:none     ; no generated stack frame, so [RSP+8]
                                        ; below is relative to the entry RSP
.code

; http://codereview.stackexchange.com/questions/67962
; The two nested PROCs give the same entry point both names.
udiv256 proc
_udiv256 proc
    ; Rearrange the inputs:
    ;   q = RDX:RAX = low half of the dividend  (becomes the quotient)
    ;   r = RDI:RSI = high half of the dividend (becomes the remainder)
    XCHG RAX,RDI        ; RAX = dividend bits 0..63
    XCHG RDX,RSI        ; RDX = bits 64..127, RSI = bits 128..191
    SHL RAX,1           ; q <<= 1 ...
    RCL RDX,1           ; ... CF = former top bit of q (next dividend bit)
    MOV RDI,RCX         ; RDI = bits 192..255 (MOV leaves CF untouched)
    MOV ECX,128         ; iteration count; the 32-bit write zero-extends into
                        ; RCX, which is what LOOP counts in 64-bit mode
    .repeat
        ; r = (r << 1) | CF; CF = bit shifted out of the top of r
        RCL RSI,1
        RCL RDI,1
        ; Subtract when the shifted value is >= divisor: either a bit fell
        ; out of the top of r (carry?), or r's high qword exceeds R9, or the
        ; high qwords are equal and R8 <= r's low qword.
        ; `equal?` relies on the flags left by the R9/RDI comparison that the
        ; assembler emits for the previous term.
        ; NOTE(review): `equal?` is not one of the flag operators Microsoft
        ; documents for MASM (ZERO?, CARRY?, ...); presumably the intended
        ; assembler accepts it as a zero-flag test -- confirm.
        .if (carry? || R9 < RDI || equal? && R8 <= RSI)
            SUB RSI,R8  ; r -= divisor (low qword)
            SBB RDI,R9  ;              (high qword, with borrow)
            STC         ; CF = 1: this quotient bit is set
        .endif
        ; When the block above is skipped CF is expected to be clear, because
        ; the failing comparison (divisor > r) produces no borrow.
        ; q = (q << 1) | CF; CF = next dividend bit leaving the top of q
        RCL RAX,1
        RCL RDX,1
    .untilcxz           ; LOOP-based repeat (128 passes); LOOP preserves the
                        ; flags, so CF carries into the next pass
    MOV RCX,[RSP+8]     ; remainder destination pointer
    MOV [RCX],RSI       ; store remainder, low qword
    MOV [RCX+8],RDI     ; store remainder, high qword
    RET                 ; quotient is returned in RDX:RAX
_udiv256 endp
udiv256 endp
end