| /* |
| * udivdi3.S - unsigned long long division |
| * |
| * Copyright 2003-2007 Analog Devices Inc. |
| * Enter bugs at http://blackfin.uclinux.org/ |
| * |
| * Licensed under the GPLv2 or later. |
| */ |
| |
| #include <linux/linkage.h> |
| |
| #define CARRY AC0 |
| |
| #ifdef CONFIG_ARITHMETIC_OPS_L1 |
| .section .l1.text |
| #else |
| .text |
| #endif |
| |
| |
| ENTRY(___udivdi3) |
| R3 = [SP + 12]; |
| [--SP] = (R7:4, P5:3); |
| |
| /* Attempt to use divide primitive first; these will handle |
| ** most cases, and they're quick - avoids stalls incurred by |
| ** testing for identities. |
| */ |
| |
| R4 = R2 | R3; |
| CC = R4 == 0; |
| IF CC JUMP .LDIV_BY_ZERO; |
| |
| R4.H = 0x8000; |
| R4 >>>= 16; // R4 now 0xFFFF8000 |
| R5 = R0 | R2; // If either dividend or |
| R4 = R5 & R4; // divisor have bits in |
| CC = R4; // top half or low half's sign |
| IF CC JUMP .LIDENTS; // bit, skip builtins. |
| R4 = R1 | R3; // Also check top halves |
| CC = R4; |
| IF CC JUMP .LIDENTS; |
| |
| /* Can use the builtins. */ |
| |
| AQ = CC; // Clear AQ (CC==0) |
| DIVQ(R0, R2); |
| DIVQ(R0, R2); |
| DIVQ(R0, R2); |
| DIVQ(R0, R2); |
| DIVQ(R0, R2); |
| DIVQ(R0, R2); |
| DIVQ(R0, R2); |
| DIVQ(R0, R2); |
| DIVQ(R0, R2); |
| DIVQ(R0, R2); |
| DIVQ(R0, R2); |
| DIVQ(R0, R2); |
| DIVQ(R0, R2); |
| DIVQ(R0, R2); |
| DIVQ(R0, R2); |
| DIVQ(R0, R2); |
| DIVQ(R0, R2); |
| R0 = R0.L (Z); |
| R1 = 0; |
| (R7:4, P5:3) = [SP++]; |
| RTS; |
| |
| .LIDENTS: |
| /* Test for common identities. Value to be returned is |
| ** placed in R6,R7. |
| */ |
| // Check for 0/y, return 0 |
| R4 = R0 | R1; |
| CC = R4 == 0; |
| IF CC JUMP .LRETURN_R0; |
| |
| // Check for x/x, return 1 |
| R6 = R0 - R2; // If x == y, then both R6 and R7 will be zero |
| R7 = R1 - R3; |
| R4 = R6 | R7; // making R4 zero. |
| R6 += 1; // which would now make R6:R7==1. |
| CC = R4 == 0; |
| IF CC JUMP .LRETURN_IDENT; |
| |
| // Check for x/1, return x |
| R6 = R0; |
| R7 = R1; |
| CC = R3 == 0; |
| IF !CC JUMP .Lnexttest; |
| CC = R2 == 1; |
| IF CC JUMP .LRETURN_IDENT; |
| |
| .Lnexttest: |
| R4.L = ONES R2; // check for div by power of two which |
| R5.L = ONES R3; // can be done using a shift |
| R6 = PACK (R5.L, R4.L); |
| CC = R6 == 1; |
| IF CC JUMP .Lpower_of_two_upper_zero; |
| R6 = PACK (R4.L, R5.L); |
| CC = R6 == 1; |
| IF CC JUMP .Lpower_of_two_lower_zero; |
| |
| // Check for x < y, return 0 |
| R6 = 0; |
| R7 = R6; |
| CC = R1 < R3 (IU); |
| IF CC JUMP .LRETURN_IDENT; |
| CC = R1 == R3; |
| IF !CC JUMP .Lno_idents; |
| CC = R0 < R2 (IU); |
| IF CC JUMP .LRETURN_IDENT; |
| |
| .Lno_idents: // Idents don't match. Go for the full operation |
| |
| |
| // If X, or X and Y have high bit set, it'll affect the |
| // results, so shift right one to stop this. Note: we've already |
| // checked that X >= Y, so Y's msb won't be set unless X's |
| // is. |
| |
| R4 = 0; |
| CC = R1 < 0; |
| IF !CC JUMP .Lx_msb_clear; |
| CC = !CC; // 1 -> 0; |
| R1 = ROT R1 BY -1; // Shift X >> 1 |
| R0 = ROT R0 BY -1; // lsb -> CC |
| BITSET(R4,31); // to record only x msb was set |
| CC = R3 < 0; |
| IF !CC JUMP .Ly_msb_clear; |
| CC = !CC; |
| R3 = ROT R3 BY -1; // Shift Y >> 1 |
| R2 = ROT R2 BY -1; |
| BITCLR(R4,31); // clear bit to record only x msb was set |
| |
| .Ly_msb_clear: |
| .Lx_msb_clear: |
| // Bit 31 in R4 indicates X msb set, but Y msb wasn't, and no bits |
| // were lost, so we should shift result left by one. |
| |
| [--SP] = R4; // save for later |
| |
| // In the loop that follows, each iteration we add |
| // either Y' or -Y' to the Remainder. We compute the |
| // negated Y', and store, for convenience. Y' goes |
| // into P0:P1, while -Y' goes into P2:P3. |
| |
| P0 = R2; |
| P1 = R3; |
| R2 = -R2; |
| CC = CARRY; |
| CC = !CC; |
| R4 = CC; |
| R3 = -R3; |
| R3 = R3 - R4; |
| |
| R6 = 0; // remainder = 0 |
| R7 = R6; |
| |
| [--SP] = R2; P2 = SP; |
| [--SP] = R3; P3 = SP; |
| [--SP] = R6; P5 = SP; // AQ = 0 |
| [--SP] = P1; |
| |
| /* In the loop that follows, we use the following |
| ** register assignments: |
| ** R0,R1 X, workspace |
| ** R2,R3 Y, workspace |
| ** R4,R5 partial Div |
| ** R6,R7 partial remainder |
| ** P5 AQ |
| ** The remainder and div form a 128-bit number, with |
| ** the remainder in the high 64-bits. |
| */ |
| R4 = R0; // Div = X' |
| R5 = R1; |
| R3 = 0; |
| |
| P4 = 64; // Iterate once per bit |
| LSETUP(.LULST,.LULEND) LC0 = P4; |
| .LULST: |
| /* Shift Div and remainder up by one. The bit shifted |
| ** out of the top of the quotient is shifted into the bottom |
| ** of the remainder. |
| */ |
| CC = R3; |
| R4 = ROT R4 BY 1; |
| R5 = ROT R5 BY 1 || // low q to high q |
| R2 = [P5]; // load saved AQ |
| R6 = ROT R6 BY 1 || // high q to low r |
| R0 = [P2]; // load -Y' |
| R7 = ROT R7 BY 1 || // low r to high r |
| R1 = [P3]; |
| |
| // Assume add -Y' |
| CC = R2 < 0; // But if AQ is set... |
| IF CC R0 = P0; // then add Y' instead |
| IF CC R1 = P1; |
| |
| R6 = R6 + R0; // Rem += (Y' or -Y') |
| CC = CARRY; |
| R0 = CC; |
| R7 = R7 + R1; |
| R7 = R7 + R0 (NS) || |
| R1 = [SP]; |
| // Set the next AQ bit |
| R1 = R7 ^ R1; // from Remainder and Y' |
| R1 = R1 >> 31 || // Negate AQ's value, and |
| [P5] = R1; // save next AQ |
| BITTGL(R1, 0); // add neg AQ to the Div |
| .LULEND: R4 = R4 + R1; |
| |
| R6 = [SP + 16]; |
| |
| R0 = R4; |
| R1 = R5; |
| CC = BITTST(R6,30); // Just set CC=0 |
| R4 = ROT R0 BY 1; // but if we had to shift X, |
| R5 = ROT R1 BY 1; // and didn't shift any bits out, |
| CC = BITTST(R6,31); // then the result will be half as |
| IF CC R0 = R4; // much as required, so shift left |
| IF CC R1 = R5; // one space. |
| |
| SP += 20; |
| (R7:4, P5:3) = [SP++]; |
| RTS; |
| |
| .Lpower_of_two: |
| /* Y has a single bit set, which means it's a power of two. |
| ** That means we can perform the division just by shifting |
| ** X to the right the appropriate number of bits |
| */ |
| |
| /* signbits returns the number of sign bits, minus one. |
| ** 1=>30, 2=>29, ..., 0x40000000=>0. Which means we need |
| ** to shift right n-signbits spaces. It also means 0x80000000 |
| ** is a special case, because that *also* gives a signbits of 0 |
| */ |
| .Lpower_of_two_lower_zero: |
| R7 = 0; |
| R6 = R1 >> 31; |
| CC = R3 < 0; |
| IF CC JUMP .LRETURN_IDENT; |
| |
| R2.L = SIGNBITS R3; |
| R2 = R2.L (Z); |
| R2 += -62; |
| (R7:4, P5:3) = [SP++]; |
| JUMP ___lshftli; |
| |
| .Lpower_of_two_upper_zero: |
| CC = R2 < 0; |
| IF CC JUMP .Lmaxint_shift; |
| |
| R2.L = SIGNBITS R2; |
| R2 = R2.L (Z); |
| R2 += -30; |
| (R7:4, P5:3) = [SP++]; |
| JUMP ___lshftli; |
| |
| .Lmaxint_shift: |
| R2 = -31; |
| (R7:4, P5:3) = [SP++]; |
| JUMP ___lshftli; |
| |
| .LRETURN_IDENT: |
| R0 = R6; |
| R1 = R7; |
| .LRETURN_R0: |
| (R7:4, P5:3) = [SP++]; |
| RTS; |
| .LDIV_BY_ZERO: |
| R0 = ~R2; |
| R1 = R0; |
| (R7:4, P5:3) = [SP++]; |
| RTS; |
| |
| ENDPROC(___udivdi3) |
| |
| |
| ENTRY(___lshftli) |
| CC = R2 == 0; |
| IF CC JUMP .Lfinished; // nothing to do |
| CC = R2 < 0; |
| IF CC JUMP .Lrshift; |
| R3 = 64; |
| CC = R2 < R3; |
| IF !CC JUMP .Lretzero; |
| |
| // We're shifting left, and it's less than 64 bits, so |
| // a valid result will be returned. |
| |
| R3 >>= 1; // R3 now 32 |
| CC = R2 < R3; |
| |
| IF !CC JUMP .Lzerohalf; |
| |
| // We're shifting left, between 1 and 31 bits, which means |
| // some of the low half will be shifted into the high half. |
| // Work out how much. |
| |
| R3 = R3 - R2; |
| |
| // Save that much data from the bottom half. |
| |
| P1 = R7; |
| R7 = R0; |
| R7 >>= R3; |
| |
| // Adjust both parts of the parameter. |
| |
| R0 <<= R2; |
| R1 <<= R2; |
| |
| // And include the bits moved across. |
| |
| R1 = R1 | R7; |
| R7 = P1; |
| RTS; |
| |
| .Lzerohalf: |
| // We're shifting left, between 32 and 63 bits, so the |
| // bottom half will become zero, and the top half will |
| // lose some bits. How many? |
| |
| R2 = R2 - R3; // N - 32 |
| R1 = LSHIFT R0 BY R2.L; |
| R0 = R0 - R0; |
| RTS; |
| |
| .Lretzero: |
| R0 = R0 - R0; |
| R1 = R0; |
| .Lfinished: |
| RTS; |
| |
| .Lrshift: |
| // We're shifting right, but by how much? |
| R2 = -R2; |
| R3 = 64; |
| CC = R2 < R3; |
| IF !CC JUMP .Lretzero; |
| |
| // Shifting right less than 64 bits, so some result bits will |
| // be retained. |
| |
| R3 >>= 1; // R3 now 32 |
| CC = R2 < R3; |
| IF !CC JUMP .Lsignhalf; |
| |
| // Shifting right between 1 and 31 bits, so need to copy |
| // data across words. |
| |
| P1 = R7; |
| R3 = R3 - R2; |
| R7 = R1; |
| R7 <<= R3; |
| R1 >>= R2; |
| R0 >>= R2; |
| R0 = R7 | R0; |
| R7 = P1; |
| RTS; |
| |
| .Lsignhalf: |
| // Shifting right between 32 and 63 bits, so the top half |
| // will become all zero-bits, and the bottom half is some |
| // of the top half. But how much? |
| |
| R2 = R2 - R3; |
| R0 = R1; |
| R0 >>= R2; |
| R1 = 0; |
| RTS; |
| |
| ENDPROC(___lshftli) |