// file kernel/n/alpha/burnikel.S: Burnikel-Ziegler division
/*-----------------------------------------------------------------------+
 |  Copyright 2005-2006, Michel Quercia (michel.quercia@prepas.org)      |
 |                                                                       |
 |  This file is part of Numerix. Numerix is free software; you can      |
 |  redistribute it and/or modify it under the terms of the GNU Lesser   |
 |  General Public License as published by the Free Software Foundation; |
 |  either version 2.1 of the License, or (at your option) any later     |
 |  version.                                                             |
 |                                                                       |
 |  The Numerix Library is distributed in the hope that it will be       |
 |  useful, but WITHOUT ANY WARRANTY; without even the implied warranty  |
 |  of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU  |
 |  Lesser General Public License for more details.                      |
 |                                                                       |
 |  You should have received a copy of the GNU Lesser General Public     |
 |  License along with the GNU MP Library; see the file COPYING. If not, |
 |  write to the Free Software Foundation, Inc., 59 Temple Place -       |
 |  Suite 330, Boston, MA 02111-1307, USA.                               |
 +-----------------------------------------------------------------------+
 |                                                                       |
 |                     Division de Burnikel et Ziegler                   |
 |                                                                       |
 +-----------------------------------------------------------------------*/

   # void xn(burnidiv)(chiffre *a, long lc, chiffre *b, long lb, chiffre *c)
   #
   # input:
   # a = natural number of lc+lb digits
   # b = natural number of lb digits
   # c = natural number of lc digits
   #
   # constraints:
   # lb >= 2, lc > 0, the most significant bit of b is set,
   # a < BASE^lc*b
   # a,b,c must not overlap one another
   #
   # output:
   # a <- a mod b
   # c <- floor(a/b)

#ifdef assembly_sn_burnidiv
#define L(x) .Lsn_burnidiv_##x

   # Burnikel-Ziegler division, Alpha assembly version.
   #
   # Registers on entry, as used by the code below:
   #   a in $16, lc in $17, b in $18, lb in $19, c in $20,
   #   return address in $26 (saved in the frame, reloaded before ret).
   #
   # Outline:
   #   - small operands (lb <= burnidiv_lim or lc <= div_small_c_lim)
   #     are handed over to the div_n2 routine with unchanged arguments;
   #   - otherwise b is split into b0 = b[0..p-1] and b1 = b[p..lb-1]
   #     with p = lb/2 and q = lb-p, and the quotient is produced
   #     r = min(lc,q) digits per pass, starting with the most
   #     significant slice of a and c and moving downwards;
   #   - each pass divides the high part of the slice by b1 (recursive
   #     call, or the shortcut quotient BASE^r-1), subtracts c*b0 and
   #     then decrements c and adds b back until the result is >= 0.
   #
   # Only $0-$4, $16-$20, $26 and $27 are written by this routine itself;
   # the helpers it calls are defined elsewhere.

        .align 5
#ifdef debug_burnidiv
        .globl sn_burnidiv_buggy
        .ent   sn_burnidiv_buggy
sn_burnidiv_buggy:
        .frame $30,0,$26,0
        .prologue 1
	ldgp   $gp,  0($27)
#else
        .globl sn_burnidiv
        .ent   sn_burnidiv
sn_burnidiv:
        .frame $30,0,$26,0
        .prologue 1
	ldgp   $gp,  0($27)
	# secondary entry placed after the gp load; the recursive call below
	# enters here
L(nogp):
#endif

	cmpule $19,  burnidiv_lim,    $0 # small division => div_n2
	cmpule $17,  div_small_c_lim, $1
	bis    $0,   $1,   $0
	bne    $0,   .Lsn_div_n2_nogp
	
	# recursive case: split b in two and divide by slices of q digits
	#
	# frame slots, as byte offsets from $30: scratch product x starts
	# at 64; below it are the saved a, b, c, lc, p, q, r and, at offset 0,
	# the return address
	#define _x_  64($30)
	#define _a_  56($30)
	#define _b_  48($30)
	#define _c_  40($30)
	#define _lc_ 32($30)
	#define _p_  24($30)
	#define _q_  16($30)
	#define _r_   8($30)
	#define _ra_  0($30)

	s8addq $19, 72,  $0     # reserve at least lb+8 digits on the stack
	bic    $0,  15,  $0     # (8*lb+72 bytes rounded down to a multiple of 16)
	subq   $30, $0,  $30
	stq    $26, _ra_	# save the return address

	srl    $19, 1,   $0	# r0  <- p = lb/2
	subq   $19, $0,  $19    # r19 <- q = lb-p
	bis    $17, $17, $2     # r2  <- lc
	cmpult $19, $17, $1
	cmovne $1,  $19, $17    # r17 <- r = min(lc,q)
	subq   $2,  $17, $2     # lc -= r
	s8addq $2,  $16, $16    # a += lc
	s8addq $2,  $20, $20    # c += lc

	stq    $18, _b_
	stq    $0,  _p_

	# loop over the slices
	# on entry to each pass: r16 = a, r17 = r, r19 = q, r20 = c, r2 = lc,
	# all pointing at / counting from the current slice
	.align 5
L(loop):
	# compare a[p+r..p+q+r-1] and b[p..p+q-1]
	stq    $2,  _lc_
	stq    $16, _a_
	stq    $17, _r_
	ldq    $18, _b_
	stq    $19, _q_
	stq    $20, _c_
	ldq    $0,  _p_
	s8addq $0,  $16, $16	# r16 <- &a[p]
	s8addq $0,  $18, $18	# r18 <- &b[p]
	s8addq $17, $16, $0     # r0  <- &a[p+q+r]
	s8addq $19, $0,  $0
	s8addq $19, $18, $1     # r1  <- &b[p+q]
	bis    $19, $19, $2     # r2  <- q
	# scan both operands downwards from the top digit, q digits at most;
	# leave as soon as a digit of a is below the matching digit of b
1:
	lda    $2,  -1($2)
	ldq    $3 , -8($0)
	ldq    $4,  -8($1)
	cmpult $3,  $4,  $3
	lda    $0,  -8($0)
	lda    $1,  -8($1)
	bne    $3,  L(small_a)
	bne    $2,  1b

	# here a[p+r..p+q+r-1] = b[p..p+q-1], quotient <- BASE^r-1
	s8addq $17, $16, $0     # r0 <- &a[p+r]
	bis    $19, $19, $1     # r1 <- q
	.align 5
1:
	stq    $31, 0($0)       # a[p+r..p+q+r-1] <- 0
	lda    $0,  8($0)
	lda    $1,  -1($1)
	bne    $1,  1b
	bis    $17, $17, $1     # r1 <- r
	subq   $31, 1,   $0     # r0 <- BASE-1
	.align 5
1:
	stq    $0,  0($20)      # c <- BASE^r - 1
	lda    $20, 8($20)
	lda    $1,  -1($1)
	bne    $1,  1b
	addq   $19, $17, $17    # r17 <- q+r
	# call with r16 = &a[p], r17 = q+r, r18 = &b[p], r19 = q
	# (presumably a[p..p+q+r-1] += b[p..p+q-1]; helper defined elsewhere)
	bsr    $26, .Lsn_inc_nogp
	br     $31, L(mulsub)

	# here a[p+r..p+q+r-1] < b[p..p+q-1], divide a1 by b1
	# recursive call with r16 = &a[p], r17 = r, r18 = &b[p], r19 = q,
	# r20 = c
	.align 5
L(small_a):
	bsr    $26, L(nogp)

	# a0 + BASE^p*r1 -= c*b0
L(mulsub):
	ldq    $16, _c_
	ldq    $17, _r_
	ldq    $18, _b_
	ldq    $19, _p_
	lda    $20, _x_
	cmpult $17, $19, $0	# when r < p, swap the operands
	beq    $0,  1f
	bis    $16, $16, $0     # r16 <-> r18 through r0
	bis    $18, $18, $16
	bis    $0,  $0,  $18
	bis    $17, $17, $0     # r17 <-> r19 through r0
	bis    $19, $19, $17
	bis    $0,  $0,  $19
1:
	# review note: this is the only call in the routine issued with jsr;
	# every other helper call uses bsr. Left untouched.
	jsr    $26, .Lsn_toommul_nogp # x <- c*b0
	ldq    $16, _a_
	lda    $18, _x_
	ldq    $0,  _p_
	ldq    $1,  _q_
	ldq    $2,  _r_
	addq   $0,  $1,  $17	# r17 <- lb+1
	addq   $17, 1,   $17
	addq   $0,  $2,  $19    # r19 <- p+r
	# call with r16 = a, r17 = lb+1, r18 = x, r19 = p+r
	# (presumably a[0..lb] -= x[0..p+r-1], with the borrow returned in r0)
	bsr    $26, .Lsn_dec_nogp

	# fix up when the result is negative
	beq    $0,  L(next)     # r0 = 0: nothing to correct
1:
	ldq    $20, _c_
	ldq    $2,  _r_
	addq   $31, 1,   $0     # r0 <- 1
	bsr    $27, sn_decloop  # c--   (this call links through r27, not r26)
	ldq    $16, _a_
	ldq    $18, _b_
	ldq    $0,  _p_
	ldq    $1,  _q_
	addq   $0,  $1,  $19	# r19 <- lb
	addq   $19, 1,   $17    # r17 <- lb+1
	bsr    $26, .Lsn_inc_nogp # a += b
	beq    $0,  1b          # repeat while r0 comes back as 0

	# next slice
L(next):
	ldq    $2,  _lc_
	ldq    $19, _q_
	ldq    $16, _a_
	ldq    $20, _c_
	cmpult $19, $2,  $1     # r <- min(lc,q)
	cmoveq $1,  $2,  $17
	cmovne $1,  $19, $17
	sll    $17, 3,   $0	# r0 <- 8r
	subq   $16, $0,  $16    # a -= r
	subq   $20, $0,  $20    # c -= r
	subq   $2,  $17, $2     # lc -= r
	bgt    $17, L(loop)     # r > 0: digits of c remain to be computed

	# done
	ldq    $26, _ra_
	ldq    $0,  _p_         # release the stack frame
	addq   $0,  $19, $0     # r0 <- p+q = lb, then same size as allocated
	s8addq $0,  72,  $0
	bic    $0,  15,  $0
	addq   $30, $0,  $30
	ret    $31, ($26),1

	#undef _x_
	#undef _a_
	#undef _b_
	#undef _c_
	#undef _lc_
	#undef _p_
	#undef _q_
	#undef _r_
	#undef _ra_

#ifdef debug_burnidiv
	.end sn_burnidiv_buggy
#else
	.end sn_burnidiv
#endif
#undef L
#endif /* assembly_sn_burnidiv */
/* Taken when the assembly version is not selected, or when it has been
   assembled under its debug name (sn_burnidiv_buggy). REPLACE is defined
   outside this file; presumably it supplies the sn_burnidiv entry point
   from another implementation -- TODO confirm against its definition. */
#if !defined(assembly_sn_burnidiv) || defined(debug_burnidiv)
	REPLACE(sn_burnidiv)
#endif
