diff options
Diffstat (limited to 'src/cmd/8g/ggen.c')
| -rw-r--r-- | src/cmd/8g/ggen.c | 841 | 
1 files changed, 407 insertions, 434 deletions
| diff --git a/src/cmd/8g/ggen.c b/src/cmd/8g/ggen.c index 6a4570199..70148106c 100644 --- a/src/cmd/8g/ggen.c +++ b/src/cmd/8g/ggen.c @@ -27,6 +27,9 @@ void  markautoused(Prog* p)  {  	for (; p; p = p->link) { +		if (p->as == ATYPE) +			continue; +  		if (p->from.type == D_AUTO && p->from.node)  			p->from.node->used = 1; @@ -39,12 +42,21 @@ markautoused(Prog* p)  void  fixautoused(Prog* p)  { -	for (; p; p = p->link) { +	Prog **lp; + +	for (lp=&p; (p=*lp) != P; ) { +		if (p->as == ATYPE && p->from.node && p->from.type == D_AUTO && !p->from.node->used) { +			*lp = p->link; +			continue; +		} +  		if (p->from.type == D_AUTO && p->from.node)  			p->from.offset += p->from.node->stkdelta;  		if (p->to.type == D_AUTO && p->to.node)  			p->to.offset += p->to.node->stkdelta; + +		lp = &p->link;  	}  } @@ -59,6 +71,10 @@ clearfat(Node *nl)  		dump("\nclearfat", nl);  	w = nl->type->width; +	// Avoid taking the address for simple enough types. +	if(componentgen(N, nl)) +		return; +  	c = w % 4;	// bytes  	q = w / 4;	// quads @@ -90,15 +106,17 @@ clearfat(Node *nl)  /*   * generate:   *	call f + *	proc=-1	normal call but no return   *	proc=0	normal call   *	proc=1	goroutine run in new proc   *	proc=2	defer call save away stack +  *	proc=3	normal call to C pointer (not Go func value)   */  void  ginscall(Node *f, int proc)  {  	Prog *p; -	Node reg, con; +	Node reg, r1, con;  	switch(proc) {  	default: @@ -106,8 +124,25 @@ ginscall(Node *f, int proc)  		break;  	case 0:	// normal call -		p = gins(ACALL, N, f); -		afunclit(&p->to); +	case -1:	// normal call but no return +		if(f->op == ONAME && f->class == PFUNC) { +			p = gins(ACALL, N, f); +			afunclit(&p->to, f); +			if(proc == -1 || noreturn(p)) +				gins(AUNDEF, N, N); +			break; +		} +		nodreg(&reg, types[tptr], D_DX); +		nodreg(&r1, types[tptr], D_BX); +		gmove(f, &reg); +		reg.op = OINDREG; +		gmove(&reg, &r1); +		reg.op = OREGISTER; +		gins(ACALL, &reg, &r1); +		break; +	 +	case 3:	// normal call of c function pointer +		
gins(ACALL, N, f);  		break;  	case 1:	// call in new proc (go) @@ -125,7 +160,7 @@ ginscall(Node *f, int proc)  		if(proc == 2) {  			nodreg(&reg, types[TINT64], D_AX);  			gins(ATESTL, &reg, &reg); -			patch(gbranch(AJNE, T), retpc); +			patch(gbranch(AJNE, T, -1), retpc);  		}  		break;  	} @@ -139,7 +174,7 @@ void  cgen_callinter(Node *n, Node *res, int proc)  {  	Node *i, *f; -	Node tmpi, nodo, nodr, nodsp; +	Node tmpi, nodi, nodo, nodr, nodsp;  	i = n->left;  	if(i->op != ODOTINTER) @@ -159,25 +194,35 @@ cgen_callinter(Node *n, Node *res, int proc)  	genlist(n->list);		// assign the args -	// Can regalloc now; i is known to be addable, -	// so the agen will be easy. -	regalloc(&nodr, types[tptr], res); -	regalloc(&nodo, types[tptr], &nodr); -	nodo.op = OINDREG; - -	agen(i, &nodr);		// REG = &inter +	// i is now addable, prepare an indirected +	// register to hold its address. +	igen(i, &nodi, res);		// REG = &inter  	nodindreg(&nodsp, types[tptr], D_SP); -	nodo.xoffset += widthptr; -	cgen(&nodo, &nodsp);	// 0(SP) = 4(REG) -- i.data +	nodi.type = types[tptr]; +	nodi.xoffset += widthptr; +	cgen(&nodi, &nodsp);	// 0(SP) = 4(REG) -- i.data -	nodo.xoffset -= widthptr; -	cgen(&nodo, &nodr);	// REG = 0(REG) -- i.tab +	regalloc(&nodo, types[tptr], res); +	nodi.type = types[tptr]; +	nodi.xoffset -= widthptr; +	cgen(&nodi, &nodo);	// REG = 0(REG) -- i.tab +	regfree(&nodi); +	regalloc(&nodr, types[tptr], &nodo);  	if(n->left->xoffset == BADWIDTH)  		fatal("cgen_callinter: badwidth"); +	nodo.op = OINDREG;  	nodo.xoffset = n->left->xoffset + 3*widthptr + 8; -	cgen(&nodo, &nodr);	// REG = 20+offset(REG) -- i.tab->fun[f] +	 +	if(proc == 0) { +		// plain call: use direct c function pointer - more efficient +		cgen(&nodo, &nodr);	// REG = 20+offset(REG) -- i.tab->fun[f] +		proc = 3; +	} else { +		// go/defer. generate go func value. 
+		gins(ALEAL, &nodo, &nodr);	// REG = &(20+offset(REG)) -- i.tab->fun[f] +	}  	// BOTCH nodr.type = fntype;  	nodr.type = n->left->type; @@ -486,7 +531,7 @@ dodiv(int op, Node *nl, Node *nr, Node *res, Node *ax, Node *dx)  	int check;  	Node n1, t1, t2, t3, t4, n4, nz;  	Type *t, *t0; -	Prog *p1, *p2, *p3; +	Prog *p1, *p2;  	// Have to be careful about handling  	// most negative int divided by -1 correctly. @@ -535,23 +580,22 @@ dodiv(int op, Node *nl, Node *nr, Node *res, Node *ax, Node *dx)  		regalloc(&n1, t, N);  	gmove(&t2, &n1);  	gmove(&t1, ax); -	p3 = P; +	p2 = P;  	if(check) {  		nodconst(&n4, t, -1);  		gins(optoas(OCMP, t), &n1, &n4); -		p1 = gbranch(optoas(ONE, t), T); -		nodconst(&n4, t, -1LL<<(t->width*8-1)); -		gins(optoas(OCMP, t), ax, &n4); -		p2 = gbranch(optoas(ONE, t), T); -		if(op == ODIV) -			gmove(&n4, res); -		if(op == OMOD) { +		p1 = gbranch(optoas(ONE, t), T, +1); +		if(op == ODIV) { +			// a / (-1) is -a. +			gins(optoas(OMINUS, t), N, ax); +			gmove(ax, res); +		} else { +			// a % (-1) is 0.  			
nodconst(&n4, t, 0);  			gmove(&n4, res);  		} -		p3 = gbranch(AJMP, T); +		p2 = gbranch(AJMP, T, 0);  		patch(p1, pc); -		patch(p2, pc);  	}  	if(!issigned[t->etype]) {  		nodconst(&nz, t, 0); @@ -566,7 +610,7 @@ dodiv(int op, Node *nl, Node *nr, Node *res, Node *ax, Node *dx)  	else  		gmove(dx, res);  	if(check) -		patch(p3, pc); +		patch(p2, pc);  }  static void @@ -630,7 +674,7 @@ cgen_div(int op, Node *nl, Node *nr, Node *res)   *	res = nl >> nr   */  void -cgen_shift(int op, Node *nl, Node *nr, Node *res) +cgen_shift(int op, int bounded, Node *nl, Node *nr, Node *res)  {  	Node n1, n2, nt, cx, oldcx, hi, lo;  	int a, w; @@ -651,7 +695,7 @@ cgen_shift(int op, Node *nl, Node *nr, Node *res)  		gmove(&n2, &n1);  		sc = mpgetfix(nr->val.u.xval);  		if(sc >= nl->type->width*8) { -			// large shift gets 2 shifts by width +			// large shift gets 2 shifts by width-1  			gins(a, ncon(w-1), &n1);  			gins(a, ncon(w-1), &n1);  		} else @@ -689,27 +733,39 @@ cgen_shift(int op, Node *nl, Node *nr, Node *res)  	}  	// test and fix up large shifts -	if(nr->type->width > 4) { -		// delayed reg alloc -		nodreg(&n1, types[TUINT32], D_CX); -		regalloc(&n1, types[TUINT32], &n1);		// to hold the shift type in CX -		split64(&nt, &lo, &hi); -		gmove(&lo, &n1); -		gins(optoas(OCMP, types[TUINT32]), &hi, ncon(0)); -		p2 = gbranch(optoas(ONE, types[TUINT32]), T); -		gins(optoas(OCMP, types[TUINT32]), &n1, ncon(w)); -		p1 = gbranch(optoas(OLT, types[TUINT32]), T); -		patch(p2, pc); -	} else { -		gins(optoas(OCMP, nr->type), &n1, ncon(w)); -		p1 = gbranch(optoas(OLT, types[TUINT32]), T); -	} -	if(op == ORSH && issigned[nl->type->etype]) { -		gins(a, ncon(w-1), &n2); +	if(bounded) { +		if(nr->type->width > 4) { +			// delayed reg alloc +			nodreg(&n1, types[TUINT32], D_CX); +			regalloc(&n1, types[TUINT32], &n1);		// to hold the shift type in CX +			split64(&nt, &lo, &hi); +			gmove(&lo, &n1); +			splitclean(); +		}  	} else { -		gmove(ncon(0), &n2); +		if(nr->type->width > 4) { +			// 
delayed reg alloc +			nodreg(&n1, types[TUINT32], D_CX); +			regalloc(&n1, types[TUINT32], &n1);		// to hold the shift type in CX +			split64(&nt, &lo, &hi); +			gmove(&lo, &n1); +			gins(optoas(OCMP, types[TUINT32]), &hi, ncon(0)); +			p2 = gbranch(optoas(ONE, types[TUINT32]), T, +1); +			gins(optoas(OCMP, types[TUINT32]), &n1, ncon(w)); +			p1 = gbranch(optoas(OLT, types[TUINT32]), T, +1); +			splitclean(); +			patch(p2, pc); +		} else { +			gins(optoas(OCMP, nr->type), &n1, ncon(w)); +			p1 = gbranch(optoas(OLT, types[TUINT32]), T, +1); +		} +		if(op == ORSH && issigned[nl->type->etype]) { +			gins(a, ncon(w-1), &n2); +		} else { +			gmove(ncon(0), &n2); +		} +		patch(p1, pc);  	} -	patch(p1, pc);  	gins(a, &n1, &n2);  	if(oldcx.op != 0) @@ -724,444 +780,361 @@ cgen_shift(int op, Node *nl, Node *nr, Node *res)  /*   * generate byte multiply:   *	res = nl * nr - * no byte multiply instruction so have to do - * 16-bit multiply and take bottom half. + * there is no 2-operand byte multiply instruction so + * we do a full-width multiplication and truncate afterwards.   */  void  cgen_bmul(int op, Node *nl, Node *nr, Node *res)  { -	Node n1b, n2b, n1w, n2w; +	Node n1, n2, nt, *tmp;  	Type *t;  	int a; -	if(nl->ullman >= nr->ullman) { -		regalloc(&n1b, nl->type, res); -		cgen(nl, &n1b); -		regalloc(&n2b, nr->type, N); -		cgen(nr, &n2b); -	} else { -		regalloc(&n2b, nr->type, N); -		cgen(nr, &n2b); -		regalloc(&n1b, nl->type, res); -		cgen(nl, &n1b); -	} - -	// copy from byte to short registers -	t = types[TUINT16]; +	// copy from byte to full registers +	t = types[TUINT32];  	if(issigned[nl->type->etype]) -		t = types[TINT16]; - -	regalloc(&n2w, t, &n2b); -	cgen(&n2b, &n2w); +		t = types[TINT32]; -	regalloc(&n1w, t, &n1b); -	cgen(&n1b, &n1w); +	// largest ullman on left. 
+	if(nl->ullman < nr->ullman) { +		tmp = nl; +		nl = nr; +		nr = tmp; +	} +	tempname(&nt, nl->type); +	cgen(nl, &nt); +	regalloc(&n1, t, res); +	cgen(nr, &n1); +	regalloc(&n2, t, N); +	gmove(&nt, &n2);  	a = optoas(op, t); -	gins(a, &n2w, &n1w); -	cgen(&n1w, &n1b); -	cgen(&n1b, res); - -	regfree(&n1w); -	regfree(&n2w); -	regfree(&n1b); -	regfree(&n2b); +	gins(a, &n2, &n1); +	regfree(&n2); +	gmove(&n1, res); +	regfree(&n1);  } -static int -regcmp(const void *va, const void *vb) +/* + * generate high multiply: + *   res = (nl*nr) >> width + */ +void +cgen_hmul(Node *nl, Node *nr, Node *res)  { -	Node *ra, *rb; - -	ra = (Node*)va; -	rb = (Node*)vb; -	return ra->local - rb->local; -} +	Type *t; +	int a; +	Node n1, n2, ax, dx; -static	Prog*	throwpc; +	t = nl->type; +	a = optoas(OHMUL, t); +	// gen nl in n1. +	tempname(&n1, t); +	cgen(nl, &n1); +	// gen nr in n2. +	regalloc(&n2, t, res); +	cgen(nr, &n2); + +	// multiply. +	nodreg(&ax, t, D_AX); +	gmove(&n2, &ax); +	gins(a, &n1, N); +	regfree(&n2); -// We're only going to bother inlining if we can -// convert all the arguments to 32 bits safely.  Can we? -static int -fix64(NodeList *nn, int n) -{ -	NodeList *l; -	Node *r; -	int i; -	 -	l = nn; -	for(i=0; i<n; i++) { -		r = l->n->right; -		if(is64(r->type) && !smallintconst(r)) { -			if(r->op == OCONV) -				r = r->left; -			if(is64(r->type)) -				return 0; -		} -		l = l->next; +	if(t->width == 1) { +		// byte multiply behaves differently. +		nodreg(&ax, t, D_AH); +		nodreg(&dx, t, D_DL); +		gmove(&ax, &dx);  	} -	return 1; +	nodreg(&dx, t, D_DX); +	gmove(&dx, res);  } +static void cgen_float387(Node *n, Node *res); +static void cgen_floatsse(Node *n, Node *res); + +/* + * generate floating-point operation. 
+ */  void -getargs(NodeList *nn, Node *reg, int n) +cgen_float(Node *n, Node *res)  { -	NodeList *l; -	Node *r; -	int i; - -	throwpc = nil; - -	l = nn; -	for(i=0; i<n; i++) { -		r = l->n->right; -		if(is64(r->type)) { -			if(r->op == OCONV) -				r = r->left; -			else if(smallintconst(r)) -				r->type = types[TUINT32]; -			if(is64(r->type)) -				fatal("getargs"); +	Node *nl; +	Node n1, n2; +	Prog *p1, *p2, *p3; + +	nl = n->left; +	switch(n->op) { +	case OEQ: +	case ONE: +	case OLT: +	case OLE: +	case OGE: +		p1 = gbranch(AJMP, T, 0); +		p2 = pc; +		gmove(nodbool(1), res); +		p3 = gbranch(AJMP, T, 0); +		patch(p1, pc); +		bgen(n, 1, 0, p2); +		gmove(nodbool(0), res); +		patch(p3, pc); +		return; + +	case OPLUS: +		cgen(nl, res); +		return; + +	case OCONV: +		if(eqtype(n->type, nl->type) || noconv(n->type, nl->type)) { +			cgen(nl, res); +			return;  		} -		if(!smallintconst(r) && !isslice(r->type)) { -			if(i < 3)	// AX CX DX -				nodreg(reg+i, r->type, D_AX+i); -			else -				reg[i].op = OXXX; -			regalloc(reg+i, r->type, reg+i); -			cgen(r, reg+i); -		} else -			reg[i] = *r; -		if(reg[i].local != 0) -			yyerror("local used"); -		reg[i].local = l->n->left->xoffset; -		l = l->next; + +		tempname(&n2, n->type); +		mgen(nl, &n1, res); +		gmove(&n1, &n2); +		gmove(&n2, res); +		mfree(&n1); +		return;  	} -	qsort((void*)reg, n, sizeof(*reg), regcmp); -	for(i=0; i<n; i++) -		reg[i].local = 0; + +	if(use_sse) +		cgen_floatsse(n, res); +	else +		cgen_float387(n, res);  } -void -cmpandthrow(Node *nl, Node *nr) +// floating-point.  
387 (not SSE2) +static void +cgen_float387(Node *n, Node *res)  { -	vlong cl; -	Prog *p1; -	int op; -	Node *c, n1; -	Type *t; +	Node f0, f1; +	Node *nl, *nr; -	op = OLE; -	if(smallintconst(nl)) { -		cl = mpgetfix(nl->val.u.xval); -		if(cl == 0) -			return; -		if(smallintconst(nr)) -			return; -		// put the constant on the right -		op = brrev(op); -		c = nl; -		nl = nr; -		nr = c; -	} -	 -	// Arguments are known not to be 64-bit, -	// but they might be smaller than 32 bits. -	// Check if we need to use a temporary. -	// At least one of the arguments is 32 bits -	// (the len or cap) so one temporary suffices. -	n1.op = OXXX; -	t = types[TUINT32]; -	if(nl->type->width != t->width) { -		regalloc(&n1, t, nl); -		gmove(nl, &n1); -		nl = &n1; -	} else if(nr->type->width != t->width) { -		regalloc(&n1, t, nr); -		gmove(nr, &n1); -		nr = &n1; -	} -	gins(optoas(OCMP, t), nl, nr); -	if(n1.op != OXXX) -		regfree(&n1); -	if(throwpc == nil) { -		p1 = gbranch(optoas(op, t), T); -		throwpc = pc; -		ginscall(panicslice, 0); -		patch(p1, pc); +	nl = n->left; +	nr = n->right; +	nodreg(&f0, nl->type, D_F0); +	nodreg(&f1, n->type, D_F0+1); +	if(nr != N) +		goto flt2; + +	// unary +	cgen(nl, &f0); +	if(n->op != OCONV && n->op != OPLUS) +		gins(foptoas(n->op, n->type, 0), N, N); +	gmove(&f0, res); +	return; + +flt2:	// binary +	if(nl->ullman >= nr->ullman) { +		cgen(nl, &f0); +		if(nr->addable) +			gins(foptoas(n->op, n->type, 0), nr, &f0); +		else { +			cgen(nr, &f0); +			gins(foptoas(n->op, n->type, Fpop), &f0, &f1); +		}  	} else { -		op = brcom(op); -		p1 = gbranch(optoas(op, t), T); -		patch(p1, throwpc); +		cgen(nr, &f0); +		if(nl->addable) +			gins(foptoas(n->op, n->type, Frev), nl, &f0); +		else { +			cgen(nl, &f0); +			gins(foptoas(n->op, n->type, Frev|Fpop), &f0, &f1); +		}  	} -} +	gmove(&f0, res); +	return; -int -sleasy(Node *n) -{ -	if(n->op != ONAME) -		return 0; -	if(!n->addable) -		return 0; -	return 1;  } -// generate inline code for -//	slicearray -//	sliceslice -//	
arraytoslice -int -cgen_inline(Node *n, Node *res) +static void +cgen_floatsse(Node *n, Node *res)  { -	Node nodes[5]; -	Node n1, n2, nres, ntemp; -	vlong v; -	int i, narg, nochk; - -	if(n->op != OCALLFUNC) -		goto no; -	if(!n->left->addable) -		goto no; -	if(n->left->sym == S) -		goto no; -	if(n->left->sym->pkg != runtimepkg) -		goto no; -	if(strcmp(n->left->sym->name, "slicearray") == 0) -		goto slicearray; -	if(strcmp(n->left->sym->name, "sliceslice") == 0) { -		narg = 4; -		goto sliceslice; -	} -	if(strcmp(n->left->sym->name, "sliceslice1") == 0) { -		narg = 3; -		goto sliceslice; -	} -	goto no; - -slicearray: -	if(!sleasy(res)) -		goto no; -	if(!fix64(n->list, 5)) -		goto no; -	getargs(n->list, nodes, 5); - -	// if(hb[3] > nel[1]) goto throw -	cmpandthrow(&nodes[3], &nodes[1]); - -	// if(lb[2] > hb[3]) goto throw -	cmpandthrow(&nodes[2], &nodes[3]); - -	// len = hb[3] - lb[2] (destroys hb) -	n2 = *res; -	n2.xoffset += Array_nel; -	n2.type = types[TUINT32]; - -	if(smallintconst(&nodes[3]) && smallintconst(&nodes[2])) { -		v = mpgetfix(nodes[3].val.u.xval) - -			mpgetfix(nodes[2].val.u.xval); -		nodconst(&n1, types[TUINT32], v); -		gins(optoas(OAS, types[TUINT32]), &n1, &n2); -	} else { -		regalloc(&n1, types[TUINT32], &nodes[3]); -		gmove(&nodes[3], &n1); -		if(!smallintconst(&nodes[2]) || mpgetfix(nodes[2].val.u.xval) != 0) -			gins(optoas(OSUB, types[TUINT32]), &nodes[2], &n1); -		gins(optoas(OAS, types[TUINT32]), &n1, &n2); -		regfree(&n1); -	} +	Node *nl, *nr, *r; +	Node n1, n2, nt; +	int a; -	// cap = nel[1] - lb[2] (destroys nel) -	n2 = *res; -	n2.xoffset += Array_cap; -	n2.type = types[TUINT32]; +	nl = n->left; +	nr = n->right; +	switch(n->op) { +	default: +		dump("cgen_floatsse", n); +		fatal("cgen_floatsse %O", n->op); +		return; -	if(smallintconst(&nodes[1]) && smallintconst(&nodes[2])) { -		v = mpgetfix(nodes[1].val.u.xval) - -			mpgetfix(nodes[2].val.u.xval); -		nodconst(&n1, types[TUINT32], v); -		gins(optoas(OAS, types[TUINT32]), &n1, &n2); -	} 
else { -		regalloc(&n1, types[TUINT32], &nodes[1]); -		gmove(&nodes[1], &n1); -		if(!smallintconst(&nodes[2]) || mpgetfix(nodes[2].val.u.xval) != 0) -			gins(optoas(OSUB, types[TUINT32]), &nodes[2], &n1); -		gins(optoas(OAS, types[TUINT32]), &n1, &n2); -		regfree(&n1); +	case OMINUS: +	case OCOM: +		nr = nodintconst(-1); +		convlit(&nr, n->type); +		a = foptoas(OMUL, nl->type, 0); +		goto sbop; + +	// symmetric binary +	case OADD: +	case OMUL: +		a = foptoas(n->op, nl->type, 0); +		goto sbop; + +	// asymmetric binary +	case OSUB: +	case OMOD: +	case ODIV: +		a = foptoas(n->op, nl->type, 0); +		goto abop;  	} -	// if slice could be too big, dereference to -	// catch nil array pointer. -	if(nodes[0].op == OREGISTER && nodes[0].type->type->width >= unmappedzero) { -		n2 = nodes[0]; -		n2.xoffset = 0; -		n2.op = OINDREG; -		n2.type = types[TUINT8]; -		gins(ATESTB, nodintconst(0), &n2); +sbop:	// symmetric binary +	if(nl->ullman < nr->ullman || nl->op == OLITERAL) { +		r = nl; +		nl = nr; +		nr = r;  	} -	// ary = old[0] + (lb[2] * width[4]) (destroys old) -	n2 = *res; -	n2.xoffset += Array_array; -	n2.type = types[tptr]; - -	if(smallintconst(&nodes[2]) && smallintconst(&nodes[4])) { -		v = mpgetfix(nodes[2].val.u.xval) * -			mpgetfix(nodes[4].val.u.xval); -		if(v != 0) { -			nodconst(&n1, types[tptr], v); -			gins(optoas(OADD, types[tptr]), &n1, &nodes[0]); -		} +abop:	// asymmetric binary +	if(nl->ullman >= nr->ullman) { +		tempname(&nt, nl->type); +		cgen(nl, &nt); +		mgen(nr, &n2, N); +		regalloc(&n1, nl->type, res); +		gmove(&nt, &n1); +		gins(a, &n2, &n1); +		gmove(&n1, res); +		regfree(&n1); +		mfree(&n2);  	} else { -		regalloc(&n1, types[tptr], &nodes[2]); -		gmove(&nodes[2], &n1); -		if(!smallintconst(&nodes[4]) || mpgetfix(nodes[4].val.u.xval) != 1) -			gins(optoas(OMUL, types[tptr]), &nodes[4], &n1); -		gins(optoas(OADD, types[tptr]), &n1, &nodes[0]); +		regalloc(&n2, nr->type, res); +		cgen(nr, &n2); +		regalloc(&n1, nl->type, N); +		cgen(nl, &n1); +		
gins(a, &n2, &n1); +		regfree(&n2); +		gmove(&n1, res);  		regfree(&n1);  	} -	gins(optoas(OAS, types[tptr]), &nodes[0], &n2); - -	for(i=0; i<5; i++) { -		if(nodes[i].op == OREGISTER) -			regfree(&nodes[i]); -	} -	return 1; +	return; +} -sliceslice: -	if(!fix64(n->list, narg)) -		goto no; -	nochk = n->etype;  // skip bounds checking -	ntemp.op = OXXX; -	if(!sleasy(n->list->n->right)) { -		Node *n0; -		 -		n0 = n->list->n->right; -		tempname(&ntemp, res->type); -		cgen(n0, &ntemp); -		n->list->n->right = &ntemp; -		getargs(n->list, nodes, narg); -		n->list->n->right = n0; -	} else -		getargs(n->list, nodes, narg); +void +bgen_float(Node *n, int true, int likely, Prog *to) +{ +	int et, a; +	Node *nl, *nr, *r; +	Node n1, n2, n3, tmp, t1, t2, ax; +	Prog *p1, *p2; -	nres = *res;		// result -	if(!sleasy(res)) { -		if(ntemp.op == OXXX) -			tempname(&ntemp, res->type); -		nres = ntemp; +	nl = n->left; +	nr = n->right; +	a = n->op; +	if(!true) { +		// brcom is not valid on floats when NaN is involved. +		p1 = gbranch(AJMP, T, 0); +		p2 = gbranch(AJMP, T, 0); +		patch(p1, pc); +		// No need to avoid re-genning ninit. 
+		bgen_float(n, 1, -likely, p2); +		patch(gbranch(AJMP, T, 0), to); +		patch(p2, pc); +		return;  	} -	if(narg == 3) {	// old[lb:] -		// move width to where it would be for old[lb:hb] -		nodes[3] = nodes[2]; -		nodes[2].op = OXXX; -		 -		// if(lb[1] > old.nel[0]) goto throw; -		n2 = nodes[0]; -		n2.xoffset += Array_nel; -		n2.type = types[TUINT32]; -		if(!nochk) -			cmpandthrow(&nodes[1], &n2); - -		// ret.nel = old.nel[0]-lb[1]; -		n2 = nodes[0]; -		n2.xoffset += Array_nel; -		n2.type = types[TUINT32]; -	 -		regalloc(&n1, types[TUINT32], N); -		gins(optoas(OAS, types[TUINT32]), &n2, &n1); -		if(!smallintconst(&nodes[1]) || mpgetfix(nodes[1].val.u.xval) != 0) -			gins(optoas(OSUB, types[TUINT32]), &nodes[1], &n1); -	 -		n2 = nres; -		n2.xoffset += Array_nel; -		n2.type = types[TUINT32]; -		gins(optoas(OAS, types[TUINT32]), &n1, &n2); -		regfree(&n1); -	} else {	// old[lb:hb] -		n2 = nodes[0]; -		n2.xoffset += Array_cap; -		n2.type = types[TUINT32]; -		if (!nochk) { -			// if(hb[2] > old.cap[0]) goto throw; -			cmpandthrow(&nodes[2], &n2); -			// if(lb[1] > hb[2]) goto throw; -			cmpandthrow(&nodes[1], &nodes[2]); -		} - -		// ret.len = hb[2]-lb[1]; (destroys hb[2]) -		n2 = nres; -		n2.xoffset += Array_nel; -		n2.type = types[TUINT32]; - -		if(smallintconst(&nodes[2]) && smallintconst(&nodes[1])) { -			v = mpgetfix(nodes[2].val.u.xval) - -				mpgetfix(nodes[1].val.u.xval); -			nodconst(&n1, types[TUINT32], v); -			gins(optoas(OAS, types[TUINT32]), &n1, &n2); -		} else { -			regalloc(&n1, types[TUINT32], &nodes[2]); -			gmove(&nodes[2], &n1); -			if(!smallintconst(&nodes[1]) || mpgetfix(nodes[1].val.u.xval) != 0) -				gins(optoas(OSUB, types[TUINT32]), &nodes[1], &n1); -			gins(optoas(OAS, types[TUINT32]), &n1, &n2); -			regfree(&n1); -		} +	if(use_sse) +		goto sse; +	else +		goto x87; + +x87: +	a = brrev(a);	// because the args are stacked +	if(a == OGE || a == OGT) { +		// only < and <= work right with NaN; reverse if needed +		r = nr; +		nr = nl; +		nl = r; +		a 
= brrev(a);  	} -	// ret.cap = old.cap[0]-lb[1]; (uses hb[2]) -	n2 = nodes[0]; -	n2.xoffset += Array_cap; -	n2.type = types[TUINT32]; - -	regalloc(&n1, types[TUINT32], &nodes[2]); -	gins(optoas(OAS, types[TUINT32]), &n2, &n1); -	if(!smallintconst(&nodes[1]) || mpgetfix(nodes[1].val.u.xval) != 0) -		gins(optoas(OSUB, types[TUINT32]), &nodes[1], &n1); - -	n2 = nres; -	n2.xoffset += Array_cap; -	n2.type = types[TUINT32]; -	gins(optoas(OAS, types[TUINT32]), &n1, &n2); -	regfree(&n1); - -	// ret.array = old.array[0]+lb[1]*width[3]; (uses lb[1]) -	n2 = nodes[0]; -	n2.xoffset += Array_array; -	n2.type = types[tptr]; - -	regalloc(&n1, types[tptr], &nodes[1]); -	if(smallintconst(&nodes[1]) && smallintconst(&nodes[3])) { -		gins(optoas(OAS, types[tptr]), &n2, &n1); -		v = mpgetfix(nodes[1].val.u.xval) * -			mpgetfix(nodes[3].val.u.xval); -		if(v != 0) { -			nodconst(&n2, types[tptr], v); -			gins(optoas(OADD, types[tptr]), &n2, &n1); +	nodreg(&tmp, nr->type, D_F0); +	nodreg(&n2, nr->type, D_F0 + 1); +	nodreg(&ax, types[TUINT16], D_AX); +	et = simsimtype(nr->type); +	if(et == TFLOAT64) { +		if(nl->ullman > nr->ullman) { +			cgen(nl, &tmp); +			cgen(nr, &tmp); +			gins(AFXCHD, &tmp, &n2); +		} else { +			cgen(nr, &tmp); +			cgen(nl, &tmp);  		} +		gins(AFUCOMIP, &tmp, &n2); +		gins(AFMOVDP, &tmp, &tmp);	// annoying pop but still better than STSW+SAHF  	} else { -		gmove(&nodes[1], &n1); -		if(!smallintconst(&nodes[3]) || mpgetfix(nodes[3].val.u.xval) != 1) -			gins(optoas(OMUL, types[tptr]), &nodes[3], &n1); -		gins(optoas(OADD, types[tptr]), &n2, &n1); +		// TODO(rsc): The moves back and forth to memory +		// here are for truncating the value to 32 bits. +		// This handles 32-bit comparison but presumably +		// all the other ops have the same problem. +		// We need to figure out what the right general +		// solution is, besides telling people to use float64. 
+		tempname(&t1, types[TFLOAT32]); +		tempname(&t2, types[TFLOAT32]); +		cgen(nr, &t1); +		cgen(nl, &t2); +		gmove(&t2, &tmp); +		gins(AFCOMFP, &t1, &tmp); +		gins(AFSTSW, N, &ax); +		gins(ASAHF, N, N);  	} -	n2 = nres; -	n2.xoffset += Array_array; -	n2.type = types[tptr]; -	gins(optoas(OAS, types[tptr]), &n1, &n2); -	regfree(&n1); +	goto ret; -	for(i=0; i<4; i++) { -		if(nodes[i].op == OREGISTER) -			regfree(&nodes[i]); +sse: +	if(!nl->addable) { +		tempname(&n1, nl->type); +		cgen(nl, &n1); +		nl = &n1; +	} +	if(!nr->addable) { +		tempname(&tmp, nr->type); +		cgen(nr, &tmp); +		nr = &tmp; +	} +	regalloc(&n2, nr->type, N); +	gmove(nr, &n2); +	nr = &n2; + +	if(nl->op != OREGISTER) { +		regalloc(&n3, nl->type, N); +		gmove(nl, &n3); +		nl = &n3;  	} -	if(!sleasy(res)) { -		cgen(&nres, res); +	if(a == OGE || a == OGT) { +		// only < and <= work right with NaN; reverse if needed +		r = nr; +		nr = nl; +		nl = r; +		a = brrev(a);  	} -	return 1; -no: -	return 0; +	gins(foptoas(OCMP, nr->type, 0), nl, nr); +	if(nl->op == OREGISTER) +		regfree(nl); +	regfree(nr); + +ret: +	if(a == OEQ) { +		// neither NE nor P +		p1 = gbranch(AJNE, T, -likely); +		p2 = gbranch(AJPS, T, -likely); +		patch(gbranch(AJMP, T, 0), to); +		patch(p1, pc); +		patch(p2, pc); +	} else if(a == ONE) { +		// either NE or P +		patch(gbranch(AJNE, T, likely), to); +		patch(gbranch(AJPS, T, likely), to); +	} else +		patch(gbranch(optoas(a, nr->type), T, likely), to); +  } | 
