summaryrefslogtreecommitdiff
path: root/apps/plugins/sdl/progs/quake/d_polyse.c
diff options
context:
space:
mode:
authorFranklin Wei <franklin@rockbox.org>2019-08-09 20:08:10 -0400
committerFranklin Wei <franklin@rockbox.org>2019-08-09 21:27:42 -0400
commitee70dad305a94709c877e776d723aee64d023cb5 (patch)
tree4483d1bd7b96e080dcb496e0665252e970d8cdbf /apps/plugins/sdl/progs/quake/d_polyse.c
parent3fffabf50d5597812ffa4381ac70dd6e2af6b392 (diff)
downloadrockbox-ee70dad305a94709c877e776d723aee64d023cb5.tar.gz
rockbox-ee70dad305a94709c877e776d723aee64d023cb5.zip
quake: merge some fixed-point optimizations from PocketQuake
The PocketQuake project, available below, has some nice fixed-point code: https://web.archive.org/web/20150412233306/http://quake.pocketmatrix.com/PocketQuake0062_src.zip I'd like to see most of them merged into our Quake port. This commit gives +0.9FPS on ipod6g. (A big change given that it was running at only 9.7FPS to begin with!) Change-Id: I91931bdd5c22f14fb28616de938a03b4e7d7b076
Diffstat (limited to 'apps/plugins/sdl/progs/quake/d_polyse.c')
-rw-r--r--apps/plugins/sdl/progs/quake/d_polyse.c99
1 files changed, 98 insertions, 1 deletions
diff --git a/apps/plugins/sdl/progs/quake/d_polyse.c b/apps/plugins/sdl/progs/quake/d_polyse.c
index 9acd34b102..f02f9a016e 100644
--- a/apps/plugins/sdl/progs/quake/d_polyse.c
+++ b/apps/plugins/sdl/progs/quake/d_polyse.c
@@ -122,6 +122,7 @@ void D_PolysetScanLeftEdge (int height);
122D_PolysetDraw 122D_PolysetDraw
123================ 123================
124*/ 124*/
125#ifndef USE_PQ_OPT
125void D_PolysetDraw (void) 126void D_PolysetDraw (void)
126{ 127{
127 spanpackage_t spans[DPS_MAXSPANS + 1 + 128 spanpackage_t spans[DPS_MAXSPANS + 1 +
@@ -140,7 +141,21 @@ void D_PolysetDraw (void)
140 D_DrawNonSubdiv (); 141 D_DrawNonSubdiv ();
141 } 142 }
142} 143}
144#else
145//JB: Optimization
146//Dan East: May result in image quality loss. Actual performance gain not verified.
147void D_PolysetDraw (void)
148{
149 spanpackage_t spans[DPS_MAXSPANS + 1 +
150 ((CACHE_SIZE - 1) / sizeof(spanpackage_t)) + 1];
151 // one extra because of cache line pretouching
143 152
153 a_spans = (spanpackage_t *)
154 (((long)&spans[0] + CACHE_SIZE - 1) & ~(CACHE_SIZE - 1));
155
156 D_DrawNonSubdiv ();
157}
158#endif
144 159
145/* 160/*
146================ 161================
@@ -528,6 +543,7 @@ void D_PolysetSetUpForLineScan(fixed8_t startvertu, fixed8_t startvertv,
528D_PolysetCalcGradients 543D_PolysetCalcGradients
529================ 544================
530*/ 545*/
546#ifndef USE_PQ_OPT4
531void D_PolysetCalcGradients (int skinwidth) 547void D_PolysetCalcGradients (int skinwidth)
532{ 548{
533 float xstepdenominv, ystepdenominv, t0, t1; 549 float xstepdenominv, ystepdenominv, t0, t1;
@@ -583,7 +599,88 @@ void D_PolysetCalcGradients (int skinwidth)
583 599
584 a_ststepxwhole = skinwidth * (r_tstepx >> 16) + (r_sstepx >> 16); 600 a_ststepxwhole = skinwidth * (r_tstepx >> 16) + (r_sstepx >> 16);
585} 601}
586 602#else
603void D_PolysetCalcGradients (int skinwidth)
604{
605 //Dan East: Fixed point conversion
606 int p01_minus_p21, p11_minus_p21, p00_minus_p20, p10_minus_p20, t0, t1;
607 int tmp, ydenom;
608
609 //float xstepdenominv = (float)(1.0 / (float)d_xdenom);
610 //float ystepdenominv = -xstepdenominv;
611 //int checkx, checky;
612
613 p00_minus_p20 = (r_p0[0] - r_p2[0]);
614 p01_minus_p21 = (r_p0[1] - r_p2[1]);
615 p10_minus_p20 = (r_p1[0] - r_p2[0]);
616 p11_minus_p21 = (r_p1[1] - r_p2[1]);
617
618 //xstepdenominv = d_xdenom;
619 ydenom=-d_xdenom;
620 //ystepdenominv = -xstepdenominv;
621
622 t0 = (r_p0[4] - r_p2[4]);
623 t1 = (r_p1[4] - r_p2[4]);
624 //TODO: Ceil has been removed
625 tmp=t1 * p01_minus_p21 - t0 * p11_minus_p21;
626 r_lstepx = tmp / d_xdenom;
627 if (tmp%d_xdenom) r_lstepx++;
628
629 tmp=t1 * p00_minus_p20 - t0 * p10_minus_p20;
630 r_lstepy = tmp / ydenom;
631 if (tmp%ydenom) r_lstepy++;
632 /*
633 checkx = (int)ceil((t1 * p01_minus_p21 - t0 * p11_minus_p21) * xstepdenominv);
634 checky = (int)ceil((t1 * p00_minus_p20 - t0 * p10_minus_p20) * ystepdenominv);
635 if (checkx-r_lstepx>1||checkx-r_lstepx<-1)
636 r_lstepx=r_lstepx;
637 if (checky-r_lstepy>1||checky-r_lstepy<-1)
638 r_lstepy=r_lstepy;
639 */
640 t0 = (r_p0[2] - r_p2[2]);
641 t1 = (r_p1[2] - r_p2[2]);
642 r_sstepx = (t1 * p01_minus_p21 - t0 * p11_minus_p21) / d_xdenom;
643 r_sstepy = (t1 * p00_minus_p20 - t0* p10_minus_p20) / ydenom;
644 /*
645 checkx = (int)((t1 * p01_minus_p21 - t0 * p11_minus_p21) * xstepdenominv);
646 checky = (int)((t1 * p00_minus_p20 - t0* p10_minus_p20) * ystepdenominv);
647 if (checkx-r_sstepx>1||checkx-r_sstepx<-1)
648 r_lstepx=r_lstepx;
649 if (checky-r_sstepy>1||checky-r_sstepy<-1)
650 r_lstepy=r_lstepy;
651 */
652
653 t0 = (r_p0[3] - r_p2[3]);
654 t1 = (r_p1[3] - r_p2[3]);
655 r_tstepx = (t1 * p01_minus_p21 - t0 * p11_minus_p21) / d_xdenom;
656 r_tstepy = (t1 * p00_minus_p20 - t0 * p10_minus_p20) / ydenom;
657 /*
658 checkx = (int)((t1 * p01_minus_p21 - t0 * p11_minus_p21) * xstepdenominv);
659 checky = (int)((t1 * p00_minus_p20 - t0 * p10_minus_p20) * ystepdenominv);
660 if (checkx-r_tstepx>1||checkx-r_tstepx<-1)
661 r_lstepx=r_lstepx;
662 if (checky-r_tstepy>1||checky-r_tstepy<-1)
663 r_lstepy=r_lstepy;
664 */
665
666 t0 = (r_p0[5] - r_p2[5]);
667 t1 = (r_p1[5] - r_p2[5]);
668 r_zistepx = (t1 * p01_minus_p21 - t0 * p11_minus_p21) / d_xdenom;
669 r_zistepy = (t1 * p00_minus_p20 - t0 * p10_minus_p20) / ydenom;
670 /*
671 checkx = (int)((t1 * p01_minus_p21 - t0 * p11_minus_p21) * xstepdenominv);
672 checky = (int)((t1 * p00_minus_p20 - t0 * p10_minus_p20) * ystepdenominv);
673 if (checkx-r_zistepx>1||checkx-r_zistepx<-1)
674 r_lstepx=checkx;
675 if (checky-r_zistepy>1||checky-r_zistepy<-1)
676 r_lstepy=checky;
677 */
678
679 a_sstepxfrac = r_sstepx & 0xFFFF;
680 a_tstepxfrac = r_tstepx & 0xFFFF;
681 a_ststepxwhole = skinwidth * (r_tstepx >> 16) + (r_sstepx >> 16);
682}
683#endif
587#endif // !id386 684#endif // !id386
588 685
589 686