diff options
author | Franklin Wei <franklin@rockbox.org> | 2019-08-09 20:08:10 -0400 |
---|---|---|
committer | Franklin Wei <franklin@rockbox.org> | 2019-08-09 21:27:42 -0400 |
commit | ee70dad305a94709c877e776d723aee64d023cb5 (patch) | |
tree | 4483d1bd7b96e080dcb496e0665252e970d8cdbf /apps/plugins/sdl/progs/quake/d_polyse.c | |
parent | 3fffabf50d5597812ffa4381ac70dd6e2af6b392 (diff) | |
download | rockbox-ee70dad305a94709c877e776d723aee64d023cb5.tar.gz rockbox-ee70dad305a94709c877e776d723aee64d023cb5.zip |
quake: merge some fixed-point optimizations from PocketQuake
The PocketQuake project, available below, has some nice fixed-point code:
https://web.archive.org/web/20150412233306/http://quake.pocketmatrix.com/PocketQuake0062_src.zip
I'd like to see most of these optimizations merged into our Quake port. This
commit gives +0.9 FPS on ipod6g. (A big change, given that it was running at
only 9.7 FPS to begin with!)
Change-Id: I91931bdd5c22f14fb28616de938a03b4e7d7b076
Diffstat (limited to 'apps/plugins/sdl/progs/quake/d_polyse.c')
-rw-r--r-- | apps/plugins/sdl/progs/quake/d_polyse.c | 99 |
1 files changed, 98 insertions, 1 deletions
diff --git a/apps/plugins/sdl/progs/quake/d_polyse.c b/apps/plugins/sdl/progs/quake/d_polyse.c index 9acd34b102..f02f9a016e 100644 --- a/apps/plugins/sdl/progs/quake/d_polyse.c +++ b/apps/plugins/sdl/progs/quake/d_polyse.c | |||
@@ -122,6 +122,7 @@ void D_PolysetScanLeftEdge (int height); | |||
122 | D_PolysetDraw | 122 | D_PolysetDraw |
123 | ================ | 123 | ================ |
124 | */ | 124 | */ |
125 | #ifndef USE_PQ_OPT | ||
125 | void D_PolysetDraw (void) | 126 | void D_PolysetDraw (void) |
126 | { | 127 | { |
127 | spanpackage_t spans[DPS_MAXSPANS + 1 + | 128 | spanpackage_t spans[DPS_MAXSPANS + 1 + |
@@ -140,7 +141,21 @@ void D_PolysetDraw (void) | |||
140 | D_DrawNonSubdiv (); | 141 | D_DrawNonSubdiv (); |
141 | } | 142 | } |
142 | } | 143 | } |
144 | #else | ||
145 | //JB: Optimization | ||
146 | //Dan East: May result in image quality loss. Actual performance gain not verified. | ||
147 | void D_PolysetDraw (void) | ||
148 | { | ||
149 | spanpackage_t spans[DPS_MAXSPANS + 1 + | ||
150 | ((CACHE_SIZE - 1) / sizeof(spanpackage_t)) + 1]; | ||
151 | // one extra because of cache line pretouching | ||
143 | 152 | ||
153 | a_spans = (spanpackage_t *) | ||
154 | (((long)&spans[0] + CACHE_SIZE - 1) & ~(CACHE_SIZE - 1)); | ||
155 | |||
156 | D_DrawNonSubdiv (); | ||
157 | } | ||
158 | #endif | ||
144 | 159 | ||
145 | /* | 160 | /* |
146 | ================ | 161 | ================ |
@@ -528,6 +543,7 @@ void D_PolysetSetUpForLineScan(fixed8_t startvertu, fixed8_t startvertv, | |||
528 | D_PolysetCalcGradients | 543 | D_PolysetCalcGradients |
529 | ================ | 544 | ================ |
530 | */ | 545 | */ |
546 | #ifndef USE_PQ_OPT4 | ||
531 | void D_PolysetCalcGradients (int skinwidth) | 547 | void D_PolysetCalcGradients (int skinwidth) |
532 | { | 548 | { |
533 | float xstepdenominv, ystepdenominv, t0, t1; | 549 | float xstepdenominv, ystepdenominv, t0, t1; |
@@ -583,7 +599,88 @@ void D_PolysetCalcGradients (int skinwidth) | |||
583 | 599 | ||
584 | a_ststepxwhole = skinwidth * (r_tstepx >> 16) + (r_sstepx >> 16); | 600 | a_ststepxwhole = skinwidth * (r_tstepx >> 16) + (r_sstepx >> 16); |
585 | } | 601 | } |
586 | 602 | #else | |
603 | void D_PolysetCalcGradients (int skinwidth) | ||
604 | { | ||
605 | //Dan East: Fixed point conversion | ||
606 | int p01_minus_p21, p11_minus_p21, p00_minus_p20, p10_minus_p20, t0, t1; | ||
607 | int tmp, ydenom; | ||
608 | |||
609 | //float xstepdenominv = (float)(1.0 / (float)d_xdenom); | ||
610 | //float ystepdenominv = -xstepdenominv; | ||
611 | //int checkx, checky; | ||
612 | |||
613 | p00_minus_p20 = (r_p0[0] - r_p2[0]); | ||
614 | p01_minus_p21 = (r_p0[1] - r_p2[1]); | ||
615 | p10_minus_p20 = (r_p1[0] - r_p2[0]); | ||
616 | p11_minus_p21 = (r_p1[1] - r_p2[1]); | ||
617 | |||
618 | //xstepdenominv = d_xdenom; | ||
619 | ydenom=-d_xdenom; | ||
620 | //ystepdenominv = -xstepdenominv; | ||
621 | |||
622 | t0 = (r_p0[4] - r_p2[4]); | ||
623 | t1 = (r_p1[4] - r_p2[4]); | ||
624 | //TODO: Ceil has been removed | ||
625 | tmp=t1 * p01_minus_p21 - t0 * p11_minus_p21; | ||
626 | r_lstepx = tmp / d_xdenom; | ||
627 | if (tmp%d_xdenom) r_lstepx++; | ||
628 | |||
629 | tmp=t1 * p00_minus_p20 - t0 * p10_minus_p20; | ||
630 | r_lstepy = tmp / ydenom; | ||
631 | if (tmp%ydenom) r_lstepy++; | ||
632 | /* | ||
633 | checkx = (int)ceil((t1 * p01_minus_p21 - t0 * p11_minus_p21) * xstepdenominv); | ||
634 | checky = (int)ceil((t1 * p00_minus_p20 - t0 * p10_minus_p20) * ystepdenominv); | ||
635 | if (checkx-r_lstepx>1||checkx-r_lstepx<-1) | ||
636 | r_lstepx=r_lstepx; | ||
637 | if (checky-r_lstepy>1||checky-r_lstepy<-1) | ||
638 | r_lstepy=r_lstepy; | ||
639 | */ | ||
640 | t0 = (r_p0[2] - r_p2[2]); | ||
641 | t1 = (r_p1[2] - r_p2[2]); | ||
642 | r_sstepx = (t1 * p01_minus_p21 - t0 * p11_minus_p21) / d_xdenom; | ||
643 | r_sstepy = (t1 * p00_minus_p20 - t0* p10_minus_p20) / ydenom; | ||
644 | /* | ||
645 | checkx = (int)((t1 * p01_minus_p21 - t0 * p11_minus_p21) * xstepdenominv); | ||
646 | checky = (int)((t1 * p00_minus_p20 - t0* p10_minus_p20) * ystepdenominv); | ||
647 | if (checkx-r_sstepx>1||checkx-r_sstepx<-1) | ||
648 | r_lstepx=r_lstepx; | ||
649 | if (checky-r_sstepy>1||checky-r_sstepy<-1) | ||
650 | r_lstepy=r_lstepy; | ||
651 | */ | ||
652 | |||
653 | t0 = (r_p0[3] - r_p2[3]); | ||
654 | t1 = (r_p1[3] - r_p2[3]); | ||
655 | r_tstepx = (t1 * p01_minus_p21 - t0 * p11_minus_p21) / d_xdenom; | ||
656 | r_tstepy = (t1 * p00_minus_p20 - t0 * p10_minus_p20) / ydenom; | ||
657 | /* | ||
658 | checkx = (int)((t1 * p01_minus_p21 - t0 * p11_minus_p21) * xstepdenominv); | ||
659 | checky = (int)((t1 * p00_minus_p20 - t0 * p10_minus_p20) * ystepdenominv); | ||
660 | if (checkx-r_tstepx>1||checkx-r_tstepx<-1) | ||
661 | r_lstepx=r_lstepx; | ||
662 | if (checky-r_tstepy>1||checky-r_tstepy<-1) | ||
663 | r_lstepy=r_lstepy; | ||
664 | */ | ||
665 | |||
666 | t0 = (r_p0[5] - r_p2[5]); | ||
667 | t1 = (r_p1[5] - r_p2[5]); | ||
668 | r_zistepx = (t1 * p01_minus_p21 - t0 * p11_minus_p21) / d_xdenom; | ||
669 | r_zistepy = (t1 * p00_minus_p20 - t0 * p10_minus_p20) / ydenom; | ||
670 | /* | ||
671 | checkx = (int)((t1 * p01_minus_p21 - t0 * p11_minus_p21) * xstepdenominv); | ||
672 | checky = (int)((t1 * p00_minus_p20 - t0 * p10_minus_p20) * ystepdenominv); | ||
673 | if (checkx-r_zistepx>1||checkx-r_zistepx<-1) | ||
674 | r_lstepx=checkx; | ||
675 | if (checky-r_zistepy>1||checky-r_zistepy<-1) | ||
676 | r_lstepy=checky; | ||
677 | */ | ||
678 | |||
679 | a_sstepxfrac = r_sstepx & 0xFFFF; | ||
680 | a_tstepxfrac = r_tstepx & 0xFFFF; | ||
681 | a_ststepxwhole = skinwidth * (r_tstepx >> 16) + (r_sstepx >> 16); | ||
682 | } | ||
683 | #endif | ||
587 | #endif // !id386 | 684 | #endif // !id386 |
588 | 685 | ||
589 | 686 | ||