summary | refs | log | tree | commit | diff
path: root/apps/plugins/sdl
diff options
context:
space:
mode:
author: Franklin Wei <franklin@rockbox.org>, 2019-08-09 22:51:15 -0400
committer: Franklin Wei <franklin@rockbox.org>, 2019-08-09 23:05:46 -0400
commit: 49dd38c18488b15f8be9833e7339580444be7210 (patch)
tree: facb7547e2ec2e8fd6995d1a38a3088cdbe7930d /apps/plugins/sdl
parent: 4397194110e67084ce7de179f0d0f094dee4dada (diff)
download: rockbox-49dd38c18488b15f8be9833e7339580444be7210.tar.gz
          rockbox-49dd38c18488b15f8be9833e7339580444be7210.zip
quake: further optimize D_DrawSpans8
Found on http://forums.insideqc.com/viewtopic.php?f=3&t=3327
Adds 1.1 FPS (+8%) on ipod6g.

Change-Id: I608588ff99d70ff5ce28d2c649afa4b10038cc03
Diffstat (limited to 'apps/plugins/sdl')
-rw-r--r-- apps/plugins/sdl/progs/quake/d_scan.c 349
1 files changed, 171 insertions, 178 deletions
diff --git a/apps/plugins/sdl/progs/quake/d_scan.c b/apps/plugins/sdl/progs/quake/d_scan.c
index a2e58649e3..748194d01b 100644
--- a/apps/plugins/sdl/progs/quake/d_scan.c
+++ b/apps/plugins/sdl/progs/quake/d_scan.c
@@ -383,194 +383,187 @@ void D_DrawSpans8 (espan_t *pspan)
383 } while ((pspan = pspan->pnext) != NULL); 383 } while ((pspan = pspan->pnext) != NULL);
384} 384}
385#else 385#else
386
387static int sdivzorig, sdivzstepv, sdivzstepu, sdivz8stepu;
388static int tdivzorig, tdivzstepv, tdivzstepu, tdivz8stepu;
389static int zi8stepu;
390static float last = 0;
391
392/*============================================== 386/*==============================================
393// UpdateFixedPointVars 387// Fixed-point D_DrawSpans
388//PocketQuake- Dan East
389//fixed-point conversion- Jacco Biker
390//unrolled- mh, MK, qbism
394//============================================*/ 391//============================================*/
395void UpdateFixedPointVars( int all ) 392int sdivzorig, sdivzstepv, sdivzstepu, sdivzstepu_fix;
396{ 393int tdivzorig, tdivzstepv, tdivzstepu, tdivzstepu_fix;
397 // JB: Store texture transformation matrix in fixed point vars 394int d_zistepu_fxp, d_zistepv_fxp, d_ziorigin_fxp;
398 if (all) 395int zistepu_fix;
399 {
400/*
401 sdivzorig = (int)(524288.0f * d_sdivzorigin); // 13.19 fixed point
402 tdivzorig = (int)(524288.0f * d_tdivzorigin);
403 sdivzstepv = (int)(524288.0f * d_sdivzstepv);
404 tdivzstepv = (int)(524288.0f * d_tdivzstepv);
405 sdivzstepu = (int)(524288.0f * d_sdivzstepu);
406 sdivz8stepu = sdivzstepu*8;
407 tdivzstepu = (int)(524288.0f * d_tdivzstepu);
408 tdivz8stepu = tdivzstepu*8;
409*/
410 396
411 sdivzorig = (int)(4194304.0f * d_sdivzorigin); // 10.22 fixed point 397#define FIXPOINTDIV 4194304.0f //qbism
412 tdivzorig = (int)(4194304.0f * d_tdivzorigin);
413 sdivzstepv = (int)(4194304.0f * d_sdivzstepv);
414 tdivzstepv = (int)(4194304.0f * d_tdivzstepv);
415 sdivzstepu = (int)(4194304.0f * d_sdivzstepu);
416 sdivz8stepu = sdivzstepu*8;
417 tdivzstepu = (int)(4194304.0f * d_tdivzstepu);
418 tdivz8stepu = tdivzstepu*8;
419 398
420 } 399//524288.0f is 13.19 fixed point
421/* 400// 2097152.0f is 11.21
422 ziorig = (int)(524288.0f * d_ziorigin); // 13.19 fixed point 401//4194304.0f is 10.22 (this is what PocketQuake used)
423 zistepv = (int)(524288.0f * d_zistepv ); 402//8388608.0f is 9.23
424 zistepu = (int)(524288.0f * d_zistepu );
425*/
426#ifndef USE_PQ_OPT3
427 d_ziorigin_fxp = (int)(4194304.0f * d_ziorigin); // 10.22 fixed point
428 d_zistepv_fxp = (int)(4194304.0f * d_zistepv );
429 d_zistepu_fxp = (int)(4194304.0f * d_zistepu );
430#endif
431 403
432 zi8stepu = d_zistepu_fxp * 8; 404void UpdateFixedPointVars16( int all )
433 last = d_zistepv; 405{
406 // JB: Store texture transformation matrix in fixed point vars
407 if (all)
408 {
409 sdivzorig = (int)(FIXPOINTDIV * d_sdivzorigin);
410 tdivzorig = (int)(FIXPOINTDIV * d_tdivzorigin);
411 sdivzstepv = (int)(FIXPOINTDIV * d_sdivzstepv);
412 tdivzstepv = (int)(FIXPOINTDIV * d_tdivzstepv);
413 sdivzstepu = (int)(FIXPOINTDIV * d_sdivzstepu);
414 sdivzstepu_fix = sdivzstepu*16;
415 tdivzstepu = (int)(FIXPOINTDIV * d_tdivzstepu);
416 tdivzstepu_fix = tdivzstepu*16;
417
418
419 }
420 d_ziorigin_fxp = (int)(FIXPOINTDIV * d_ziorigin);
421 d_zistepv_fxp = (int)(FIXPOINTDIV * d_zistepv );
422 d_zistepu_fxp = (int)(FIXPOINTDIV * d_zistepu );
423
424 zistepu_fix = d_zistepu_fxp * 16;
434} 425}
435 426
436void D_DrawSpans8 (espan_t *pspan) 427void D_DrawSpans8 (espan_t *pspan) //qbism from PocketQuake
437{ 428{
438 int count, spancount, spancountminus1; 429 int count, spancount, spancountminus1;
439 unsigned char *pbase, *pdest; 430 unsigned char *pbase, *pdest;
440 fixed16_t s1, t1; 431 fixed16_t s, t;
441 int zi, sdivz, tdivz, sstep, tstep; 432 int zi, sdivz, tdivz, sstep, tstep;
442 int snext, tnext; 433 int snext, tnext;
443 pbase = (unsigned char *)cacheblock; 434 pbase = (unsigned char *)cacheblock;
444 //Jacco Biker's fixed point conversion 435 //Jacco Biker's fixed point conversion
445 436
446 // Recalc fixed point values 437 // Recalc fixed point values
447 UpdateFixedPointVars( 1 ); 438 UpdateFixedPointVars16( 1 );
448 do 439 do
449 { 440 {
450 pdest = (unsigned char *)((byte *)d_viewbuffer + (screenwidth * pspan->v) + pspan->u); 441 pdest = (unsigned char *)((byte *)d_viewbuffer + (screenwidth * pspan->v) + pspan->u);
451 count = pspan->count; 442
452 // calculate the initial s/z, t/z, 1/z, s, and t and clamp 443 // calculate the initial s/z, t/z, 1/z, s, and t and clamp
453 sdivz = sdivzorig + pspan->v * sdivzstepv + pspan->u * sdivzstepu; 444 sdivz = sdivzorig + pspan->v * sdivzstepv + pspan->u * sdivzstepu;
454 tdivz = tdivzorig + pspan->v * tdivzstepv + pspan->u * tdivzstepu; 445 tdivz = tdivzorig + pspan->v * tdivzstepv + pspan->u * tdivzstepu;
455 zi = d_ziorigin_fxp + pspan->v * d_zistepv_fxp + pspan->u * d_zistepu_fxp; 446 zi = d_ziorigin_fxp + pspan->v * d_zistepv_fxp + pspan->u * d_zistepu_fxp;
456 if (zi == 0) zi = 1; 447 if (zi == 0) zi = 1;
457 s1 = (((sdivz << 8) / zi) << 8) + sadjust; // 5.27 / 13.19 = 24.8 >> 8 = 16.16 448 s = (((sdivz << 8) / zi) << 8) + sadjust; // 5.27 / 13.19 = 24.8 >> 8 = 16.16
458 if (s1 > bbextents) s1 = bbextents; else if (s1 < 0) s1 = 0; 449 if (s > bbextents) s = bbextents; else if (s < 0) s = 0;
459 t1 = (((tdivz << 8) / zi) << 8) + tadjust; 450 t = (((tdivz << 8) / zi) << 8) + tadjust;
460 if (t1 > bbextentt) t1 = bbextentt; else if (t1 < 0) t1 = 0; 451 if (t > bbextentt) t = bbextentt; else if (t < 0) t = 0;
461 // calculate final s/z, t/z, 1/z, s, and t and clamp 452
462 //sdivz += sdivzstepu * (count - 1); 453 //End Jacco Biker mod
463 //tdivz += tdivzstepu * (count - 1); 454
464 //zi += d_zistepu_fxp * (count - 1); 455 // Manoel Kasimier - begin
465 //if (zi == 0) zi = 1; 456 count = pspan->count >> 4;
466#if 0 457 spancount = pspan->count % 16;
467 s2 = (((sdivz << 8) / zi) << 8) + sadjust; 458 // Manoel Kasimier - end
468 if (s2 > bbextents) s2 = bbextents; else if (s2 < 8) s2 = 8; 459
469 t2 = (((tdivz << 8) / zi) << 8) + tadjust; 460 while (count-- >0) // Manoel Kasimier
470 if (t2 > bbextentt) t2 = bbextentt; else if (t2 < 8) t2 = 8; 461 {
471 if (count > 1) 462 // calculate s/z, t/z, zi->fixed s and t at far end of span,
472 { 463 // calculate s and t steps across span by shifting
473 sstep = (s2 - s1) / (count - 1); 464 sdivz += sdivzstepu_fix;
474 tstep = (t2 - t1) / (count - 1); 465 tdivz += tdivzstepu_fix;
475 } 466 zi += zistepu_fix;
476#else 467 if (!zi) zi = 1;
477 //End Jacco Biker mod 468
478 //Dan East: Fixed point conversion for perspective correction 469 snext = (((sdivz<<8)/zi)<<8)+sadjust;
479 do 470 if (snext > bbextents)
480 { 471 snext = bbextents;
481 // calculate s and t at the far end of the span 472 else if (snext < 16)
482 if (count >= 8) 473 snext = 16; // prevent round-off error on <0 steps from causing overstepping & running off the edge of the texture
483 spancount = 8; 474
484 else 475 tnext = (((tdivz<<8)/zi)<<8) + tadjust;
485 spancount = count; 476 if (tnext > bbextentt)
486 477 tnext = bbextentt;
487 count -= spancount; 478 else if (tnext < 16)
488 479 tnext = 16; // guard against round-off error on <0 steps
489 if (count) 480
490 { 481 sstep = (snext - s) >> 4;
491 // calculate s/z, t/z, zi->fixed s and t at far end of span, 482 tstep = (tnext - t) >> 4;
492 // calculate s and t steps across span by shifting 483
493 sdivz += sdivz8stepu; 484 pdest += 16;
494 tdivz += tdivz8stepu; 485 pdest[-16] = pbase[(s >> 16) + (t >> 16) * cachewidth]; s += sstep; t += tstep;
495 zi += zi8stepu; 486 pdest[-15] = pbase[(s >> 16) + (t >> 16) * cachewidth]; s += sstep; t += tstep;
496 if (!zi) zi = 1; 487 pdest[-14] = pbase[(s >> 16) + (t >> 16) * cachewidth]; s += sstep; t += tstep;
497 //z = zi; 488 pdest[-13] = pbase[(s >> 16) + (t >> 16) * cachewidth]; s += sstep; t += tstep;
498 //z = (float)0x10000 / zi; // prescale to 16.16 fixed-point 489 pdest[-12] = pbase[(s >> 16) + (t >> 16) * cachewidth]; s += sstep; t += tstep;
499 snext = (((sdivz<<8)/zi)<<8)+sadjust; 490 pdest[-11] = pbase[(s >> 16) + (t >> 16) * cachewidth]; s += sstep; t += tstep;
500 //snext = (int)(sdivz * z) + sadjust; 491 pdest[-10] = pbase[(s >> 16) + (t >> 16) * cachewidth]; s += sstep; t += tstep;
501 if (snext > bbextents) 492 pdest[ -9] = pbase[(s >> 16) + (t >> 16) * cachewidth]; s += sstep; t += tstep;
502 snext = bbextents; 493 pdest[ -8] = pbase[(s >> 16) + (t >> 16) * cachewidth]; s += sstep; t += tstep;
503 else if (snext < 8) 494 pdest[ -7] = pbase[(s >> 16) + (t >> 16) * cachewidth]; s += sstep; t += tstep;
504 snext = 8; // prevent round-off error on <0 steps from 495 pdest[ -6] = pbase[(s >> 16) + (t >> 16) * cachewidth]; s += sstep; t += tstep;
505 // from causing overstepping & running off the 496 pdest[ -5] = pbase[(s >> 16) + (t >> 16) * cachewidth]; s += sstep; t += tstep;
506 // edge of the texture 497 pdest[ -4] = pbase[(s >> 16) + (t >> 16) * cachewidth]; s += sstep; t += tstep;
507 498 pdest[ -3] = pbase[(s >> 16) + (t >> 16) * cachewidth]; s += sstep; t += tstep;
508 tnext = (((tdivz<<8)/zi)<<8) + tadjust; 499 pdest[ -2] = pbase[(s >> 16) + (t >> 16) * cachewidth]; s += sstep; t += tstep;
509 if (tnext > bbextentt) 500 pdest[ -1] = pbase[(s >> 16) + (t >> 16) * cachewidth]; s += sstep; t += tstep;
510 tnext = bbextentt; 501 // Manoel Kasimier - end
511 else if (tnext < 8) 502
512 tnext = 8; // guard against round-off error on <0 steps 503 s = snext;
513 504 t = tnext;
514 sstep = (snext - s1) >> 3; 505 // Manoel Kasimier - begin
515 tstep = (tnext - t1) >> 3; 506 }
516 } 507 if (spancount > 0)
517 else 508 {
518 { 509 // Manoel Kasimier - end
519 // calculate s/z, t/z, zi->fixed s and t at last pixel in span (so 510 // calculate s/z, t/z, zi->fixed s and t at last pixel in span (so
520 // can't step off polygon), clamp, calculate s and t steps across 511 // can't step off polygon), clamp, calculate s and t steps across
521 // span by division, biasing steps low so we don't run off the 512 // span by division, biasing steps low so we don't run off the
522 // texture 513 // texture
523 spancountminus1 = spancount - 1; 514
524 sdivz += sdivzstepu * spancountminus1; 515 spancountminus1 = spancount - 1;
525 tdivz += tdivzstepu * spancountminus1; 516 sdivz += sdivzstepu * spancountminus1;
526 zi += d_zistepu_fxp * spancountminus1; 517 tdivz += tdivzstepu * spancountminus1;
527 if (!zi) zi = 1; 518 zi += d_zistepu_fxp * spancountminus1;
528 //z = zi;//(float)0x10000 / zi; // prescale to 16.16 fixed-point 519 //if (!zi) zi = 1;
529 snext = (((sdivz<<8) / zi)<<8) + sadjust; 520 //z = zi;//(float)0x10000 / zi; // prescale to 16.16 fixed-point
530 if (snext > bbextents) 521 snext = (((sdivz<<8) / zi)<<8) + sadjust;
531 snext = bbextents; 522 if (snext > bbextents)
532 else if (snext < 8) 523 snext = bbextents;
533 snext = 8; // prevent round-off error on <0 steps from 524 else if (snext < 16)
534 // from causing overstepping & running off the 525 snext = 16; // prevent round-off error on <0 steps from causing overstepping & running off the edge of the texture
535 // edge of the texture 526
536 527 tnext = (((tdivz<<8) / zi)<<8) + tadjust;
537 tnext = (((tdivz<<8) / zi)<<8) + tadjust; 528 if (tnext > bbextentt)
538 if (tnext > bbextentt) 529 tnext = bbextentt;
539 tnext = bbextentt; 530 else if (tnext < 16)
540 else if (tnext < 8) 531 tnext = 16; // guard against round-off error on <0 steps
541 tnext = 8; // guard against round-off error on <0 steps 532
542 533 if (spancount > 1)
543 if (spancount > 1) 534 {
544 { 535 sstep = ((snext - s)) / ((spancount - 1));
545 sstep = ((snext - s1)) / ((spancount - 1)); 536 tstep = ((tnext - t)) / ((spancount - 1));
546 tstep = ((tnext - t1)) / ((spancount - 1)); 537 }
547 } 538
548 } 539
549 do 540 pdest += spancount;
550 { 541 switch (spancount)
551 *pdest++ = *(pbase + (s1 >> 16) + (t1 >> 16) * cachewidth); 542 {
552 s1 += sstep; 543 case 16: pdest[-16] = pbase[(s >> 16) + (t >> 16) * cachewidth]; s += sstep; t += tstep;
553 t1 += tstep; 544 case 15: pdest[-15] = pbase[(s >> 16) + (t >> 16) * cachewidth]; s += sstep; t += tstep;
554 } while (--spancount > 0); 545 case 14: pdest[-14] = pbase[(s >> 16) + (t >> 16) * cachewidth]; s += sstep; t += tstep;
555 546 case 13: pdest[-13] = pbase[(s >> 16) + (t >> 16) * cachewidth]; s += sstep; t += tstep;
556 s1 = snext; 547 case 12: pdest[-12] = pbase[(s >> 16) + (t >> 16) * cachewidth]; s += sstep; t += tstep;
557 t1 = tnext; 548 case 11: pdest[-11] = pbase[(s >> 16) + (t >> 16) * cachewidth]; s += sstep; t += tstep;
558 549 case 10: pdest[-10] = pbase[(s >> 16) + (t >> 16) * cachewidth]; s += sstep; t += tstep;
559 } while (count > 0); 550 case 9: pdest[ -9] = pbase[(s >> 16) + (t >> 16) * cachewidth]; s += sstep; t += tstep;
560#endif 551 case 8: pdest[ -8] = pbase[(s >> 16) + (t >> 16) * cachewidth]; s += sstep; t += tstep;
561#if 0 552 case 7: pdest[ -7] = pbase[(s >> 16) + (t >> 16) * cachewidth]; s += sstep; t += tstep;
562 // Draw span 553 case 6: pdest[ -6] = pbase[(s >> 16) + (t >> 16) * cachewidth]; s += sstep; t += tstep;
563 for ( i = 0; i < count; i++ ) 554 case 5: pdest[ -5] = pbase[(s >> 16) + (t >> 16) * cachewidth]; s += sstep; t += tstep;
564 { 555 case 4: pdest[ -4] = pbase[(s >> 16) + (t >> 16) * cachewidth]; s += sstep; t += tstep;
565 *pdest++ = *(pbase + (s1 >> 16) + (t1 >> 16) * cachewidth); 556 case 3: pdest[ -3] = pbase[(s >> 16) + (t >> 16) * cachewidth]; s += sstep; t += tstep;
566 s1 += sstep; 557 case 2: pdest[ -2] = pbase[(s >> 16) + (t >> 16) * cachewidth]; s += sstep; t += tstep;
567 t1 += tstep; 558 case 1: pdest[ -1] = pbase[(s >> 16) + (t >> 16) * cachewidth]; s += sstep; t += tstep;
568 } 559 break;
569#endif 560 }
570 } while ((pspan = pspan->pnext) != NULL); 561
562 }
563 } while ((pspan = pspan->pnext) != NULL);
571} 564}
572 565
573#endif //USE_PQ_OPT5 566#endif
574 567
575#endif // !id386 568#endif // !id386
576 569