diff options
author | Franklin Wei <franklin@rockbox.org> | 2019-08-09 20:08:10 -0400 |
---|---|---|
committer | Franklin Wei <franklin@rockbox.org> | 2019-08-09 21:27:42 -0400 |
commit | ee70dad305a94709c877e776d723aee64d023cb5 (patch) | |
tree | 4483d1bd7b96e080dcb496e0665252e970d8cdbf /apps/plugins/sdl/progs/quake/r_draw.c | |
parent | 3fffabf50d5597812ffa4381ac70dd6e2af6b392 (diff) | |
download | rockbox-ee70dad305a94709c877e776d723aee64d023cb5.tar.gz rockbox-ee70dad305a94709c877e776d723aee64d023cb5.zip |
quake: merge some fixed-point optimizations from PocketQuake
The PocketQuake project, available below, has some nice fixed-point code:
https://web.archive.org/web/20150412233306/http://quake.pocketmatrix.com/PocketQuake0062_src.zip
I'd like to see most of it merged into our Quake port. This commit
gives +0.9 FPS on ipod6g. (A big change, given that it was running at
only 9.7 FPS to begin with!)
Change-Id: I91931bdd5c22f14fb28616de938a03b4e7d7b076
Diffstat (limited to 'apps/plugins/sdl/progs/quake/r_draw.c')
-rw-r--r-- | apps/plugins/sdl/progs/quake/r_draw.c | 313 |
1 files changed, 313 insertions, 0 deletions
diff --git a/apps/plugins/sdl/progs/quake/r_draw.c b/apps/plugins/sdl/progs/quake/r_draw.c index 8789cc015a..c4bf95f112 100644 --- a/apps/plugins/sdl/progs/quake/r_draw.c +++ b/apps/plugins/sdl/progs/quake/r_draw.c | |||
@@ -44,6 +44,10 @@ clipplane_t *entity_clipplanes; | |||
44 | clipplane_t view_clipplanes[4]; | 44 | clipplane_t view_clipplanes[4]; |
45 | clipplane_t world_clipplanes[16]; | 45 | clipplane_t world_clipplanes[16]; |
46 | 46 | ||
47 | #ifdef USE_PQ_OPT2 | ||
48 | clipplane_fxp_t view_clipplanes_fxp[4]; | ||
49 | #endif | ||
50 | |||
47 | medge_t *r_pedge; | 51 | medge_t *r_pedge; |
48 | 52 | ||
49 | qboolean r_leftclipped, r_rightclipped; | 53 | qboolean r_leftclipped, r_rightclipped; |
@@ -67,6 +71,11 @@ float r_nearzi; | |||
67 | float r_u1, r_v1, r_lzi1; | 71 | float r_u1, r_v1, r_lzi1; |
68 | int r_ceilv1; | 72 | int r_ceilv1; |
69 | 73 | ||
74 | #ifdef USE_PQ_OPT1 | ||
75 | int r_u1_fxp, r_v1_fxp, r_lzi1_fxp; | ||
76 | extern int modelorg_fxp[3]; | ||
77 | #endif | ||
78 | |||
70 | qboolean r_lastvertvalid; | 79 | qboolean r_lastvertvalid; |
71 | 80 | ||
72 | 81 | ||
@@ -250,6 +259,306 @@ void R_EmitEdge (mvertex_t *pv0, mvertex_t *pv1) | |||
250 | removeedges[v2] = edge; | 259 | removeedges[v2] = edge; |
251 | } | 260 | } |
252 | 261 | ||
262 | #ifdef USE_PQ_OPT1 | ||
263 | void R_EmitEdge_fxp (mvertex_t *pv0, mvertex_t *pv1) | ||
264 | { | ||
265 | edge_t *edge, *pcheck; | ||
266 | int u_check; | ||
267 | //float u, u_step; | ||
268 | int u_fxp, u_step_fxp; | ||
269 | //vec3_t local, transformed; | ||
270 | int local_fxp[3], transformed_fxp[3]; | ||
271 | float *world; | ||
272 | int v, v2, ceilv0; | ||
273 | //float scale, lzi0, u0, v0; | ||
274 | int scale_fxp, scale2_fxp, lzi0_fxp, u0_fxp, v0_fxp; | ||
275 | int side; | ||
276 | |||
277 | if (r_lastvertvalid) | ||
278 | { | ||
279 | u0_fxp = r_u1_fxp; | ||
280 | v0_fxp = r_v1_fxp; | ||
281 | lzi0_fxp = r_lzi1_fxp; | ||
282 | //lzi0 = r_lzi1; | ||
283 | ceilv0 = r_ceilv1; | ||
284 | } | ||
285 | else | ||
286 | { | ||
287 | //world_fxp=(int)(pv0->position[0]*(float)(2^16)); | ||
288 | world = &pv0->position[0]; | ||
289 | |||
290 | // transform and project | ||
291 | //VectorSubtract (world, modelorg, local); | ||
292 | //Vector Subtract (and convert) | ||
293 | local_fxp[0]=((int)(world[0]*(524288.0)))-modelorg_fxp[0]; | ||
294 | local_fxp[1]=((int)(world[1]*(524288.0)))-modelorg_fxp[1]; | ||
295 | local_fxp[2]=((int)(world[2]*(524288.0)))-modelorg_fxp[2]; | ||
296 | |||
297 | //TransformVector (local, transformed); | ||
298 | //transformed_fxp[0] = (int)(local_fxp[0]*vright[0])+(int)(local_fxp[1]*vright[1])+(int)(local_fxp[2]*vright[2]); | ||
299 | //transformed_fxp[1] = (int)(local_fxp[0]*vup[0])+(int)(local_fxp[1]*vup[1])+(int)(local_fxp[2]*vup[2]); | ||
300 | //transformed_fxp[2] = (int)(local_fxp[0]*vpn[0])+(int)(local_fxp[1]*vpn[1])+(int)(local_fxp[2]*vpn[2]); | ||
301 | |||
302 | transformed_fxp[0] = local_fxp[0]/vright_fxp[0]+local_fxp[1]/vright_fxp[1]+local_fxp[2]/vright_fxp[2]; | ||
303 | //transformed_fxp[0]*=256; | ||
304 | transformed_fxp[1] = local_fxp[0]/vup_fxp[0]+local_fxp[1]/vup_fxp[1]+local_fxp[2]/vup_fxp[2]; | ||
305 | //transformed_fxp[1]*=256; | ||
306 | transformed_fxp[2] = local_fxp[0]/vpn_fxp[0]+local_fxp[1]/vpn_fxp[1]+local_fxp[2]/vpn_fxp[2]; | ||
307 | transformed_fxp[2]*=256; | ||
308 | |||
309 | if (transformed_fxp[2] < (int)(NEAR_CLIP*1048576.0)) | ||
310 | transformed_fxp[2] = (int)(NEAR_CLIP*1048576.0); | ||
311 | |||
312 | transformed_fxp[0]/=16; | ||
313 | transformed_fxp[1]/=16; | ||
314 | transformed_fxp[2]/=2048; | ||
315 | |||
316 | lzi0_fxp=transformed_fxp[2]; | ||
317 | //lzi0 = (float)(1.0 / transformed[2]); | ||
318 | |||
319 | // FIXME: build x/yscale into transform? | ||
320 | //scale = xscale * lzi0; | ||
321 | //u0 = (xcenter + scale*transformed[0]); | ||
322 | |||
323 | scale_fxp=xscale_fxp/transformed_fxp[2]; //9.23 / 24.8 = 17.15 | ||
324 | scale2_fxp=transformed_fxp[0]*(scale_fxp); // 25.7 * 17.15 = 10.22 | ||
325 | |||
326 | if (transformed_fxp[0]<0) { | ||
327 | if (scale2_fxp>0) scale2_fxp=-511*4194304; | ||
328 | } else { | ||
329 | if (scale2_fxp<0) scale2_fxp=511*4194304; | ||
330 | } | ||
331 | |||
332 | u0_fxp=scale2_fxp+xcenter_fxp; | ||
333 | |||
334 | if (u0_fxp < r_refdef_fvrectx_adj_fxp) | ||
335 | u0_fxp = r_refdef_fvrectx_adj_fxp; | ||
336 | if (u0_fxp > r_refdef_fvrectright_adj_fxp) | ||
337 | u0_fxp = r_refdef_fvrectright_adj_fxp; | ||
338 | |||
339 | //scale = yscale * lzi0; | ||
340 | //v0 = (ycenter - scale*transformed[1]); | ||
341 | scale_fxp=yscale_fxp/transformed_fxp[2]; //9.23 / 24.8 = 17.15 | ||
342 | scale2_fxp=transformed_fxp[1]*(scale_fxp); // 25.7 * 17.15 = 10.22 | ||
343 | |||
344 | if (transformed_fxp[1]<0) { | ||
345 | if (scale2_fxp>0) scale2_fxp=-511*4194304; | ||
346 | } else { | ||
347 | if (scale2_fxp<0) scale2_fxp=511*4194304; //255*8388608; | ||
348 | } | ||
349 | |||
350 | v0_fxp = ycenter_fxp-scale2_fxp; | ||
351 | |||
352 | if (v0_fxp < r_refdef_fvrecty_adj_fxp) | ||
353 | v0_fxp = r_refdef_fvrecty_adj_fxp; | ||
354 | if (v0_fxp > r_refdef_fvrectbottom_adj_fxp) | ||
355 | v0_fxp = r_refdef_fvrectbottom_adj_fxp; | ||
356 | |||
357 | ceilv0 = v0_fxp/4194304; | ||
358 | if (v0_fxp&0x3FFFFF) ceilv0++; | ||
359 | } | ||
360 | |||
361 | //world(pv1->position[0]*(float)(2^16)); | ||
362 | world = &pv1->position[0]; | ||
363 | |||
364 | // transform and project | ||
365 | //VectorSubtract (world, modelorg, local); | ||
366 | //Vector Subtract (and convert) | ||
367 | local_fxp[0]=((int)(world[0]*(524288.0)))-modelorg_fxp[0]; | ||
368 | local_fxp[1]=((int)(world[1]*(524288.0)))-modelorg_fxp[1]; | ||
369 | local_fxp[2]=((int)(world[2]*(524288.0)))-modelorg_fxp[2]; | ||
370 | |||
371 | //TransformVector (local, transformed); | ||
372 | //transformed_fxp[0] = ((int)(local_fxp[0]*vright[0]))+((int)(local_fxp[1]*vright[1]))+((int)(local_fxp[2]*vright[2])); | ||
373 | //transformed_fxp[1] = ((int)(local_fxp[0]*vup[0]))+((int)(local_fxp[1]*vup[1]))+((int)(local_fxp[2]*vup[2])); | ||
374 | //transformed_fxp[2] = ((int)(local_fxp[0]*vpn[0]))+((int)(local_fxp[1]*vpn[1]))+((int)(local_fxp[2]*vpn[2])); | ||
375 | |||
376 | transformed_fxp[0] = local_fxp[0]/vright_fxp[0]+local_fxp[1]/vright_fxp[1]+local_fxp[2]/vright_fxp[2]; | ||
377 | //transformed_fxp[0]*=256; | ||
378 | transformed_fxp[1] = local_fxp[0]/vup_fxp[0]+local_fxp[1]/vup_fxp[1]+local_fxp[2]/vup_fxp[2]; | ||
379 | //transformed_fxp[1]*=256; | ||
380 | transformed_fxp[2] = local_fxp[0]/vpn_fxp[0]+local_fxp[1]/vpn_fxp[1]+local_fxp[2]/vpn_fxp[2]; | ||
381 | transformed_fxp[2]*=256; | ||
382 | |||
383 | //transformed_fxp[2]=-transformed_fxp[2]; | ||
384 | //if (transformed[2] < NEAR_CLIP) | ||
385 | // transformed[2] = (float)NEAR_CLIP; | ||
386 | if (transformed_fxp[2] < (int)(NEAR_CLIP*524288.0)) | ||
387 | transformed_fxp[2] = (int)(NEAR_CLIP*524288.0); | ||
388 | |||
389 | transformed_fxp[0]/=16; | ||
390 | transformed_fxp[1]/=16; | ||
391 | transformed_fxp[2]/=2048; | ||
392 | |||
393 | r_lzi1_fxp=transformed_fxp[2]; | ||
394 | //r_lzi1 = (float)(1.0 / transformed[2]); | ||
395 | //scale = xscale * r_lzi1; | ||
396 | |||
397 | scale_fxp=xscale_fxp/transformed_fxp[2]; //9.23 / 24.8 = 17.15 | ||
398 | scale2_fxp=transformed_fxp[0]*(scale_fxp); // 24.8 * 17.15 = 9.23 //21.11 | ||
399 | |||
400 | if (transformed_fxp[0]<0) { | ||
401 | if (scale2_fxp>0) scale2_fxp=-511*4194304; | ||
402 | } else { | ||
403 | if (scale2_fxp<0) scale2_fxp=511*4194304; | ||
404 | } | ||
405 | |||
406 | //r_u1 = (xcenter + scale*transformed[0]); | ||
407 | r_u1_fxp = xcenter_fxp + scale2_fxp; | ||
408 | if (r_u1_fxp < r_refdef_fvrectx_adj_fxp) | ||
409 | r_u1_fxp = r_refdef_fvrectx_adj_fxp; | ||
410 | if (r_u1_fxp > r_refdef_fvrectright_adj_fxp) | ||
411 | r_u1_fxp = r_refdef_fvrectright_adj_fxp; | ||
412 | |||
413 | //scale = yscale * r_lzi1; | ||
414 | //r_v1 = (ycenter - scale*transformed[1]); | ||
415 | |||
416 | scale_fxp=yscale_fxp/transformed_fxp[2]; //9.23 / 24.8 = 17.15 | ||
417 | scale2_fxp=transformed_fxp[1]*(scale_fxp); // 23.9 * 17.15 = 9.23 //21.11 | ||
418 | |||
419 | if (transformed_fxp[1]<0) { | ||
420 | if (scale2_fxp>0) scale2_fxp=-511*4194304; | ||
421 | } else { | ||
422 | if (scale2_fxp<0) scale2_fxp=511*4194304; | ||
423 | } | ||
424 | |||
425 | r_v1_fxp = ycenter_fxp - scale2_fxp; | ||
426 | if (r_v1_fxp < r_refdef_fvrecty_adj_fxp) | ||
427 | r_v1_fxp = r_refdef_fvrecty_adj_fxp; | ||
428 | if (r_v1_fxp > r_refdef_fvrectbottom_adj_fxp) | ||
429 | r_v1_fxp = r_refdef_fvrectbottom_adj_fxp; | ||
430 | |||
431 | //if (r_lzi1 > lzi0) | ||
432 | // lzi0 = r_lzi1; | ||
433 | if (r_lzi1_fxp < lzi0_fxp) | ||
434 | lzi0_fxp = r_lzi1_fxp; | ||
435 | |||
436 | //if (lzi0 > r_nearzi) // for mipmap finding | ||
437 | // r_nearzi = lzi0; | ||
438 | if (128.0/lzi0_fxp > r_nearzi) { // for mipmap finding | ||
439 | //if (!lzi0_fxp) r_nearzi=0; | ||
440 | //else | ||
441 | r_nearzi = (float)(128.0/lzi0_fxp); | ||
442 | } | ||
443 | |||
444 | // for right edges, all we want is the effect on 1/z | ||
445 | if (r_nearzionly) | ||
446 | return; | ||
447 | |||
448 | r_emitted = 1; | ||
449 | |||
450 | //r_ceilv1 = (int) ceil(r_v1); | ||
451 | r_ceilv1 = r_v1_fxp/4194304; | ||
452 | if (r_v1_fxp&0x3FFFFF) r_ceilv1++; | ||
453 | |||
454 | |||
455 | // create the edge | ||
456 | if (ceilv0 == r_ceilv1) | ||
457 | { | ||
458 | // we cache unclipped horizontal edges as fully clipped | ||
459 | if (cacheoffset != 0x7FFFFFFF) | ||
460 | { | ||
461 | cacheoffset = FULLY_CLIPPED_CACHED | | ||
462 | (r_framecount & FRAMECOUNT_MASK); | ||
463 | } | ||
464 | |||
465 | return; // horizontal edge | ||
466 | } | ||
467 | |||
468 | side = ceilv0 > r_ceilv1; | ||
469 | |||
470 | edge = edge_p++; | ||
471 | |||
472 | edge->owner = NULL; | ||
473 | |||
474 | edge->owner = r_pedge; | ||
475 | |||
476 | edge->nearzi = (float)(128.0/lzi0_fxp); | ||
477 | |||
478 | { | ||
479 | //float tmp; | ||
480 | if (side == 0) | ||
481 | { | ||
482 | //int tmp; | ||
483 | // trailing edge (go from p1 to p2) | ||
484 | v = ceilv0; | ||
485 | v2 = r_ceilv1 - 1; | ||
486 | |||
487 | edge->surfs[0] = surface_p - surfaces; | ||
488 | edge->surfs[1] = 0; | ||
489 | |||
490 | //u_step = ((r_u1 - u0) / (r_v1 - v0)); | ||
491 | //u = u0 + ((float)v - v0) * u_step; | ||
492 | |||
493 | u_step_fxp=(r_u1_fxp - u0_fxp) / ((r_v1_fxp - v0_fxp)>>10); //10.22 / 15.12 = 22.10 | ||
494 | u_fxp = u0_fxp + ((v*4194304 - v0_fxp)>>12 * u_step_fxp>>12); | ||
495 | |||
496 | //tmp=(((r_u1_fxp - u0_fxp)/8388608.0) / ((r_v1_fxp - v0_fxp)/8388608.0)); | ||
497 | //u_step_fxp=(int)(tmp*8388608.0); | ||
498 | //u_fxp = u0_fxp + (((float)v - v0_fxp/8388608.0) * tmp)*8388608.0; | ||
499 | } | ||
500 | else | ||
501 | { | ||
502 | //int tmp; | ||
503 | // leading edge (go from p2 to p1) | ||
504 | v2 = ceilv0 - 1; | ||
505 | v = r_ceilv1; | ||
506 | |||
507 | edge->surfs[0] = 0; | ||
508 | edge->surfs[1] = surface_p - surfaces; | ||
509 | |||
510 | //u_step = ((u0 - r_u1) / (v0 - r_v1)); | ||
511 | //u = r_u1 + ((float)v - r_v1) * u_step; | ||
512 | |||
513 | u_step_fxp = (u0_fxp - r_u1_fxp) / ((v0_fxp - r_v1_fxp)>>10); | ||
514 | u_fxp = r_u1_fxp + ((v*4194304 - r_v1_fxp)>>12 * u_step_fxp>>12); | ||
515 | |||
516 | //tmp=(((u0_fxp - r_u1_fxp)/8388608.0) / ((v0_fxp - r_v1_fxp)/8388608.0)); | ||
517 | //u_step_fxp = (int)(tmp*8388608.0); | ||
518 | //u_fxp = r_u1_fxp + (((float)v - r_v1_fxp/8388608.0) * tmp)*8388608.0; | ||
519 | } | ||
520 | //edge->u_step = tmp*0x100000; | ||
521 | //edge->u = (int)(/*(u_fxp/65536)*0x100000*/u_fxp/4 + 0xFFFFF); | ||
522 | |||
523 | edge->u_step = u_step_fxp*1024;///16; //tmp*0x100000; | ||
524 | edge->u = (int)(/*(u_fxp/65536)*0x100000*/u_fxp/4 + 0xFFFFF); | ||
525 | } | ||
526 | |||
527 | // we need to do this to avoid stepping off the edges if a very nearly | ||
528 | // horizontal edge is less than epsilon above a scan, and numeric error causes | ||
529 | // it to incorrectly extend to the scan, and the extension of the line goes off | ||
530 | // the edge of the screen | ||
531 | // FIXME: is this actually needed? | ||
532 | if (edge->u < r_refdef.vrect_x_adj_shift20) | ||
533 | edge->u = r_refdef.vrect_x_adj_shift20; | ||
534 | if (edge->u > r_refdef.vrectright_adj_shift20) | ||
535 | edge->u = r_refdef.vrectright_adj_shift20; | ||
536 | |||
537 | // | ||
538 | // sort the edge in normally | ||
539 | // | ||
540 | u_check = edge->u; | ||
541 | if (edge->surfs[0]) | ||
542 | u_check++; // sort trailers after leaders | ||
543 | |||
544 | if (!newedges[v] || newedges[v]->u >= u_check) | ||
545 | { | ||
546 | edge->next = newedges[v]; | ||
547 | newedges[v] = edge; | ||
548 | } | ||
549 | else | ||
550 | { | ||
551 | pcheck = newedges[v]; | ||
552 | while (pcheck->next && pcheck->next->u < u_check) | ||
553 | pcheck = pcheck->next; | ||
554 | edge->next = pcheck->next; | ||
555 | pcheck->next = edge; | ||
556 | } | ||
557 | |||
558 | edge->nextremove = removeedges[v2]; | ||
559 | removeedges[v2] = edge; | ||
560 | } | ||
561 | #endif | ||
253 | 562 | ||
254 | /* | 563 | /* |
255 | ================ | 564 | ================ |
@@ -349,7 +658,11 @@ static inline void R_ClipEdge (mvertex_t *pv0, mvertex_t *pv1, clipplane_t *clip | |||
349 | } | 658 | } |
350 | 659 | ||
351 | // add the edge | 660 | // add the edge |
661 | #ifndef USE_PQ_OPT1 | ||
352 | R_EmitEdge (pv0, pv1); | 662 | R_EmitEdge (pv0, pv1); |
663 | #else | ||
664 | R_EmitEdge_fxp (pv0, pv1); | ||
665 | #endif | ||
353 | } | 666 | } |
354 | 667 | ||
355 | #endif // !id386 | 668 | #endif // !id386 |