From 29ab31e8f1c68dd89dad1e9a92fe3c8a8dd223a9 Mon Sep 17 00:00:00 2001 From: Karl Kurbjun Date: Thu, 20 Apr 2006 19:39:56 +0000 Subject: Optimizations for doom: coldfire asm drawspan routine = not much, fixed point multiply changes = not much, H300 asm lcd update = some, IRAM sound updates and simplifications = more git-svn-id: svn://svn.rockbox.org/rockbox/trunk@9747 a1c6a512-1295-4272-9138-f99709370657 --- apps/plugins/doom/i_sound.c | 44 +++++++++++++------------------------ apps/plugins/doom/i_video.c | 48 +++++++++++++++++++++++++++++++++++----- apps/plugins/doom/m_fixed.h | 12 +++++----- apps/plugins/doom/r_draw.c | 53 ++++++++++++++++++++++++++++++++++++--------- 4 files changed, 107 insertions(+), 50 deletions(-) diff --git a/apps/plugins/doom/i_sound.c b/apps/plugins/doom/i_sound.c index 7579ada5b5..271444527f 100644 --- a/apps/plugins/doom/i_sound.c +++ b/apps/plugins/doom/i_sound.c @@ -48,11 +48,11 @@ // mixing buffer, and the samplerate of the raw data. // Needed for calling the actual sound output. -#define SAMPLECOUNT 512 +#define SAMPLECOUNT 512 -#define NUM_CHANNELS 16 +#define NUM_CHANNELS 16 // It is 2 for 16bit, and 2 for two channels. -#define BUFMUL 4 +#define BUFMUL 2 #define MIXBUFFERSIZE (SAMPLECOUNT*BUFMUL) #if (CONFIG_KEYPAD == IPOD_3G_PAD) || (CONFIG_KEYPAD == IPOD_4G_PAD) @@ -66,7 +66,7 @@ // Basically, samples from all active internal channels // are modifed and added, and stored in the buffer // that is submitted to the audio device. -signed short *mixbuffer=NULL; +signed short mixbuffer[MIXBUFFERSIZE] IBSS_ATTR; typedef struct { // SFX id of the playing sound effect. @@ -91,7 +91,7 @@ typedef struct { int *rightvol_lookup; } channel_info_t; -channel_info_t channelinfo[NUM_CHANNELS]; +channel_info_t channelinfo[NUM_CHANNELS] IBSS_ATTR; int *vol_lookup; // Volume lookups. @@ -355,13 +355,6 @@ int I_SoundIsPlaying(int handle) // This function currently supports only 16bit. // -bool swap=0; -bool lastswap=1; - // Pointers in global mixbuffer, left, right, end. - signed short* leftout; - signed short* rightout; - signed short* leftend; - void I_UpdateSound( void ) { // Mix current sound data. @@ -370,25 +363,26 @@ void I_UpdateSound( void ) register int dl; register int dr; + // Pointers in global mixbuffer, left, right, end. + signed short* leftout; + signed short* rightout; + signed short* leftend; + // Step in mixbuffer, left and right, thus two. int step; // Mixing channel index. int chan; - if(lastswap==swap) - return; - lastswap=swap; - // Left and right channel // are in global mixbuffer, alternating. - leftout = (swap ? mixbuffer : mixbuffer + SAMPLECOUNT*2); - rightout = (swap ? mixbuffer : mixbuffer + SAMPLECOUNT*2)+1; + leftout = mixbuffer; + rightout = mixbuffer +1; step = 2; // Determine end, for left channel only // (right channel is implicit). - leftend = (swap ? mixbuffer : mixbuffer + SAMPLECOUNT*2) + SAMPLECOUNT*step; + leftend = mixbuffer + SAMPLECOUNT*step; // Mix sounds into the mixing buffer. // Loop over step*SAMPLECOUNT, @@ -467,15 +461,10 @@ void I_UpdateSound( void ) void get_more(unsigned char** start, size_t* size) { - // This code works fine, the only problem is that doom runs slower then the sound - // updates (sometimes). This code forces the update if the sound hasn't been - // remixed. - if(lastswap!=swap) - I_UpdateSound(); // Force sound update (We don't want stutters) + I_UpdateSound(); // Force sound update - *start = (unsigned char*)((swap ? mixbuffer : mixbuffer + SAMPLECOUNT*2)); + *start = (unsigned char*)(mixbuffer); *size = SAMPLECOUNT*2*sizeof(short); - swap=!swap; } @@ -520,9 +509,6 @@ void I_InitSound() printf( " pre-cached all sound data\n"); - if(mixbuffer==NULL) - mixbuffer=malloc(sizeof(short)*MIXBUFFERSIZE); - // Now initialize mixbuffer with zero. for ( i = 0; i< MIXBUFFERSIZE; i++ ) mixbuffer[i] = 0; diff --git a/apps/plugins/doom/i_video.c b/apps/plugins/doom/i_video.c index a4db5f2672..db6adfa75e 100644 --- a/apps/plugins/doom/i_video.c +++ b/apps/plugins/doom/i_video.c @@ -16,7 +16,10 @@ * GNU General Public License for more details. * * $Log$ - * Revision 1.15 2006/04/16 23:14:04 kkurbjun + * Revision 1.16 2006/04/20 19:39:56 kkurbjun + * Optimizations for doom: coldfire asm drawspan routine = not much, fixed point multiply changes = not much, H300 asm lcd update = some, IRAM sound updates and simplifications = more + * + * Revision 1.15 2006-04-16 23:14:04 kkurbjun * Fix run so that it stays enabled across level loads. Removed some unused code and added some back in for hopeful future use. * * Revision 1.14 2006-04-15 22:08:36 kkurbjun @@ -359,10 +362,43 @@ static void I_UploadNewPalette(int pal) void I_FinishUpdate (void) { #if (CONFIG_LCD == LCD_H300) && !defined(SIMULATOR) - /* - Lookup tables are no longer needed (H300 specific, decreases timedemo - by about 500 tics) - */ + +#if 1 + /* ASM screen update (drops 600 tics (100 asm)) */ + asm ( + "move.w #33,(%[LCD]) \n" /* Setup the LCD controller */ + "clr.w (%[LCD2]) \n" + "move.w #34,(%[LCD]) \n" /* End LCD controller setup */ + "move.l #220,%%d2 \n" + "move.l #176,%%d3 \n" + "clr.l %%d1 \n" + "widthloop: \n" + "move.b (%[screenptr])+, %%d1 \n" /* Unrolled by 5 */ + "move.w (%[palette], %%d1.l:2), (%[LCD2]) \n" + "move.b (%[screenptr])+, %%d1 \n" + "move.w (%[palette], %%d1.l:2), (%[LCD2]) \n" + "move.b (%[screenptr])+, %%d1 \n" + "move.w (%[palette], %%d1.l:2), (%[LCD2]) \n" + "move.b (%[screenptr])+, %%d1 \n" + "move.w (%[palette], %%d1.l:2), (%[LCD2]) \n" + "move.b (%[screenptr])+, %%d1 \n" + "move.w (%[palette], %%d1.l:2), (%[LCD2]) \n" + "subq.l #5,%%d2 \n" + "bne widthloop \n" + "move.w #220,%%d2 \n" + "subq.l #1,%%d3 \n" + "bne widthloop \n" + : /* outputs */ + : /* inputs */ + [screenptr] "a" (d_screens[0]), + [palette] "a" (palette), + [LCD] "a" (0xf0000000), + [LCD2] "a" (0xf0000002) + : /* clobbers */ + "d1", "d2", "d3" + ); +#else + /* C version of above (drops 500 tics) */ // Start the write *(volatile unsigned short *) 0xf0000000 = 0x21; // register @@ -383,6 +419,8 @@ void I_FinishUpdate (void) wcnt=0; hcnt++; } +#endif + #else unsigned char paletteIndex; int x, y; diff --git a/apps/plugins/doom/m_fixed.h b/apps/plugins/doom/m_fixed.h index 3c922e8f50..e29933befd 100644 --- a/apps/plugins/doom/m_fixed.h +++ b/apps/plugins/doom/m_fixed.h @@ -47,15 +47,15 @@ inline static int FixedMul( int a, int b ) #if defined(CPU_COLDFIRE) && !defined(SIMULATOR) // Code contributed by Thom Johansen register int result; - asm volatile ( + asm ( "mac.l %[x],%[y],%%acc0 \n" /* multiply */ "move.l %[y],%%d2 \n" "mulu.l %[x],%%d2 \n" /* get lower half, avoid emac stall */ "movclr.l %%acc0,%[result] \n" /* get higher half */ - "moveq.l #15,%%d1 \n" - "asl.l %%d1,%[result] \n" /* hi <<= 15, plus one free */ - "moveq.l #16,%%d1 \n" - "lsr.l %%d1,%%d2 \n" /* (unsigned)lo >>= 16 */ + "asl.l #8,%[result] \n" /* hi <<= 15, plus one free */ + "asl.l #7,%[result] \n" /* hi <<= 15, plus one free */ + "lsr.l #8,%%d2 \n" /* (unsigned)lo >>= 16 */ + "lsr.l #8,%%d2 \n" /* (unsigned)lo >>= 16 */ "or.l %%d2 ,%[result] \n" /* combine result */ : /* outputs */ [result]"=&d"(result) @@ -63,7 +63,7 @@ inline static int FixedMul( int a, int b ) [x] "d" (a), [y] "d" (b) : /* clobbers */ - "d1", "d2" + "d2" ); return result; #else diff --git a/apps/plugins/doom/r_draw.c b/apps/plugins/doom/r_draw.c index a6bc21e420..5f45323a36 100644 --- a/apps/plugins/doom/r_draw.c +++ b/apps/plugins/doom/r_draw.c @@ -526,16 +526,48 @@ byte *ds_source IBSS_ATTR; void R_DrawSpan (void) { - register unsigned count,xfrac = ds_xfrac,yfrac = ds_yfrac; - - byte *source; - byte *colormap; - byte *dest; - - source = ds_source; - colormap = ds_colormap; - dest = topleft + ds_y*SCREENWIDTH + ds_x1; - count = ds_x2 - ds_x1 + 1; +#if defined(CPU_COLDFIRE) && !defined(SIMULATOR) + // only slightly faster + asm volatile ( + "tst %[count] \n" + "beq endspanloop \n" + "clr.l %%d4 \n" + "spanloop: \n" + "move.l %[xfrac], %%d1 \n" + "move.l %[yfrac], %%d2 \n" + "lsr.l #8,%%d1 \n" + "lsr.l #8,%%d2 \n" + "lsr.l #8,%%d1 \n" + "lsr.l #2,%%d2 \n" + "and.l #63,%%d1 \n" + "and.l #4032,%%d2 \n" + "or.l %%d2, %%d1 \n" + "move.b (%[source], %%d1), %%d4 \n" + "add.l %[ds_xstep], %[xfrac] \n" + "add.l %[ds_ystep], %[yfrac] \n" + "move.b (%[colormap],%%d4.l), (%[dest])+ \n" + "subq.l #1, %[count] \n" + "bne spanloop \n" + "endspanloop: \n" + : /* outputs */ + : /* inputs */ + [count] "d" (ds_x2-ds_x1+1), + [xfrac] "d" (ds_xfrac), + [yfrac] "d" (ds_yfrac), + [source] "a" (ds_source), + [colormap] "a" (ds_colormap), + [dest] "a" (topleft+ds_y*SCREENWIDTH +ds_x1), + [ds_xstep] "d" (ds_xstep), + [ds_ystep] "d" (ds_ystep) + : /* clobbers */ + "d1", "d2", "d4" + ); +#else + register unsigned count = ds_x2 - ds_x1 + 1,xfrac = ds_xfrac,yfrac = ds_yfrac; + + register byte *source = ds_source; + register byte *colormap = ds_colormap; + register byte *dest = topleft + ds_y*SCREENWIDTH + ds_x1; while (count) { @@ -550,6 +582,7 @@ void R_DrawSpan (void) *dest++ = colormap[source[spot]]; count--; } +#endif } // -- cgit v1.2.3