From c546b70e18b7e0390c032e0fe0de15441cd7ed1b Mon Sep 17 00:00:00 2001 From: Jörg Hohensohn Date: Mon, 26 Jan 2004 22:26:54 +0000 Subject: Really squeezed lcd_write_data() with tricky assembler optimization. Now it does 114 fps when playing video from memory (way more than the ~75 fps needed, I need to slow down the video player), leaving headroom for audio. git-svn-id: svn://svn.rockbox.org/rockbox/trunk@4278 a1c6a512-1295-4272-9138-f99709370657 --- firmware/drivers/lcd.c | 116 ++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 110 insertions(+), 6 deletions(-) (limited to 'firmware') diff --git a/firmware/drivers/lcd.c b/firmware/drivers/lcd.c index 4925ea63d2..5c2c514f10 100644 --- a/firmware/drivers/lcd.c +++ b/firmware/drivers/lcd.c @@ -7,7 +7,7 @@ * \/ \/ \/ \/ \/ * $Id$ * - * Copyright (C) 2002 by Alan Korr + * Copyright (C) 2002 by Alan Korr, speedup by Jörg Hohensohn * * All files in this archive are subject to the GNU General Public License. * See the file COPYING in the source tree root for full license agreement. @@ -180,9 +180,10 @@ void lcd_write(bool command, int byte) Ultimately, all calls to lcd_write(false, xxx) should be substituted by this, it will be most efficient if the LCD buffer is tilted to have the X row as consecutive bytes, so we can write a whole row */ -/* FixMe: somehow the red LED is affected by this, although I don't touch - any other bit. Therefore not used yet, except for lcd_blit() */ void lcd_write_data(unsigned char* p_bytes, int count) __attribute__ ((section (".icode"))); + +#ifdef HAVE_LCD_CHARCELLS +/* This version works for both Player and Recorder models */ void lcd_write_data(unsigned char* p_bytes, int count) { do @@ -193,7 +194,8 @@ void lcd_write_data(unsigned char* p_bytes, int count) byte = *p_bytes++ << 24; /* fetch to MSB position */ - cli(); /* make port modifications atomic */ + cli(); /* make port modifications atomic, in case an IRQ uses PBDRL */ + /* (currently not the case, so this could be optimized away) */ /* precalculate the values for later bit toggling, init data write */ asm ( @@ -268,9 +270,9 @@ void lcd_write_data(unsigned char* p_bytes, int count) "mov.b %3,@%4\n" "bf 1f\n" - "mov.b %1,@%4\n" + "mov.b %1,@%4\n" /* set SD high, SC low still */ "1: \n" - "or.b %2, @(r0,gbr)\n" + "or.b %2, @(r0,gbr)\n" /* rise SC (independent of SD level) */ "or.b %5, @(r0,gbr)\n" /* restore port */ : @@ -287,3 +289,105 @@ void lcd_write_data(unsigned char* p_bytes, int count) } while (--count); /* tail loop is faster */ } + +#else /* #ifdef HAVE_LCD_CHARCELLS */ +/* A further optimized version, exploits that SD is on bit 0 for recorders */ +void lcd_write_data(unsigned char* p_bytes, int count) +{ + do + { + unsigned byte; + unsigned sda1; /* precalculated SC=low,SD=1 */ + + /* take inverse data, so I can use the NEGC instruction below, it is + the only carry add/sub which does not destroy a source register */ + byte = ~(*p_bytes++ << 24); /* fetch to MSB position */ + + cli(); /* make port modifications atomic, in case an IRQ uses PBDRL */ + /* (currently not the case, so this could be optimized away) */ + + /* precalculate the values for later bit toggling, init data write */ + asm ( + "mov.b @%1,r0\n" /* r0 = PBDRL */ + "or %3,r0\n" /* r0 |= LCD_DS | LCD_SD DS and SD high, */ + "and %2,r0\n" /* r0 &= ~(LCD_CS | LCD_SC) CS and SC low */ + "mov.b r0,@%1\n" /* PBDRL = r0 */ + "neg r0,%0\n" /* sda1 = 0-r0 */ + : /* outputs: */ + /* %0 */ "=r"(sda1) + : /* inputs: */ + /* %1 */ "r"(LCDR), + /* %2 */ "I"(~(LCD_CS | LCD_SC)), + /* %3 */ "I"(LCD_DS | LCD_SD) + : /* trashed */ + "r0" + ); + + /* unrolled loop to serialize the byte */ + asm ( + "shll %0 \n" /* shift the MSB into carry */ + "negc %1, r0\n" /* carry to SD, SC low */ + "mov.b r0,@%3\n" /* set data to port */ + "or %2, r0\n" /* rise SC (independent of SD level) */ + "mov.b r0,@%3\n" /* set to port */ + + "shll %0 \n" + "negc %1, r0\n" + "mov.b r0,@%3\n" + "or %2, r0\n" + "mov.b r0,@%3\n" + + "shll %0 \n" + "negc %1, r0\n" + "mov.b r0,@%3\n" + "or %2, r0\n" + "mov.b r0,@%3\n" + + "shll %0 \n" + "negc %1, r0\n" + "mov.b r0,@%3\n" + "or %2, r0\n" + "mov.b r0,@%3\n" + + "shll %0 \n" + "negc %1, r0\n" + "mov.b r0,@%3\n" + "or %2, r0\n" + "mov.b r0,@%3\n" + + "shll %0 \n" + "negc %1, r0\n" + "mov.b r0,@%3\n" + "or %2, r0\n" + "mov.b r0,@%3\n" + + "shll %0 \n" + "negc %1, r0\n" + "mov.b r0,@%3\n" + "or %2, r0\n" + "mov.b r0,@%3\n" + + "shll %0 \n" + "negc %1, r0\n" + "mov.b r0,@%3\n" + "or %2, r0\n" + "mov.b r0,@%3\n" + + "or %4, r0\n" /* restore port */ + "mov.b r0,@%3\n" + : /* outputs: */ + : /* inputs: */ + /* %0 */ "r"(byte), + /* %1 */ "r"(sda1), + /* %2 */ "I"(LCD_SC), + /* %3 */ "r"(LCDR), + /* %4 */ "I"(LCD_CS|LCD_DS|LCD_SD|LCD_SC) + : /* trashed: */ + "r0" + ); + + sti(); /* end of atomic port modifications */ + + } while (--count); /* tail loop is faster */ +} +#endif /* #ifdef HAVE_LCD_CHARCELLS */ -- cgit v1.2.3