From e48cc2a491b205a96e2b5aa8c4d02f4cf84ae724 Mon Sep 17 00:00:00 2001 From: Jens Arnold Date: Mon, 10 May 2004 11:38:24 +0000 Subject: Fully assembler optimized lcd driver (another 10% real-world speedup on recorder), replaces lcd.c git-svn-id: svn://svn.rockbox.org/rockbox/trunk@4602 a1c6a512-1295-4272-9138-f99709370657 --- firmware/drivers/lcd.S | 307 +++++++++++++++++++++++++++++++++++++++++ firmware/drivers/lcd.c | 362 ------------------------------------------------- 2 files changed, 307 insertions(+), 362 deletions(-) create mode 100755 firmware/drivers/lcd.S delete mode 100644 firmware/drivers/lcd.c diff --git a/firmware/drivers/lcd.S b/firmware/drivers/lcd.S new file mode 100755 index 0000000000..3ab993c1d0 --- /dev/null +++ b/firmware/drivers/lcd.S @@ -0,0 +1,307 @@ +/*************************************************************************** + * __________ __ ___. + * Open \______ \ ____ ____ | | _\_ |__ _______ ___ + * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / + * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < + * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ + * \/ \/ \/ \/ \/ + * $Id$ + * + * Copyright (C) 2004 by Jens Arnold + * Based on the work of Alan Korr and Jörg Hohensohn + * + * All files in this archive are subject to the GNU General Public License. + * See the file COPYING in the source tree root for full license agreement. + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY + * KIND, either express or implied. 
+ *
+ ****************************************************************************/
+
+#include "config.h"
+#include "sh7034.h"
+
+#define LCDR (PBDR_ADDR+1) /* byte address of the port register called PBDRL below */
+
+#ifdef HAVE_LCD_CHARCELLS
+#define LCD_DS 1 /* PB0 = 1 --- 0001 --- LCD-DS */
+#define LCD_CS 2 /* PB1 = 1 --- 0010 --- /LCD-CS */
+#define LCD_SD 4 /* PB2 = 1 --- 0100 --- LCD-SD */
+#define LCD_SC 8 /* PB3 = 1 --- 1000 --- LCD-SC */
+#else
+#define LCD_SD 1 /* PB0 = 1 --- 0001 --- LCD-SD */
+#define LCD_SC 2 /* PB1 = 1 --- 0010 --- LCD-SC */
+#define LCD_RS 4 /* PB2 = 1 --- 0100 --- LCD-RS */
+#define LCD_CS 8 /* PB3 = 1 --- 1000 --- /LCD-CS */
+#define LCD_DS LCD_RS /* alias: the code below only uses the DS name */
+#endif
+
+/*
+ * About /CS,DS,SC,SD
+ * ------------------
+ *
+ * LCD on JBP and JBR uses a SPI protocol to receive orders (SDA and SCK lines)
+ *
+ * - /CS -> Chip Selection line :
+ *            0 : LCD chipset is activated.
+ * -  DS -> Data Selection line, latched at the rising edge
+ *          of the 8th serial clock (*) :
+ *            0 : instruction register,
+ *            1 : data register;
+ * -  SC -> Serial Clock line (SCK).
+ * -  SD -> Serial Data line (SDA), latched at the rising edge
+ *          of each serial clock (*).
+ *
+ *      _                                                          _
+ * /CS   \                                                        /
+ *        \______________________________________________________/
+ *      _____  ____  ____  ____  ____  ____  ____  ____  ____  _____
+ * SD        \/ D7 \/ D6 \/ D5 \/ D4 \/ D3 \/ D2 \/ D1 \/ D0 \/
+ *      _____/\____/\____/\____/\____/\____/\____/\____/\____/\_____
+ *
+ *      _____     _     _     _     _     _     _     _     ________
+ * SC        \   * \   * \   * \   * \   * \   * \   * \   *
+ *            \_/   \_/   \_/   \_/   \_/   \_/   \_/   \_/
+ *      _  _________________________________________________________
+ * DS    \/
+ *      _/\_________________________________________________________
+ *
+ */
+
+    .section    .icode,"ax",@progbits
+
+    .align      2
+    .global     _lcd_write_command
+    .type       _lcd_write_command,@function
+
+/* Write a single command byte to the lcd controller (DS held low), MSB first
+ *
+ * Arguments:
+ *   r4 - data byte (int); only the low 8 bits are sent
+ *
+ * Register usage:
+ *   r0 - scratch
+ *   r1 - data byte (copied)
+ *   r2 - precalculated port value (CS, DS and SC low, SD high),
+ *        stored negated; the transfer loop undoes this with neg/negc
+ *   r3 - lcd port address
+ *   r5 - 1 (byte count for reuse of the loop in _lcd_write_data)
+ */
+
+_lcd_write_command:
+    mov.l   .lcdr,r3            /* put lcd data port address in r3 */
+    mov     r4,r1               /* copy data byte to r1 */
+    mov     #1,r5               /* set byte count to 1 (!) */
+
+    /* This code will fail if an interrupt changes the contents of PBDRL.
+     * If so, we must disable the interrupt here. */
+
+    mov.b   @r3,r0              /* r0 = PBDRL */
+    or      #(LCD_SD),r0        /* r0 |= LCD_SD */
+    and     #(~(LCD_CS|LCD_DS|LCD_SC)),r0 /* r0 &= ~(LCD_CS|LCD_DS|LCD_SC) */
+
+    bra     .single_transfer    /* jump into the transfer loop */
+    neg     r0,r2               /* (delay slot) r2 = 0 - r0 */
+
+
+    .align      2
+    .global     _lcd_write_data
+    .type       _lcd_write_data,@function
+
+
+/* A high performance function to write data to the display,
+ * one or multiple bytes. Each byte is clocked out MSB first with DS high.
+ *
+ * Arguments:
+ *   r4 - data address
+ *   r5 - byte count (must be > 0: it is only tested after a byte was sent)
+ *
+ * Register usage:
+ *   r0 - scratch
+ *   r1 - current data byte
+ *   r2 - precalculated port value (CS and SC low, DS and SD high),
+ *        stored negated; the transfer loop undoes this with neg/negc
+ *   r3 - lcd port address
+ */
+
+_lcd_write_data:
+    mov.l   .lcdr,r3            /* put lcd data port address in r3 */
+    nop                         /* align here */
+
+    /* This code will fail if an interrupt changes the contents of PBDRL.
+     * If so, we must disable the interrupt here. If disabling interrupts
+     * for a long time (~9200 clks = ~830 µs for transferring 112 bytes on
+     * recorders) is undesirable, the loop has to be rewritten to
+     * disable/precalculate/transfer/enable for each iteration. However,
+     * this would significantly decrease performance. */
+
+    mov.b   @r3,r0              /* r0 = PBDRL */
+    or      #(LCD_DS|LCD_SD),r0 /* r0 |= LCD_DS|LCD_SD */
+    and     #(~(LCD_CS|LCD_SC)),r0 /* r0 &= ~(LCD_CS|LCD_SC) */
+    neg     r0,r2               /* r2 = 0 - r0 */
+
+#ifdef HAVE_LCD_CHARCELLS
+/* optimized player version, also works for recorders */
+
+    .align  2
+.multi_transfer:
+    mov.b   @r4+,r1             /* load next data byte, r4 post-incremented */
+
+.single_transfer:
+    shll16  r1                  /* shift data to most significant byte */
+    shll8   r1                  /* (16 + 8 = 24 bit positions in total) */
+
+    shll    r1                  /* bit 7: shift the msb into carry */
+    neg     r2,r0               /* copy negated precalculated port value */
+                     /* uses neg here for compatibility with recorder version */
+    bt      1f                  /* data bit = 1? yes: leave SD high */
+    and     #(~LCD_SD),r0       /* no: r0 &= ~LCD_SD */
+ 1:
+    shll    r1                  /* next shift (bit 6) here for alignment */
+    mov.b   r0,@r3              /* set data to port (SC low) */
+    or      #(LCD_SC),r0        /* raise SC (SD level stays as it is) */
+    mov.b   r0,@r3              /* set to port: LCD latches SD on this edge */
+
+    neg     r2,r0               /* bit 6 (already in carry) */
+    bt      1f
+    and     #(~LCD_SD),r0
+ 1:
+    mov.b   r0,@r3
+    or      #(LCD_SC),r0
+    mov.b   r0,@r3
+
+    shll    r1                  /* bit 5 -> carry */
+    neg     r2,r0
+    bt      1f
+    and     #(~LCD_SD),r0
+ 1:
+    shll    r1                  /* bit 4 -> carry, used by the next group */
+    mov.b   r0,@r3
+    or      #(LCD_SC),r0
+    mov.b   r0,@r3
+
+    neg     r2,r0               /* bit 4 */
+    bt      1f
+    and     #(~LCD_SD),r0
+ 1:
+    mov.b   r0,@r3
+    or      #(LCD_SC),r0
+    mov.b   r0,@r3
+
+    shll    r1                  /* bit 3 -> carry */
+    neg     r2,r0
+    bt      1f
+    and     #(~LCD_SD),r0
+ 1:
+    shll    r1                  /* bit 2 -> carry, used by the next group */
+    mov.b   r0,@r3
+    or      #(LCD_SC),r0
+    mov.b   r0,@r3
+
+    neg     r2,r0               /* bit 2 */
+    bt      1f
+    and     #(~LCD_SD),r0
+ 1:
+    mov.b   r0,@r3
+    or      #(LCD_SC),r0
+    mov.b   r0,@r3
+
+    shll    r1                  /* bit 1 -> carry */
+    neg     r2,r0
+    bt      1f
+    and     #(~LCD_SD),r0
+ 1:
+    shll    r1                  /* bit 0 -> carry, used by the next group */
+    mov.b   r0,@r3
+    or      #(LCD_SC),r0
+    mov.b   r0,@r3
+
+    neg     r2,r0               /* bit 0 */
+    bt      1f
+    and     #(~LCD_SD),r0
+ 1:
+    mov.b   r0,@r3
+    or      #(LCD_SC),r0
+    mov.b   r0,@r3
+
+#else /* HAVE_LCD_CHARCELLS */
+/* further optimized version, exploits that SD is on bit 0 for recorders */
+
+    .align  2
+.multi_transfer:
+    mov.b   @r4+,r1             /* load next data byte, r4 post-incremented */
+    nop                         /* padding; presumably for alignment of the code below */
+
+.single_transfer:
+    shll16  r1                  /* shift data to most significant byte */
+    shll8   r1                  /* (16 + 8 = 24 bit positions in total) */
+    not     r1,r1               /* and invert for use with negc */
+
+    shll    r1                  /* bit 7: shift the MSB into carry */
+    negc    r2,r0               /* r0 = 0 - r2 - carry: SD = data bit, SC low */
+    shll    r1                  /* next shift (bit 6) here for alignment */
+    mov.b   r0,@r3              /* set data to port */
+    or      #(LCD_SC),r0        /* raise SC (SD level stays as it is) */
+    mov.b   r0,@r3              /* set to port: LCD latches SD on this edge */
+
+    negc    r2,r0               /* bit 6 (already in carry) */
+    mov.b   r0,@r3
+    or      #(LCD_SC),r0
+    mov.b   r0,@r3
+
+    shll    r1                  /* bit 5 -> carry (negc overwrote the carry) */
+    negc    r2,r0
+    shll    r1                  /* bit 4 -> carry, used by the next group */
+    mov.b   r0,@r3
+    or      #(LCD_SC),r0
+    mov.b   r0,@r3
+
+    negc    r2,r0               /* bit 4 */
+    mov.b   r0,@r3
+    or      #(LCD_SC),r0
+    mov.b   r0,@r3
+
+    shll    r1                  /* bit 3 -> carry */
+    negc    r2,r0
+    shll    r1                  /* bit 2 -> carry, used by the next group */
+    mov.b   r0,@r3
+    or      #(LCD_SC),r0
+    mov.b   r0,@r3
+
+    negc    r2,r0               /* bit 2 */
+    mov.b   r0,@r3
+    or      #(LCD_SC),r0
+    mov.b   r0,@r3
+
+    shll    r1                  /* bit 1 -> carry */
+    negc    r2,r0
+    shll    r1                  /* bit 0 -> carry, used by the next group */
+    mov.b   r0,@r3
+    or      #(LCD_SC),r0
+    mov.b   r0,@r3
+
+    negc    r2,r0               /* bit 0 */
+    mov.b   r0,@r3
+    or      #(LCD_SC),r0
+    mov.b   r0,@r3
+
+#endif /* HAVE_LCD_CHARCELLS */
+
+    add     #-1,r5              /* decrease byte count */
+    tst     r5,r5               /* r5 == 0 ? */
+    bf      .multi_transfer     /* no: next iteration */
+
+    or      #(LCD_CS|LCD_DS|LCD_SD|LCD_SC),r0 /* idle state: CS, DS, SD, SC all high */
+    rts                         /* return; the store below is in its delay slot */
+    mov.b   r0,@r3              /* (delay slot) write the idle state to the port */
+
+    /* This is the place to reenable the interrupts, if we have disabled
+     * them. See above. */
+
+    .align  2
+.lcdr:
+    .long   LCDR                /* literal loaded by the mov.l .lcdr,r3 above */
+
+.end:
+    .size   _lcd_write_command,.end-_lcd_write_command /* spans both entry points */
+
diff --git a/firmware/drivers/lcd.c b/firmware/drivers/lcd.c
deleted file mode 100644
index 68627f7c51..0000000000
--- a/firmware/drivers/lcd.c
+++ /dev/null
@@ -1,362 +0,0 @@
-/***************************************************************************
- *             __________               __   ___.
- *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
- *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
- *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
- *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
- *                     \/            \/     \/    \/            \/
- * $Id$
- *
- * Copyright (C) 2002 by Alan Korr, speedup by Jörg Hohensohn
- * Further speedup and reorganization by Jens Arnold
- *
- * All files in this archive are subject to the GNU General Public License.
- * See the file COPYING in the source tree root for full license agreement.
- *
- * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
- * KIND, either express or implied.
- * - ****************************************************************************/ - -#include -#include "system.h" - -#define LCDR (PBDR_ADDR+1) - -#ifdef HAVE_LCD_CHARCELLS -#define LCD_DS 1 /* PB0 = 1 --- 0001 --- LCD-DS */ -#define LCD_CS 2 /* PB1 = 1 --- 0010 --- /LCD-CS */ -#define LCD_SD 4 /* PB2 = 1 --- 0100 --- LCD-SD */ -#define LCD_SC 8 /* PB3 = 1 --- 1000 --- LCD-SC */ -#else -#define LCD_SD 1 /* PB0 = 1 --- 0001 */ -#define LCD_SC 2 /* PB1 = 1 --- 0010 */ -#define LCD_RS 4 /* PB2 = 1 --- 0100 */ -#define LCD_CS 8 /* PB3 = 1 --- 1000 */ -#define LCD_DS LCD_RS -#endif - -/* - * About /CS,DS,SC,SD - * ------------------ - * - * LCD on JBP and JBR uses a SPI protocol to receive orders (SDA and SCK lines) - * - * - /CS -> Chip Selection line : - * 0 : LCD chipset is activated. - * - DS -> Data Selection line, latched at the rising edge - * of the 8th serial clock (*) : - * 0 : instruction register, - * 1 : data register; - * - SC -> Serial Clock line (SDA). - * - SD -> Serial Data line (SCK), latched at the rising edge - * of each serial clock (*). - * - * _ _ - * /CS \ / - * \______________________________________________________/ - * _____ ____ ____ ____ ____ ____ ____ ____ ____ _____ - * SD \/ D7 \/ D6 \/ D5 \/ D4 \/ D3 \/ D2 \/ D1 \/ D0 \/ - * _____/\____/\____/\____/\____/\____/\____/\____/\____/\_____ - * - * _____ _ _ _ _ _ _ _ ________ - * SC \ * \ * \ * \ * \ * \ * \ * \ * - * \_/ \_/ \_/ \_/ \_/ \_/ \_/ \_/ - * _ _________________________________________________________ - * DS \/ - * _/\_________________________________________________________ - * - */ - -/* - * The only way to do logical operations in an atomic way - * on SH1 is using : - * - * or.b/and.b/tst.b/xor.b #imm,@(r0,gbr) - * - * but GCC doesn't generate them at all so some assembly - * codes are needed here. - * - * The Global Base Register gbr is expected to be zero - * and r0 is the address of one register in the on-chip - * peripheral module. 
- * - */ - -void lcd_write_command(int byte) __attribute__ ((section (".icode"))); -void lcd_write_command(int byte) -{ - asm ( - "and.b %0, @(r0,gbr)" - : /* outputs */ - : /* inputs */ - /* %0 */ "I"(~(LCD_CS|LCD_DS|LCD_SD|LCD_SC)), - /* %1 = r0 */ "z"(LCDR) - ); - - asm ( - "0: \n" - "and.b %2,@(r0,gbr) \n" - "shll %0 \n" - "bf 1f \n" - "or.b %3,@(r0,gbr) \n" - "1: \n" - "or.b %4,@(r0,gbr) \n" - "add #-1,%1 \n" - "cmp/pl %1 \n" - "bt 0b \n" - : /* outputs */ - : /* inputs */ - /* %0 */ "r"(((unsigned)byte)<<24), - /* %1 */ "r"(8), - /* %2 */ "I"(~(LCD_SC|LCD_SD|LCD_DS)), - /* %3 */ "I"(LCD_SD), - /* %4 */ "I"(LCD_SC), - /* %5 = r0 */ "z"(LCDR) - ); - - asm ( - "or.b %0, @(r0,gbr)" - : /* outputs */ - : /* inputs */ - /* %0 */ "I"(LCD_CS|LCD_DS|LCD_SD|LCD_SC), - /* %1 = r0 */ "z"(LCDR) - ); -} - - -/* A high performance function to write data to the display, - one or multiple bytes. */ -void lcd_write_data(unsigned char* p_bytes, int count) __attribute__ ((section (".icode"))); - -#ifdef HAVE_LCD_CHARCELLS -/* This version works for both Player and Recorder models */ -void lcd_write_data(unsigned char* p_bytes, int count) -{ - do - { - unsigned int byte; - unsigned int sda1; /* precalculated SC=low,SD=1 */ - - byte = *p_bytes++ << 24; /* fetch to MSB position */ - - /* This code will fail if an interrupt changes the contents of PBDRL. - If so, we must disable the interrupt here. 
*/ - - /* precalculate the values for later bit toggling, init data write */ - asm ( - "mov.b @%1,r0 \n" /* r0 = PBDRL */ - "or %3,r0 \n" /* r0 |= LCD_DS | LCD_SD DS and SD high */ - "and %2,r0 \n" /* r0 &= ~(LCD_CS | LCD_SC) CS and SC low */ - "mov.b r0,@%1 \n" /* PBDRL = r0 */ - "mov r0,%0 \n" /* sda1 = r0 */ - : /* outputs */ - /* %0 */ "=r"(sda1) - : /* inputs */ - /* %1 */ "r"(LCDR), - /* %2 */ "I"(~(LCD_CS | LCD_SC)), - /* %3 */ "I"(LCD_DS | LCD_SD) - : /* trashed */ - "r0" - ); - - /* unrolled loop to serialize the byte */ - asm ( - "shll %0 \n" /* shift the msb into carry */ - ".align 2 \n" - "mov %1,r0 \n" /* copy precalculated port value */ - "bt 1f \n" /* data bit = 1? */ - "and %5,r0 \n" /* no: r0 &= ~LCD_SD */ - "1: \n" - "shll %0 \n" /* next shift here for alignment */ - "mov.b r0,@%3 \n" /* set data to port */ - "or %2,r0 \n" /* rise SC (independent of SD level) */ - "mov.b r0,@%3 \n" /* set to port */ - - "mov %1,r0 \n" - "bt 1f \n" - "and %5,r0 \n" - "1: \n" - "mov.b r0,@%3 \n" - "or %2,r0 \n" - "mov.b r0,@%3 \n" - - "shll %0 \n" - "mov %1,r0 \n" - "bt 1f \n" - "and %5,r0 \n" - "1: \n" - "shll %0 \n" - "mov.b r0,@%3 \n" - "or %2,r0 \n" - "mov.b r0,@%3 \n" - - "mov %1,r0 \n" - "bt 1f \n" - "and %5,r0 \n" - "1: \n" - "mov.b r0,@%3 \n" - "or %2,r0 \n" - "mov.b r0,@%3 \n" - - "shll %0 \n" - "mov %1,r0 \n" - "bt 1f \n" - "and %5,r0 \n" - "1: \n" - "shll %0 \n" - "mov.b r0,@%3 \n" - "or %2,r0 \n" - "mov.b r0,@%3 \n" - - "mov %1,r0 \n" - "bt 1f \n" - "and %5,r0 \n" - "1: \n" - "mov.b r0,@%3 \n" - "or %2,r0 \n" - "mov.b r0,@%3 \n" - - "shll %0 \n" - "mov %1,r0 \n" - "bt 1f \n" - "and %5,r0 \n" - "1: \n" - "shll %0 \n" - "mov.b r0,@%3 \n" - "or %2,r0 \n" - "mov.b r0,@%3 \n" - - "mov %1,r0 \n" - "bt 1f \n" - "and %5,r0 \n" - "1: \n" - "mov.b r0,@%3 \n" - "or %2,r0 \n" - "mov.b r0,@%3 \n" - - "or %4,r0 \n" /* restore port */ - "mov.b r0,@%3 \n" - : /* outputs */ - : /* inputs */ - /* %0 */ "r"(byte), - /* %1 */ "r"(sda1), - /* %2 */ "I"(LCD_SC), - /* %3 */ 
"r"(LCDR), - /* %4 */ "I"(LCD_CS | LCD_DS | LCD_SD | LCD_SC), - /* %5 */ "I"(~(LCD_SD)) - : /* trashed */ - "r0" - ); - - /* This is the place to reenable the interrupts, if we have disabled - them. See above. */ - - } while (--count); /* tail loop is faster */ -} - -#else /* #ifdef HAVE_LCD_CHARCELLS */ -/* A further optimized version, exploits that SD is on bit 0 for recorders */ -void lcd_write_data(unsigned char* p_bytes, int count) -{ - do - { - unsigned byte; - unsigned sda1; /* precalculated SC=low,SD=1 */ - - /* take inverse data, so I can use the NEGC instruction below, it is - the only carry add/sub which does not destroy a source register */ - byte = ~(*p_bytes++ << 24); /* fetch to MSB position */ - - /* This code will fail if an interrupt changes the contents of PBDRL. - If so, we must disable the interrupt here. */ - - /* precalculate the values for later bit toggling, init data write */ - asm ( - "mov.b @%1,r0 \n" /* r0 = PBDRL */ - "or %3,r0 \n" /* r0 |= LCD_DS | LCD_SD DS and SD high, */ - "and %2,r0 \n" /* r0 &= ~(LCD_CS | LCD_SC) CS and SC low */ - "mov.b r0,@%1 \n" /* PBDRL = r0 */ - "neg r0,%0 \n" /* sda1 = 0-r0 */ - : /* outputs: */ - /* %0 */ "=r"(sda1) - : /* inputs: */ - /* %1 */ "r"(LCDR), - /* %2 */ "I"(~(LCD_CS | LCD_SC)), - /* %3 */ "I"(LCD_DS | LCD_SD) - : /* trashed */ - "r0" - ); - - /* unrolled loop to serialize the byte */ - asm ( - "shll %0 \n" /* shift the MSB into carry */ - ".align 2 \n" - "negc %1, r0 \n" /* carry to SD, SC low */ - "shll %0 \n" /* next shift here for alignment */ - "mov.b r0,@%3 \n" /* set data to port */ - "or %2, r0 \n" /* rise SC (independent of SD level) */ - "mov.b r0,@%3 \n" /* set to port */ - - "negc %1, r0 \n" - "mov.b r0,@%3 \n" - "or %2, r0 \n" - "mov.b r0,@%3 \n" - - "shll %0 \n" - "negc %1, r0 \n" - "shll %0 \n" - "mov.b r0,@%3 \n" - "or %2, r0 \n" - "mov.b r0,@%3 \n" - - "negc %1, r0 \n" - "mov.b r0,@%3 \n" - "or %2, r0 \n" - "mov.b r0,@%3 \n" - - "shll %0 \n" - "negc %1, r0 \n" - "shll %0 \n" - 
"mov.b r0,@%3 \n" - "or %2, r0 \n" - "mov.b r0,@%3 \n" - - "negc %1, r0 \n" - "mov.b r0,@%3 \n" - "or %2, r0 \n" - "mov.b r0,@%3 \n" - - "shll %0 \n" - "negc %1, r0 \n" - "shll %0 \n" - "mov.b r0,@%3 \n" - "or %2, r0 \n" - "mov.b r0,@%3 \n" - - "negc %1, r0 \n" - "mov.b r0,@%3 \n" - "or %2, r0 \n" - "mov.b r0,@%3 \n" - - "or %4, r0 \n" /* restore port */ - "mov.b r0,@%3 \n" - : /* outputs: */ - : /* inputs: */ - /* %0 */ "r"(byte), - /* %1 */ "r"(sda1), - /* %2 */ "I"(LCD_SC), - /* %3 */ "r"(LCDR), - /* %4 */ "I"(LCD_CS|LCD_DS|LCD_SD|LCD_SC) - : /* trashed: */ - "r0" - ); - - /* This is the place to reenable the interrupts, if we have disabled - them. See above. */ - - } while (--count); /* tail loop is faster */ -} -#endif /* #ifdef HAVE_LCD_CHARCELLS */ - - -- cgit v1.2.3