From a035261089403de259e74ce4dd196e2715138ed2 Mon Sep 17 00:00:00 2001 From: Thomas Martitz Date: Sat, 7 Jan 2012 19:56:09 +0100 Subject: Move optimized memcpy and friends and strlen to firmware/asm, using the new automatic-asm-picking infrastructure. --- firmware/target/sh/memcpy-sh.S | 219 --------------------------------------- firmware/target/sh/memmove-sh.S | 222 ---------------------------------------- firmware/target/sh/memset-sh.S | 109 -------------------- firmware/target/sh/strlen-sh.S | 96 ----------------- 4 files changed, 646 deletions(-) delete mode 100644 firmware/target/sh/memcpy-sh.S delete mode 100644 firmware/target/sh/memmove-sh.S delete mode 100644 firmware/target/sh/memset-sh.S delete mode 100644 firmware/target/sh/strlen-sh.S (limited to 'firmware/target/sh') diff --git a/firmware/target/sh/memcpy-sh.S b/firmware/target/sh/memcpy-sh.S deleted file mode 100644 index e23a579b05..0000000000 --- a/firmware/target/sh/memcpy-sh.S +++ /dev/null @@ -1,219 +0,0 @@ -/*************************************************************************** - * __________ __ ___. - * Open \______ \ ____ ____ | | _\_ |__ _______ ___ - * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / - * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < - * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ - * \/ \/ \/ \/ \/ - * $Id$ - * - * Copyright (C) 2004-2005 by Jens Arnold - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY - * KIND, either express or implied. 
- * - ****************************************************************************/ -#include "config.h" - - .section .icode,"ax",@progbits - - .align 2 - .global _memcpy - .global ___memcpy_fwd_entry - .type _memcpy,@function - -/* Copies r6 (length) bytes of data in memory from the source address (r5) - * to the destination address (r4) - * This version is optimized for speed - * - * arguments: - * r4 - destination address - * r5 - source address - * r6 - length - * - * return value: - * r0 - destination address (like ANSI version) - * - * register usage: - * r0 - data / scratch - * r1 - 2nd data / scratch - * r2 - scratch - * r3 - first long bound / adjusted end address (only if >= 11 bytes) - * r4 - current dest address (kept 8 below the real position, see below) - * r5 - current source address - * r6 - source end address - * r7 - stored dest start address - * - * The instruction order is devised in a way to utilize the pipelining - * of the SH1 to the max. The routine also tries to utilize fast page mode. - * - * ___memcpy_fwd_entry is a second, exported entry point located after the - * 'mov r4,r7' below: code entering there must already have placed the - * value to be returned in r7. - * - * The instruction directly following jmp, bra and rts sits in the branch - * delay slot and is executed before the branch takes effect. - */ - -_memcpy: - mov r4,r7 /* store dest for returning */ -___memcpy_fwd_entry: - add #-8,r4 /* offset for early increment (max. 2 longs) */ - mov #11,r0 - cmp/hs r0,r6 /* at least 11 bytes to copy? (ensures 2 aligned longs) */ - add r5,r6 /* r6 = source_end */ - bf .start_b2 /* no: jump directly to byte loop */ - - mov #3,r0 - neg r5,r3 - and r0,r3 /* r3 = (4 - align_offset) % 4 */ - tst r3,r3 /* already aligned? 
*/ - bt .end_b1 /* yes: skip leading byte loop */ - - add r5,r3 /* r3 = first source long bound */ - - /* leading byte loop: copies 0..3 bytes */ -.loop_b1: - mov.b @r5+,r0 /* load byte & increment source addr */ - add #1,r4 /* increment dest addr */ - mov.b r0,@(7,r4) /* store byte (r4 runs 8 behind and was just incremented, hence +7) */ - cmp/hi r5,r3 /* runs r5 up to first long bound */ - bt .loop_b1 - /* now r5 is always at a long boundary */ - /* -> memory reading is done in longs for all dest alignments */ - - /* selector for main copy loop */ -.end_b1: - mov #3,r1 - and r4,r1 /* r1 = dest alignment offset */ - mova .jmptab,r0 - mov.b @(r0,r1),r1 /* select appropriate main loop */ - add r0,r1 - mov r6,r3 /* move end address to r3 */ - jmp @r1 /* and jump to it */ - add #-7,r3 /* delay slot: adjust end addr for main loops doing 2 longs/pass */ - - /** main loops, copying 2 longs per pass to profit from fast page mode **/ - - /* long aligned destination (fastest) */ - .align 2 -.loop_do0: - mov.l @r5+,r1 /* load first long & increment source addr */ - add #16,r4 /* increment dest addr & account for decrementing stores */ - mov.l @r5+,r0 /* load second long & increment source addr */ - cmp/hi r5,r3 /* runs r5 up to last or second last long bound */ - mov.l r0,@-r4 /* store second long */ - mov.l r1,@-r4 /* store first long; NOT ALIGNED - no speed loss here! */ - bt .loop_do0 - - add #4,r3 /* readjust end address */ - cmp/hi r5,r3 /* one long left? 
*/ - bf .start_b2 /* no, jump to trailing byte loop */ - - mov.l @r5+,r0 /* load last long & increment source addr */ - add #4,r4 /* increment dest addr */ - bra .start_b2 /* jump to trailing byte loop */ - mov.l r0,@(4,r4) /* delay slot: store last long */ - - /* word aligned destination (long + 2) */ - .align 2 -.loop_do2: - mov.l @r5+,r1 /* load first long & increment source addr */ - add #16,r4 /* increment dest addr & account for decrementing stores */ - mov.l @r5+,r0 /* load second long & increment source addr */ - cmp/hi r5,r3 /* runs r5 up to last or second last long bound */ - mov.w r0,@-r4 /* store low word of second long */ - xtrct r1,r0 /* extract low word of first long & high word of second long */ - mov.l r0,@-r4 /* and store as long */ - swap.w r1,r0 /* get high word of first long */ - mov.w r0,@-r4 /* and store it */ - bt .loop_do2 - - add #4,r3 /* readjust end address */ - cmp/hi r5,r3 /* one long left? */ - bf .start_b2 /* no, jump to trailing byte loop */ - - mov.l @r5+,r0 /* load last long & increment source addr */ - add #4,r4 /* increment dest addr */ - mov.w r0,@(6,r4) /* store low word */ - shlr16 r0 /* get high word */ - bra .start_b2 /* jump to trailing byte loop */ - mov.w r0,@(4,r4) /* delay slot: and store it */ - - /* jumptable for loop selector */ - .align 2 -.jmptab: - .byte .loop_do0 - .jmptab /* placed in the middle because the SH1 */ - .byte .loop_do1 - .jmptab /* loads bytes sign-extended. Otherwise */ - .byte .loop_do2 - .jmptab /* the last loop would be out of reach */ - .byte .loop_do3 - .jmptab /* of the offset range. 
*/ - - /* byte aligned destination (long + 1) */ - .align 2 -.loop_do1: - mov.l @r5+,r1 /* load first long & increment source addr */ - add #16,r4 /* increment dest addr & account for decrementing stores */ - mov.l @r5+,r0 /* load second long & increment source addr */ - cmp/hi r5,r3 /* runs r5 up to last or second last long bound */ - mov.b r0,@-r4 /* store low byte of second long */ - shlr8 r0 /* get upper 3 bytes */ - mov r1,r2 /* copy first long */ - shll16 r2 /* move low byte of first long all the way up, .. */ - shll8 r2 - or r2,r0 /* ..combine with the 3 bytes of second long.. */ - mov.l r0,@-r4 /* ..and store as long */ - shlr8 r1 /* get middle 2 bytes */ - mov.w r1,@-r4 /* store as word */ - shlr16 r1 /* get upper byte */ - mov.b r1,@-r4 /* and store */ - bt .loop_do1 - - add #4,r3 /* readjust end address */ -.last_do13: - cmp/hi r5,r3 /* one long left? */ - bf .start_b2 /* no, jump to trailing byte loop */ - - mov.l @r5+,r0 /* load last long & increment source addr */ - add #12,r4 /* increment dest addr & account for decrementing stores */ - mov.b r0,@-r4 /* store low byte */ - shlr8 r0 /* get middle 2 bytes */ - mov.w r0,@-r4 /* store as word */ - shlr16 r0 /* get upper byte */ - mov.b r0,@-r4 /* and store */ - bra .start_b2 /* jump to trailing byte loop */ - add #-4,r4 /* delay slot: readjust destination (8 behind again) */ - - /* byte aligned destination (long + 3) */ - .align 2 -.loop_do3: - mov.l @r5+,r1 /* load first long & increment source addr */ - add #16,r4 /* increment dest addr & account for decrementing stores */ - mov.l @r5+,r0 /* load second long & increment source addr */ - mov r1,r2 /* copy first long */ - mov.b r0,@-r4 /* store low byte of second long */ - shlr8 r0 /* get middle 2 bytes */ - mov.w r0,@-r4 /* store as word */ - shlr16 r0 /* get upper byte */ - shll8 r2 /* move lower 3 bytes of first long one up.. */ - or r2,r0 /* ..combine with the 1 byte of second long.. 
*/ - mov.l r0,@-r4 /* ..and store as long */ - shlr16 r1 /* get upper byte of first long.. 
*/ - shlr8 r1 - cmp/hi r5,r3 /* runs r5 up to last or second last long bound */ - mov.b r1,@-r4 /* ..and store */ - bt .loop_do3 - - bra .last_do13 /* handle last longword: reuse routine for (long + 1) */ - add #4,r3 /* delay slot: readjust end address */ - - /* trailing byte loop: copies 0..3 bytes (or all for < 11 in total) */ - .align 2 -.loop_b2: - mov.b @r5+,r0 /* load byte & increment source addr */ - add #1,r4 /* increment dest addr */ - mov.b r0,@(7,r4) /* store byte (r4 runs 8 behind and was just incremented, hence +7) */ -.start_b2: - cmp/hi r5,r6 /* runs r5 up to end address */ - bt .loop_b2 - - rts - mov r7,r0 /* delay slot: return dest start address */ -.end: - .size _memcpy,.end-_memcpy diff --git a/firmware/target/sh/memmove-sh.S b/firmware/target/sh/memmove-sh.S deleted file mode 100644 index d5a7160043..0000000000 --- a/firmware/target/sh/memmove-sh.S +++ /dev/null @@ -1,222 +0,0 @@ -/*************************************************************************** - * __________ __ ___. - * Open \______ \ ____ ____ | | _\_ |__ _______ ___ - * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / - * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < - * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ - * \/ \/ \/ \/ \/ - * $Id$ - * - * Copyright (C) 2006 by Jens Arnold - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY - * KIND, either express or implied. 
- * - ****************************************************************************/ -#include "config.h" - - .section .icode,"ax",@progbits - - .align 2 - .global _memmove - .type _memmove,@function - -/* Moves r6 (length) bytes of data in memory from the source address (r5) - * to the destination address (r4) - * Regions may overlap. - * This version is optimized for speed, and needs the corresponding memcpy - * implementation for the forward copy branch. 
- * - * arguments: - * r4 - destination address - * r5 - source address - * r6 - length - * - * return value: - * r0 - destination address (like ANSI version) - * - * register usage: - * r0 - data / scratch - * r1 - 2nd data / scratch - * r2 - scratch - * r3 - last long bound / adjusted start address (only if >= 11 bytes) - * r4 - current dest address (runs down from dest end to dest start) - * r5 - source start address, biased by -8 in the backward branch - * r6 - current source address, kept 8 below the real position - * r7 - dest start address; set only on the forward path, where it is - * handed over to ___memcpy_fwd_entry - * - * The instruction order is devised in a way to utilize the pipelining - * of the SH1 to the max. The routine also tries to utilize fast page mode. - * - * The instruction directly following jmp, bra and rts sits in the branch - * delay slot and is executed before the branch takes effect. - */ - -_memmove: - cmp/hi r4,r5 /* source > destination? (unsigned) */ - bf .backward /* no (or equal): backward copy */ - mov.l .memcpy_fwd,r0 /* r0 = address of the forward entry point */ - jmp @r0 /* continue there; this routine is not returned to */ - mov r4,r7 /* delay slot: store dest for returning */ - - .align 2 -.memcpy_fwd: - .long ___memcpy_fwd_entry - -.backward: - add r6,r4 /* r4 = destination end */ - mov #11,r0 - cmp/hs r0,r6 /* at least 11 bytes to copy? (ensures 2 aligned longs) */ - add #-8,r5 /* adjust for late decrement (max. 2 longs) */ - add r5,r6 /* r6 = source end - 8 */ - bf .start_b2r /* no: jump directly to byte loop */ - - mov #-4,r3 /* r3 = 0xfffffffc */ - and r6,r3 /* r3 = last source long bound */ - cmp/hi r3,r6 /* already aligned? 
*/ - bf .end_b1r /* yes: skip leading byte loop */ - - /* leading byte loop: copies 1..3 bytes down to the last long bound */ -.loop_b1r: - mov.b @(7,r6),r0 /* load byte (r6 runs 8 behind, so +7 is the byte just below) */ - add #-1,r6 /* decrement source addr */ - mov.b r0,@-r4 /* store byte */ - cmp/hi r3,r6 /* runs r6 down to last long bound */ - bt .loop_b1r - - /* selector for main copy loop */ -.end_b1r: - mov #3,r1 - and r4,r1 /* r1 = dest alignment offset */ - mova .jmptab_r,r0 - mov.b @(r0,r1),r1 /* select appropriate main loop.. 
*/ - add r0,r1 - mov r5,r3 /* copy start address to r3 */ - jmp @r1 /* ..and jump to it */ - add #7,r3 /* delay slot: adjust the loop bound (derived from the start address) for main loops doing 2 longs/pass */ - - /** main loops, copying 2 longs per pass to profit from fast page mode **/ - - /* long aligned destination (fastest) */ - .align 2 -.loop_do0r: - mov.l @r6,r1 /* load first long */ - add #-8,r6 /* decrement source addr */ - mov.l @(12,r6),r0 /* load second long */ - cmp/hi r3,r6 /* runs r6 down to first or second long bound */ - mov.l r0,@-r4 /* store second long */ - mov.l r1,@-r4 /* store first long; NOT ALIGNED - no speed loss here! */ - bt .loop_do0r - - add #-4,r3 /* readjust end address */ - cmp/hi r3,r6 /* first long left? */ - bf .start_b2r /* no, jump to trailing byte loop */ - - mov.l @(4,r6),r0 /* load first long */ - add #-4,r6 /* decrement source addr */ - bra .start_b2r /* jump to trailing byte loop */ - mov.l r0,@-r4 /* delay slot: store first long */ - - /* word aligned destination (long + 2) */ - .align 2 -.loop_do2r: - mov.l @r6,r1 /* load first long */ - add #-8,r6 /* decrement source addr */ - mov.l @(12,r6),r0 /* load second long */ - cmp/hi r3,r6 /* runs r6 down to first or second long bound */ - mov.w r0,@-r4 /* store low word of second long */ - xtrct r1,r0 /* extract low word of first long & high word of second long */ - mov.l r0,@-r4 /* and store as long */ - shlr16 r1 /* get high word of first long */ - mov.w r1,@-r4 /* and store it */ - bt .loop_do2r - - add #-4,r3 /* readjust end address */ - cmp/hi r3,r6 /* first long left? 
*/ - bf .start_b2r /* no, jump to trailing byte loop */ - - mov.l @(4,r6),r0 /* load first long */ - add #-4,r6 /* decrement source addr */ - mov.w r0,@-r4 /* store low word */ - shlr16 r0 /* get high word */ - bra .start_b2r /* jump to trailing byte loop */ - mov.w r0,@-r4 /* delay slot: and store it */ - - /* jumptable for loop selector */ - .align 2 -.jmptab_r: - .byte .loop_do0r - .jmptab_r /* placed in the middle because the SH1 */ - .byte .loop_do1r - .jmptab_r /* loads bytes sign-extended. Otherwise */ - .byte .loop_do2r - .jmptab_r /* the last loop would be out of reach */ - .byte .loop_do3r - .jmptab_r /* of the offset range. */ - - /* byte aligned destination (long + 1) */ - .align 2 -.loop_do1r: - mov.l @r6,r1 /* load first long */ - add #-8,r6 /* decrement source addr */ - mov.l @(12,r6),r0 /* load second long */ - cmp/hi r3,r6 /* runs r6 down to first or second long bound */ - mov.b r0,@-r4 /* store low byte of second long */ - shlr8 r0 /* get upper 3 bytes */ - mov r1,r2 /* copy first long */ - shll16 r2 /* move low byte of first long all the way up, .. */ - shll8 r2 - or r2,r0 /* ..combine with the 3 bytes of second long.. */ - mov.l r0,@-r4 /* ..and store as long */ - shlr8 r1 /* get middle 2 bytes */ - mov.w r1,@-r4 /* store as word */ - shlr16 r1 /* get upper byte */ - mov.b r1,@-r4 /* and store */ - bt .loop_do1r - - add #-4,r3 /* readjust end address */ -.last_do13r: - cmp/hi r3,r6 /* first long left? 
*/ - bf .start_b2r /* no, jump to trailing byte loop */ - - nop /* alignment */ - mov.l @(4,r6),r0 /* load first long */ - add #-4,r6 /* decrement source addr */ - mov.b r0,@-r4 /* store low byte */ - shlr8 r0 /* get middle 2 bytes */ - mov.w r0,@-r4 /* store as word */ - shlr16 r0 /* get upper byte */ - bra .start_b2r /* jump to trailing byte loop */ - mov.b r0,@-r4 /* delay slot: and store */ - - /* byte aligned destination (long + 3) */ - .align 2 -.loop_do3r: - mov.l @r6,r1 /* load first long */ - add #-8,r6 /* decrement source addr */ - mov.l @(12,r6),r0 /* load second long */ - mov r1,r2 /* copy first long */ - mov.b r0,@-r4 /* store low byte of second long */ - shlr8 r0 /* get middle 2 bytes */ - mov.w r0,@-r4 /* store as word */ - shlr16 r0 /* get upper byte */ - shll8 r2 /* move lower 3 bytes of first long one up.. */ - or r2,r0 /* ..combine with the 1 byte of second long.. */ - mov.l r0,@-r4 /* ..and store as long */ - shlr16 r1 /* get upper byte of first long.. */ - shlr8 r1 - cmp/hi r3,r6 /* runs r6 down to first or second long bound */ - mov.b r1,@-r4 /* ..and store */ - bt .loop_do3r - - bra .last_do13r /* handle first longword: reuse routine for (long + 1) */ - add #-4,r3 /* delay slot: readjust end address */ - - /* trailing byte loop: copies 0..3 bytes (or all for < 11 in total) */ - .align 2 -.loop_b2r: - mov.b @(7,r6),r0 /* load byte (r6 runs 8 behind, so +7 is the byte just below) */ - add #-1,r6 /* decrement source addr */ - mov.b r0,@-r4 /* store byte */ -.start_b2r: - cmp/hi r5,r6 /* runs r6 down to start address */ - bt .loop_b2r - - rts - mov r4,r0 /* delay slot: return dest start address (r4 has run down to it) */ -.end: - .size _memmove,.end-_memmove diff --git a/firmware/target/sh/memset-sh.S b/firmware/target/sh/memset-sh.S deleted file mode 100644 index 8cae1ea112..0000000000 --- a/firmware/target/sh/memset-sh.S +++ /dev/null @@ -1,109 +0,0 @@ -/*************************************************************************** - * __________ __ ___. 
- * Open \______ \ ____ ____ | | _\_ |__ _______ ___ - * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / - * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < - * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ - * \/ \/ \/ \/ \/ - * $Id$ - * - * Copyright (C) 2004 by Jens Arnold - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY - * KIND, either express or implied. - * - ****************************************************************************/ -#include "config.h" - - .section .icode,"ax",@progbits - - .align 2 - .global _memset - .type _memset,@function - -/* Fills a memory region with specified byte value - * This version is optimized for speed - * - * arguments: - * r4 - start address - * r5 - data (only the low byte is used as fill value) - * r6 - length - * - * return value: - * r0 - start address (like ANSI version) - * - * register usage: - * r0 - temporary - * r1 - start address +11 for main loop - * r4 - start address - * r5 - data (spread to all 4 bytes when using long stores) - * r6 - current address (runs down from end to start) - * - * The instruction order below is devised in a way to utilize the pipelining - * of the SH1 to the max. The routine fills memory from end to start in - * order to utilize the auto-decrementing store instructions. - * - * The instruction directly following rts sits in the branch delay slot and - * is executed before the return takes effect. - */ - -_memset: - neg r4,r0 - and #3,r0 /* r0 = (4 - align_offset) % 4 */ - add #4,r0 /* r0 = bytes up to the first long bound plus one full long */ - cmp/hs r0,r6 /* at least one aligned longword to fill? 
*/ - add r4,r6 /* r6 = end_address */ - bf .no_longs /* no, jump directly to byte loop (r5 is left unspread) */ - - extu.b r5,r5 /* start: spread data to all 4 bytes */ - swap.b r5,r0 - or r0,r5 /* data now in 2 lower bytes of r5 */ - swap.w r5,r0 - or r0,r5 /* data now in all 4 bytes of r5 */ - - mov r6,r0 /* r0 = end address */ - tst #3,r0 /* r0 already long aligned? */ - bt .end_b1 /* yes: skip loop */ - - /* leading byte loop: sets 1..3 bytes at the end of the region */ -.loop_b1: - mov.b r5,@-r0 /* store byte */ - tst #3,r0 /* r0 long aligned? */ - bf .loop_b1 /* runs r0 down until long aligned */ - - mov r0,r6 /* r6 = last long bound */ - nop /* keep alignment */ - -.end_b1: - mov r4,r1 /* r1 = start_address... */ - add #11,r1 /* ... + 11, combined for rounding and offset */ - xor r1,r0 - tst #4,r0 /* bit 2 tells whether an even or odd number of */ - bf .loop_odd /* longwords to set */ - - /* main loop: set 2 longs per pass; .loop_odd enters in the middle so that only one long is stored in the first pass */ -.loop_2l: - mov.l r5,@-r6 /* store first long */ -.loop_odd: - cmp/hi r1,r6 /* runs r6 down to first long bound */ - mov.l r5,@-r6 /* store second long */ - bt .loop_2l - -.no_longs: - cmp/hi r4,r6 /* any bytes left? */ - bf .end_b2 /* no: skip loop */ - - /* trailing byte loop (stores only the low byte of r5, so it works whether or not r5 was spread) */ -.loop_b2: - mov.b r5,@-r6 /* store byte */ - cmp/hi r4,r6 /* runs r6 down to the start address */ - bt .loop_b2 - -.end_b2: - rts - mov r4,r0 /* delay slot: return start address */ - -.end: - .size _memset,.end-_memset diff --git a/firmware/target/sh/strlen-sh.S b/firmware/target/sh/strlen-sh.S deleted file mode 100644 index e7169e25db..0000000000 --- a/firmware/target/sh/strlen-sh.S +++ /dev/null @@ -1,96 +0,0 @@ -/*************************************************************************** - * __________ __ ___. 
- * Open \______ \ ____ ____ | | _\_ |__ _______ ___ - * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / - * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < - * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ - * \/ \/ \/ \/ \/ - * $Id$ - * - * Copyright (C) 2005 by Jens Arnold - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY - * KIND, either express or implied. - * - ****************************************************************************/ -#include "config.h" - - .section .icode,"ax",@progbits - - .align 2 - .global _strlen - .type _strlen,@function - -/* Works out the length of a string - * This version is optimized for speed - * - * arguments: - * r4 - start address - * - * return value: - * r0 - string length - * - * register usage: - * r0 - current address - * r1 - current value (byte/long) - * r2 - mask for alignment / zero (for cmp/str) - * r4 - start address - * - * The main loop reads whole aligned longwords, so up to 3 bytes following - * the terminating zero (within the same aligned longword) may be read. - * - * The instruction directly following rts sits in the branch delay slot and - * is executed before the return takes effect. - */ - -_strlen: - mov r4,r0 /* r0 = start address */ - tst #3,r0 /* long aligned? */ - bt .start_l /* yes, jump directly to the longword loop */ - - /* not long aligned: check the first 3 bytes */ - mov.b @r0+,r1 /* fetch first byte */ - tst r1,r1 /* byte == 0 ? */ - bt .hitzero /* yes, string end found */ - mov.b @r0+,r1 /* fetch second byte */ - mov #3,r2 /* prepare mask: r2 = 0..00000011b */ - tst r1,r1 /* byte == 0 ? 
*/ - bt .hitzero /* yes, string end found */ - mov.b @r0+,r1 /* fetch third byte */ - not r2,r2 /* prepare mask: r2 = 1..11111100b */ - tst r1,r1 /* byte == 0 ? 
*/ - bt .hitzero /* yes, string end found */ - - /* not yet found, fall through into longword loop */ - and r2,r0 /* align down to long bound (may step back over 1 or 2 bytes already found non-zero, which is harmless) */ - - /* main loop: check longwords */ -.start_l: - mov #0,r2 /* zero longword for cmp/str */ -.loop_l: - mov.l @r0+,r1 /* fetch long word */ - cmp/str r1,r2 /* any zero byte within? */ - bf .loop_l /* no, loop */ - add #-4,r0 /* set address back to start of this longword */ - - /* the last longword contains the string end: figure out the byte */ - mov.b @r0+,r1 /* fetch first byte */ - tst r1,r1 /* byte == 0 ? */ - bt .hitzero /* yes, string end found */ - mov.b @r0+,r1 /* fetch second byte */ - tst r1,r1 /* byte == 0 ? */ - bt .hitzero /* yes, string end found */ - mov.b @r0+,r1 /* fetch third byte */ - tst r1,r1 /* byte == 0 ? */ - bt .hitzero /* yes, string end found */ - rts /* must be the fourth byte; r0 already points at it */ - sub r4,r0 /* delay slot: len = string_end - string_start */ - -.hitzero: - add #-1,r0 /* undo address increment */ - rts - sub r4,r0 /* delay slot: len = string_end - string_start */ - -.end: - .size _strlen,.end-_strlen - -- cgit v1.2.3