1 files changed, 0 insertions, 287 deletions
diff --git a/firmware/target/arm/ipod/lcd-as-color-nano.S b/firmware/target/arm/ipod/lcd-as-color-nano.S
deleted file mode 100644
index f6f9cc5be3..0000000000
--- a/firmware/target/arm/ipod/lcd-as-color-nano.S
+++ /dev/null
@@ -1,287 +0,0 @@
-/***************************************************************************
- *             __________               __   ___.
- *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
- *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
- *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
- *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
- *                     \/            \/     \/    \/            \/
- * $Id:$
- *
- * Copyright (C) 2010-2011 by Andree Buschmann
- *
- * Generic asm helper function used by YUV blitting.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
- * KIND, either express or implied.
- *
- ****************************************************************************/
-#include "config.h"
-#include "cpu.h"
-/**************************************************************************** 
- * #define FORCE_FIFO_WAIT
- *
- * This is not needed in YUV blitting when the LCD IF is fast enough. In this
- * case YUV-to-RGB conversion per pixel needs longer than the transfer of a 
- * pixel via the LCD IF.
- ****************************************************************************/
-#include "config.h"
-/* Force the FIFO wait for both iPod Color and iPod nano1G until we know for
- * which devices we can switch this off. */
-#define FORCE_FIFO_WAIT
-    .section .icode, "ax", %progbits  /* "ax" = allocatable, executable */
-    
-/****************************************************************************
- * extern void lcd_write_yuv420_lines(unsigned char const * const src[3],
- *                                    const unsigned LCD_BASE,
- *                                    int width,
- *                                    int stride);
- *
- * In:  r0 = src, array of three plane pointers { Y', Cb, Cr }
- *      r1 = LCD_BASE; pixel words are stored to LCD_BASE + 0x100 and, when
- *           FORCE_FIFO_WAIT is defined, a status word is polled at
- *           LCD_BASE + 0x20 before each store
- *      r2 = width, number of pixels per line
- *      r3 = stride, byte offset from the first to the second luma line
- * Out: nothing. r4-r10 and lr are saved on entry; r0-r3 and r12 are used
- *      as scratch.
- *
- *   Conversion from Motion JPEG and MPEG Y'PbPr to RGB is:
- *   |R|   |1.164  0.000  1.596| |Y' -  16|
- *   |G| = |1.164 -0.391 -0.813| |Pb - 128|
- *   |B|   |1.164  2.018  0.000| |Pr - 128|
- *
- *   Scaled, normalized, rounded and tweaked to yield RGB 565:
- *   |R|   |74   0 101| |Y' -  16| >> 9
- *   |G| = |74 -24 -51| |Cb - 128| >> 8
- *   |B|   |74 128   0| |Cr - 128| >> 9
- *
- * Converts two lines from YUV to RGB565 and writes to LCD at once. First loop
- * loads Cb/Cr, calculates the chroma offsets and saves them to a buffer on
- * the stack. Within the second loop these chroma offsets are reloaded from
- * that buffer. Within each loop iteration two pixels are calculated, byte
- * swapped, packed into one word and written to the LCD.
- *
- * Stack frame after allocation (byte offsets from sp):
- *   +0   LCD_BASE
- *   +4   width
- *   +8   &ysrc[stride] (start of the second luma line)
- *   +12  size of this allocation in bytes
- *   +16  chroma buffer, three words (bu, rv, guv) per pixel pair
- *
- * Register use inside both loops:
- *   r0 = bu, r1 = rv, r2 = guv (r0 is reused as scratch in the clamp test
- *        of the second pixel, after bu has been consumed)
- *   r3-r6 = scratch for colour components and packed pixels
- *   r7 = pixels left in the line, r8 = chroma buffer pointer
- *   r9 = luma pointer, r10 = Cb pointer, r12 = Cr pointer (1st loop only)
- *   lr = LCD data port address
- *
- * NOTE(review): both loops run their body before testing the counter and
- * step it by two, while the chroma buffer is sized as (width + width/2)
- * words. width therefore appears to be expected to be positive and even;
- * otherwise the last stmia of the first loop stores past the allocation.
- * Confirm that callers never pass such a width.
- */
-    .align      2
-    .global     lcd_write_yuv420_lines
-    .type       lcd_write_yuv420_lines, %function
-lcd_write_yuv420_lines:
-                                      /* r0 = src = yuv_src */
-                                      /* r1 = dst = LCD_BASE */
-                                      /* r2 = width */
-                                      /* r3 = stride */
-    stmfd       sp!, { r4-r10, lr }   /* save non-scratch */
-    ldmia       r0, { r9, r10, r12 }  /* r9 = yuv_src[0] = Y'_p */
-                                      /* r10 = yuv_src[1] = Cb_p */
-                                      /* r12 = yuv_src[2] = Cr_p */
-    add         r3, r9, r3            /* r3 = &ysrc[stride] */
-    add         r4, r2, r2, asr #1    /* chroma buffer length = width/2 * 3 */
-    mov         r4, r4, asl #2        /*   in words (*4) so stm/ldm can be used */
-    add         r4, r4, #19           /*   plus 16 bytes for 4 additional words, */
-    bic         r4, r4, #3            /*   rounded up to a multiple of 4 bytes */
-    sub         sp, sp, r4            /*   and allocate on stack */
-    stmia       sp, {r1-r4}           /* LCD_BASE, width, &ysrc[stride], stack_alloc */
-    mov         r7, r2                /* r7 = loop count (pixels left) */
-    add         r8, sp, #16           /* r8 = chroma buffer, after the 4 words */
-    add         lr, r1, #0x100        /* lr = LCD data port = LCD_BASE + 0x100 */
-    /* 1st loop start */
-10:                                   /* loop start: one pixel pair per pass */
-    ldrb        r0, [r10], #1         /* r0 = *usrc++ = *Cb_p++ */
-    ldrb        r1, [r12], #1         /* r1 = *vsrc++ = *Cr_p++ */
-    sub         r0, r0, #128          /* r0 = Cb-128 (called Cb below) */
-    sub         r1, r1, #128          /* r1 = Cr-128 (called Cr below) */
-    add         r2, r1, r1, asl #1    /* r2 = Cr*3 */
-    add         r2, r2, r2, asl #4    /* r2 = Cr*51 */
-    add         r2, r2, r0, asl #3    /* r2 = Cr*51 + Cb*8 */
-    add         r2, r2, r0, asl #4    /* r2 = Cr*51 + Cb*24 */
-    add         r4, r1, r1, asl #2    /* r4 = Cr*5 */
-    add         r4, r4, r1, asl #5    /* r4 = Cr*37 */
-    add         r1, r4, r1, asl #6    /* r1 = Cr*101 */
-    add         r1, r1, #256          /* r1 = rv = (r1 + 256) >> 9 */
-    mov         r1, r1, asr #9        /*   (arithmetic shift keeps the sign) */
-    rsb         r2, r2, #128          /* r2 = guv = (-r2 + 128) >> 8 */
-    mov         r2, r2, asr #8        /*   (arithmetic shift keeps the sign) */
-    add         r0, r0, #2            /* r0 = bu = (Cb*128 + 256) >> 9 */
-    mov         r0, r0, asr #2        /*   which equals (Cb + 2) >> 2 */
-    stmia       r8!, {r0-r2}          /* append bu, rv, guv to chroma buffer */
-    /* 1st loop, first pixel */
-    ldrb        r5, [r9], #1          /* r5 = *ysrc++ = *Y'_p++ */
-    sub         r5, r5, #16           /* r5 = Y'-16 */
-    add         r3, r5, r5, asl #2    /* r3 = (Y'-16) * 5 */
-    add         r5, r3, r5, asl #5    /* r5 = (Y'-16) * 37 = Y/2, Y = (Y'-16)*74 */
-    add         r6, r1, r5, asr #8    /* r6 = r = (Y >> 9) + rv */
-    add         r3, r2, r5, asr #7    /* r3 = g = (Y >> 8) + guv */
-    add         r4, r0, r5, asr #8    /* r4 = b = (Y >> 9) + bu */
-    orr         r5, r6, r4            /* check if clamping is needed... */
-    orr         r5, r5, r3, asr #1    /* ...at all: OR of r, b and g/2 */
-    cmp         r5, #31               /* unsigned: negative counts as > 31 */
-    bls         15f                   /* -> no clamp */
-    cmp         r6, #31               /* clamp r */
-    mvnhi       r6, r6, asr #31       /*   negative -> 0, too large -> ~0 */
-    andhi       r6, r6, #31           /*   ~0 masked down to 31 */
-    cmp         r3, #63               /* clamp g */
-    mvnhi       r3, r3, asr #31       /*   negative -> 0, too large -> ~0 */
-    andhi       r3, r3, #63           /*   ~0 masked down to 63 */
-    cmp         r4, #31               /* clamp b */
-    mvnhi       r4, r4, asr #31       /*   negative -> 0, too large -> ~0 */
-    andhi       r4, r4, #31           /*   ~0 masked down to 31 */
-15:                                   /* no clamp */
-    /* calculate pixel_1 and save to r4 for later pixel packing */
-    orr         r4, r4, r3, lsl #5    /* pixel_1 = r<<11 | g<<5 | b */
-    orr         r4, r4, r6, lsl #11   /* r4 = pixel_1 */
-    /* 1st loop, second pixel */
-    ldrb        r5, [r9], #1          /* r5 = *ysrc++ = *Y'_p++ */
-    sub         r5, r5, #16           /* r5 = Y'-16 */
-    add         r3, r5, r5, asl #2    /* r3 = (Y'-16) * 5 */
-    add         r5, r3, r5, asl #5    /* r5 = (Y'-16) * 37 = Y/2, Y = (Y'-16)*74 */
-    add         r6, r1, r5, asr #8    /* r6 = r = (Y >> 9) + rv */
-    add         r3, r2, r5, asr #7    /* r3 = g = (Y >> 8) + guv */
-    add         r5, r0, r5, asr #8    /* r5 = b = (Y >> 9) + bu */   
-    orr         r0, r6, r5            /* check if clamping is needed... */
-    orr         r0, r0, r3, asr #1    /* ...at all (bu in r0 is no longer needed) */
-    cmp         r0, #31               /* unsigned: negative counts as > 31 */
-    bls         15f                   /* -> no clamp */
-    cmp         r6, #31               /* clamp r */
-    mvnhi       r6, r6, asr #31       /*   negative -> 0, too large -> ~0 */
-    andhi       r6, r6, #31           /*   ~0 masked down to 31 */
-    cmp         r3, #63               /* clamp g */
-    mvnhi       r3, r3, asr #31       /*   negative -> 0, too large -> ~0 */
-    andhi       r3, r3, #63           /*   ~0 masked down to 63 */
-    cmp         r5, #31               /* clamp b */
-    mvnhi       r5, r5, asr #31       /*   negative -> 0, too large -> ~0 */
-    andhi       r5, r5, #31           /*   ~0 masked down to 31 */
-15:                                   /* no clamp */
-    /* calculate pixel_2 and pack with pixel_1 before writing */
-    orr         r5, r5, r3, lsl #5    /* pixel_2 = r<<11 | g<<5 | b */
-    orr         r5, r5, r6, lsl #11   /* r5 = pixel_2 */
-#ifdef FORCE_FIFO_WAIT
-    /* wait for FIFO half full: spin while bit 24 of the word at lr-0xE0 is 0 */
-.fifo_wait1:
-    ldr         r3, [lr, #-0xE0]      /* while !(LCD2_BLOCK_CTRL & 0x1000000); */
-    tst         r3, #0x1000000
-    beq         .fifo_wait1
-#endif
-    mov         r3, r4, lsl #8        /* swap pixel_1 */
-    and         r3, r3, #0xff00       /*   r3 = low byte moved to bits 15..8 */
-    add         r4, r3, r4, lsr #8    /*   r4 = pixel_1 with bytes exchanged */
-    
-    orr         r4, r4, r5, lsl #24   /* swap pixel_2 and pack with pixel_1 */
-    mov         r5, r5, lsr #8        /*   low byte went to bits 31..24, */
-    orr         r4, r4, r5, lsl #16   /*   high byte goes to bits 23..16 */
-    str         r4, [lr]              /* write pixel_1 and pixel_2 */
-    subs        r7, r7, #2            /* two pixels done, check for loop end */
-    bgt         10b                   /* back to beginning  */
-    /* 1st loop end */
-    /* Reload several registers for pointer rewinding for next loop */
-    add         r8, sp, #16           /* chroma buffer */
-    ldmia       sp, { r1, r7, r9}     /* r1  = LCD_BASE (overwritten by the ldmia at 20:) */
-                                      /* r7  = loop count */
-                                      /* r9 = &ysrc[stride] */   
-    /* 2nd loop start */
-20:                                   /* loop start: one pixel pair per pass */
-    /* restore r0 (bu), r1 (rv) and r2 (guv) from chroma buffer */
-    ldmia       r8!, {r0-r2}
-    /* 2nd loop, first pixel */
-    ldrb        r5, [r9], #1          /* r5 = *ysrc++ = *Y'_p++ */
-    sub         r5, r5, #16           /* r5 = Y'-16 */
-    add         r3, r5, r5, asl #2    /* r3 = (Y'-16) * 5 */
-    add         r5, r3, r5, asl #5    /* r5 = (Y'-16) * 37 = Y/2, Y = (Y'-16)*74 */
-    add         r6, r1, r5, asr #8    /* r6 = r = (Y >> 9) + rv */
-    add         r3, r2, r5, asr #7    /* r3 = g = (Y >> 8) + guv */
-    add         r4, r0, r5, asr #8    /* r4 = b = (Y >> 9) + bu */
-    orr         r5, r6, r4            /* check if clamping is needed... */
-    orr         r5, r5, r3, asr #1    /* ...at all: OR of r, b and g/2 */
-    cmp         r5, #31               /* unsigned: negative counts as > 31 */
-    bls         15f                   /* -> no clamp */
-    cmp         r6, #31               /* clamp r */
-    mvnhi       r6, r6, asr #31       /*   negative -> 0, too large -> ~0 */
-    andhi       r6, r6, #31           /*   ~0 masked down to 31 */
-    cmp         r3, #63               /* clamp g */
-    mvnhi       r3, r3, asr #31       /*   negative -> 0, too large -> ~0 */
-    andhi       r3, r3, #63           /*   ~0 masked down to 63 */
-    cmp         r4, #31               /* clamp b */
-    mvnhi       r4, r4, asr #31       /*   negative -> 0, too large -> ~0 */
-    andhi       r4, r4, #31           /*   ~0 masked down to 31 */
-15:                                   /* no clamp */
-    /* calculate pixel_1 and save to r4 for later pixel packing */
-    orr         r4, r4, r3, lsl #5    /* pixel_1 = r<<11 | g<<5 | b */
-    orr         r4, r4, r6, lsl #11   /* r4 = pixel_1 */
-    /* 2nd loop, second pixel */
-    ldrb        r5, [r9], #1          /* r5 = *ysrc++ = *Y'_p++ */
-    sub         r5, r5, #16           /* r5 = Y'-16 */
-    add         r3, r5, r5, asl #2    /* r3 = (Y'-16) * 5 */
-    add         r5, r3, r5, asl #5    /* r5 = (Y'-16) * 37 = Y/2, Y = (Y'-16)*74 */
-    add         r6, r1, r5, asr #8    /* r6 = r = (Y >> 9) + rv */
-    add         r3, r2, r5, asr #7    /* r3 = g = (Y >> 8) + guv */
-    add         r5, r0, r5, asr #8    /* r5 = b = (Y >> 9) + bu */
-    orr         r0, r6, r5            /* check if clamping is needed... */
-    orr         r0, r0, r3, asr #1    /* ...at all (bu in r0 is no longer needed) */
-    cmp         r0, #31               /* unsigned: negative counts as > 31 */
-    bls         15f                   /* -> no clamp */
-    cmp         r6, #31               /* clamp r */
-    mvnhi       r6, r6, asr #31       /*   negative -> 0, too large -> ~0 */
-    andhi       r6, r6, #31           /*   ~0 masked down to 31 */
-    cmp         r3, #63               /* clamp g */
-    mvnhi       r3, r3, asr #31       /*   negative -> 0, too large -> ~0 */
-    andhi       r3, r3, #63           /*   ~0 masked down to 63 */
-    cmp         r5, #31               /* clamp b */
-    mvnhi       r5, r5, asr #31       /*   negative -> 0, too large -> ~0 */
-    andhi       r5, r5, #31           /*   ~0 masked down to 31 */
-15:                                   /* no clamp */
-    /* calculate pixel_2 and pack with pixel_1 before writing */
-    orr         r5, r5, r3, lsl #5    /* pixel_2 = r<<11 | g<<5 | b */
-    orr         r5, r5, r6, lsl #11   /* r5 = pixel_2 */
-#ifdef FORCE_FIFO_WAIT
-    /* wait for FIFO half full: spin while bit 24 of the word at lr-0xE0 is 0 */
-.fifo_wait2:
-    ldr         r3, [lr, #-0xE0]      /* while !(LCD2_BLOCK_CTRL & 0x1000000); */
-    tst         r3, #0x1000000
-    beq         .fifo_wait2
-#endif
-    mov         r3, r4, lsl #8        /* swap pixel_1 */
-    and         r3, r3, #0xff00       /*   r3 = low byte moved to bits 15..8 */
-    add         r4, r3, r4, lsr #8    /*   r4 = pixel_1 with bytes exchanged */
-    
-    orr         r4, r4, r5, lsl #24   /* swap pixel_2 and pack with pixel_1 */
-    mov         r5, r5, lsr #8        /*   low byte went to bits 31..24, */
-    orr         r4, r4, r5, lsl #16   /*   high byte goes to bits 23..16 */
-    
-    str         r4, [lr]              /* write pixel_1 and pixel_2 */
-    subs        r7, r7, #2            /* two pixels done, check for loop end */
-    bgt         20b                   /* back to beginning  */
-    /* 2nd loop end */
-    ldr         r3, [sp, #12]         /* r3 = stack_alloc stored at entry */
-    add         sp, sp, r3            /* deallocate buffer */
-    ldmpc       regs=r4-r10           /* restore registers; ldmpc is a macro defined outside this file, presumably also returns -- confirm */
-    .ltorg
-    .size   lcd_write_yuv420_lines, .-lcd_write_yuv420_lines

diff --git a/firmware/target/arm/ipod/lcd-as-color-nano.S b/firmware/target/arm/ipod/lcd-as-color-nano.S deleted file mode 100644 index f6f9cc5be3..0000000000 --- a/firmware/target/arm/ipod/lcd-as-color-nano.S +++ /dev/null
@@ -1,287 +0,0 @@
1	/***************************************************************************
2	*             __________               __   ___.
3	*   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
4	*   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
5	*   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
6	*   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
7	*                     \/            \/     \/    \/            \/
8	* $Id:$
9	*
10	* Copyright (C) 2010-2011 by Andree Buschmann
11	*
12	* Generic asm helper function used by YUV blitting.
13	*
14	* This program is free software; you can redistribute it and/or
15	* modify it under the terms of the GNU General Public License
16	* as published by the Free Software Foundation; either version 2
17	* of the License, or (at your option) any later version.
18	*
19	* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
20	* KIND, either express or implied.
21	*
22	****************************************************************************/
23
24	#include "config.h"
25	#include "cpu.h"
26
27	/****************************************************************************
28	* #define FORCE_FIFO_WAIT
29	*
30	* This is not needed in YUV blitting when the LCD IF is fast enough. In this
31	* case YUV-to-RGB conversion per pixel needs longer than the transfer of a
32	* pixel via the LCD IF.
33	****************************************************************************/
34
35	#include "config.h"
36
37	/* Set FIFO wait for both iPod Color and iPod nano1G until we know for which
38	* devices we can switch this off. */
39	#define FORCE_FIFO_WAIT
40
41	.section .icode, "ax", %progbits
42
43	/****************************************************************************
44	* extern void lcd_write_yuv420_lines(unsigned char const * const src[3],
45	* const unsigned LCD_BASE,
46	* int width,
47	* int stride);
48	*
49	* Conversion from Motion JPEG and MPEG Y'PbPr to RGB is:
50	* |R|   |1.164  0.000  1.596| |Y' -  16|
51	* |G| = |1.164 -0.391 -0.813| |Pb - 128|
52	* |B|   |1.164  2.018  0.000| |Pr - 128|
53	*
54	* Scaled, normalized, rounded and tweaked to yield RGB 565:
55	* |R|   |74   0 101| |Y' -  16| >> 9
56	* |G| = |74 -24 -51| |Cb - 128| >> 8
57	* |B|   |74 128   0| |Cr - 128| >> 9
58	*
59	* Converts two lines from YUV to RGB565 and writes to LCD at once. First loop
60	* loads Cb/Cr, calculates the chroma offset and saves them to buffer. Within
61	* the second loop these chroma offset are reloaded from buffer. Within each
62	* loop two pixels are calculated and written to LCD.
63	*/
64	.align 2
65	.global lcd_write_yuv420_lines
66	.type lcd_write_yuv420_lines, %function
67	lcd_write_yuv420_lines:
68	/* r0 = src = yuv_src */
69	/* r1 = dst = LCD_BASE */
70	/* r2 = width */
71	/* r3 = stride */
72	stmfd sp!, { r4-r10, lr } /* save non-scratch */
73	ldmia r0, { r9, r10, r12 } /* r9 = yuv_src[0] = Y'_p */
74	/* r10 = yuv_src[1] = Cb_p */
75	/* r12 = yuv_src[2] = Cr_p */
76	add r3, r9, r3 /* r3 = &ysrc[stride] */
77	add r4, r2, r2, asr #1 /* chroma buffer length = width/2 * 3 */
78	mov r4, r4, asl #2 /* use words for str/ldm possibility */
79	add r4, r4, #19 /* plus room for 4 additional words, */
80	bic r4, r4, #3 /* rounded up to multiples of 4 byte */
81	sub sp, sp, r4 /* and allocate on stack */
82	stmia sp, {r1-r4} /* LCD_BASE, width, &ysrc[stride], stack_alloc */
83
84	mov r7, r2 /* r7 = loop count */
85	add r8, sp, #16 /* chroma buffer */
86	add lr, r1, #0x100 /* LCD data port = LCD2_BASE + 0x100 */
87
88	/* 1st loop start */
89	10: /* loop start */
90
91	ldrb r0, [r10], #1 /* r0 = *usrc++ = *Cb_p++ */
92	ldrb r1, [r12], #1 /* r1 = *vsrc++ = *Cr_p++ */
93
94	sub r0, r0, #128 /* r0 = Cb-128 */
95	sub r1, r1, #128 /* r1 = Cr-128 */
96
97	add r2, r1, r1, asl #1 /* r2 = Cr*51 + Cb*24 */
98	add r2, r2, r2, asl #4
99	add r2, r2, r0, asl #3
100	add r2, r2, r0, asl #4
101
102	add r4, r1, r1, asl #2 /* r1 = Cr*101 */
103	add r4, r4, r1, asl #5
104	add r1, r4, r1, asl #6
105
106	add r1, r1, #256 /* r1 = rv = (r1 + 256) >> 9 */
107	mov r1, r1, asr #9
108	rsb r2, r2, #128 /* r2 = guv = (-r2 + 128) >> 8 */
109	mov r2, r2, asr #8
110	add r0, r0, #2 /* r0 = bu = (Cb*128 + 256) >> 9 */
111	mov r0, r0, asr #2
112	stmia r8!, {r0-r2} /* store r0, r1 and r2 to chroma buffer */
113
114	/* 1st loop, first pixel */
115	ldrb r5, [r9], #1 /* r5 = *ysrc++ = *Y'_p++ */
116	sub r5, r5, #16 /* r5 = (Y'-16) * 74 */
117	add r3, r5, r5, asl #2
118	add r5, r3, r5, asl #5
119
120	add r6, r1, r5, asr #8 /* r6 = r = (Y >> 9) + rv */
121	add r3, r2, r5, asr #7 /* r3 = g = (Y >> 8) + guv */
122	add r4, r0, r5, asr #8 /* r4 = b = (Y >> 9) + bu */
123
124	orr r5, r6, r4 /* check if clamping is needed... */
125	orr r5, r5, r3, asr #1 /* ...at all */
126	cmp r5, #31
127	bls 15f /* -> no clamp */
128	cmp r6, #31 /* clamp r */
129	mvnhi r6, r6, asr #31
130	andhi r6, r6, #31
131	cmp r3, #63 /* clamp g */
132	mvnhi r3, r3, asr #31
133	andhi r3, r3, #63
134	cmp r4, #31 /* clamp b */
135	mvnhi r4, r4, asr #31
136	andhi r4, r4, #31
137	15: /* no clamp */
138
139	/* calculate pixel_1 and save to r4 for later pixel packing */
140	orr r4, r4, r3, lsl #5 /* pixel_1 = r<<11 | g<<5 | b */
141	orr r4, r4, r6, lsl #11 /* r4 = pixel_1 */
142
143	/* 1st loop, second pixel */
144	ldrb r5, [r9], #1 /* r5 = *ysrc++ = *Y'_p++ */
145	sub r5, r5, #16 /* r5 = (Y'-16) * 74 */
146	add r3, r5, r5, asl #2
147	add r5, r3, r5, asl #5
148
149	add r6, r1, r5, asr #8 /* r6 = r = (Y >> 9) + rv */
150	add r3, r2, r5, asr #7 /* r3 = g = (Y >> 8) + guv */
151	add r5, r0, r5, asr #8 /* r5 = b = (Y >> 9) + bu */
152
153	orr r0, r6, r5 /* check if clamping is needed... */
154	orr r0, r0, r3, asr #1 /* ...at all */
155	cmp r0, #31
156	bls 15f /* -> no clamp */
157	cmp r6, #31 /* clamp r */
158	mvnhi r6, r6, asr #31
159	andhi r6, r6, #31
160	cmp r3, #63 /* clamp g */
161	mvnhi r3, r3, asr #31
162	andhi r3, r3, #63
163	cmp r5, #31 /* clamp b */
164	mvnhi r5, r5, asr #31
165	andhi r5, r5, #31
166	15: /* no clamp */
167
168	/* calculate pixel_2 and pack with pixel_1 before writing */
169	orr r5, r5, r3, lsl #5 /* pixel_2 = r<<11 | g<<5 | b */
170	orr r5, r5, r6, lsl #11 /* r5 = pixel_2 */
171	#ifdef FORCE_FIFO_WAIT
172	/* wait for FIFO half full */
173	.fifo_wait1:
174	ldr r3, [lr, #-0xE0] /* while !(LCD2_BLOCK_CTRL & 0x1000000); */
175	tst r3, #0x1000000
176	beq .fifo_wait1
177	#endif
178
179	mov r3, r4, lsl #8 /* swap pixel_1 */
180	and r3, r3, #0xff00
181	add r4, r3, r4, lsr #8
182
183	orr r4, r4, r5, lsl #24 /* swap pixel_2 and pack with pixel_1 */
184	mov r5, r5, lsr #8
185	orr r4, r4, r5, lsl #16
186
187	str r4, [lr] /* write pixel_1 and pixel_2 */
188
189	subs r7, r7, #2 /* check for loop end */
190	bgt 10b /* back to beginning */
191	/* 1st loop end */
192
193	/* Reload several registers for pointer rewinding for next loop */
194	add r8, sp, #16 /* chroma buffer */
195	ldmia sp, { r1, r7, r9} /* r1 = LCD_BASE */
196	/* r7 = loop count */
197	/* r9 = &ysrc[stride] */
198
199	/* 2nd loop start */
200	20: /* loop start */
201	/* restore r0 (bu), r1 (rv) and r2 (guv) from chroma buffer */
202	ldmia r8!, {r0-r2}
203
204	/* 2nd loop, first pixel */
205	ldrb r5, [r9], #1 /* r5 = *ysrc++ = *Y'_p++ */
206	sub r5, r5, #16 /* r5 = (Y'-16) * 74 */
207	add r3, r5, r5, asl #2
208	add r5, r3, r5, asl #5
209
210	add r6, r1, r5, asr #8 /* r6 = r = (Y >> 9) + rv */
211	add r3, r2, r5, asr #7 /* r3 = g = (Y >> 8) + guv */
212	add r4, r0, r5, asr #8 /* r4 = b = (Y >> 9) + bu */
213
214	orr r5, r6, r4 /* check if clamping is needed... */
215	orr r5, r5, r3, asr #1 /* ...at all */
216	cmp r5, #31
217	bls 15f /* -> no clamp */
218	cmp r6, #31 /* clamp r */
219	mvnhi r6, r6, asr #31
220	andhi r6, r6, #31
221	cmp r3, #63 /* clamp g */
222	mvnhi r3, r3, asr #31
223	andhi r3, r3, #63
224	cmp r4, #31 /* clamp b */
225	mvnhi r4, r4, asr #31
226	andhi r4, r4, #31
227	15: /* no clamp */
228	/* calculate pixel_1 and save to r4 for later pixel packing */
229	orr r4, r4, r3, lsl #5 /* pixel_1 = r<<11 | g<<5 | b */
230	orr r4, r4, r6, lsl #11 /* r4 = pixel_1 */
231
232	/* 2nd loop, second pixel */
233	ldrb r5, [r9], #1 /* r5 = *ysrc++ = *Y'_p++ */
234	sub r5, r5, #16 /* r5 = (Y'-16) * 74 */
235	add r3, r5, r5, asl #2
236	add r5, r3, r5, asl #5
237
238	add r6, r1, r5, asr #8 /* r6 = r = (Y >> 9) + rv */
239	add r3, r2, r5, asr #7 /* r3 = g = (Y >> 8) + guv */
240	add r5, r0, r5, asr #8 /* r5 = b = (Y >> 9) + bu */
241
242	orr r0, r6, r5 /* check if clamping is needed... */
243	orr r0, r0, r3, asr #1 /* ...at all */
244	cmp r0, #31
245	bls 15f /* -> no clamp */
246	cmp r6, #31 /* clamp r */
247	mvnhi r6, r6, asr #31
248	andhi r6, r6, #31
249	cmp r3, #63 /* clamp g */
250	mvnhi r3, r3, asr #31
251	andhi r3, r3, #63
252	cmp r5, #31 /* clamp b */
253	mvnhi r5, r5, asr #31
254	andhi r5, r5, #31
255	15: /* no clamp */
256
257	/* calculate pixel_2 and pack with pixel_1 before writing */
258	orr r5, r5, r3, lsl #5 /* pixel_2 = r<<11 | g<<5 | b */
259	orr r5, r5, r6, lsl #11 /* r5 = pixel_2 */
260	#ifdef FORCE_FIFO_WAIT
261	/* wait for FIFO half full */
262	.fifo_wait2:
263	ldr r3, [lr, #-0xE0] /* while !(LCD2_BLOCK_CTRL & 0x1000000); */
264	tst r3, #0x1000000
265	beq .fifo_wait2
266	#endif
267
268	mov r3, r4, lsl #8 /* swap pixel_1 */
269	and r3, r3, #0xff00
270	add r4, r3, r4, lsr #8
271
272	orr r4, r4, r5, lsl #24 /* swap pixel_2 and pack with pixel_1 */
273	mov r5, r5, lsr #8
274	orr r4, r4, r5, lsl #16
275
276	str r4, [lr] /* write pixel_1 and pixel_2 */
277
278	subs r7, r7, #2 /* check for loop end */
279	bgt 20b /* back to beginning */
280	/* 2nd loop end */
281
282	ldr r3, [sp, #12]
283	add sp, sp, r3 /* deallocate buffer */
284	ldmpc regs=r4-r10 /* restore registers */
285
286	.ltorg
287	.size lcd_write_yuv420_lines, .-lcd_write_yuv420_lines