Diffstat (limited to 'apps/plugins/lib/gray_draw.c')
-rw-r--r--  apps/plugins/lib/gray_draw.c | 267
1 file changed, 267 insertions, 0 deletions
diff --git a/apps/plugins/lib/gray_draw.c b/apps/plugins/lib/gray_draw.c
index b66b8d708a..4d75af5c9b 100644
--- a/apps/plugins/lib/gray_draw.c
+++ b/apps/plugins/lib/gray_draw.c
@@ -2006,6 +2006,273 @@ static void _writearray(unsigned char *address, const unsigned char *src,
2006 : /* clobbers */
2007 "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "a0"
2008 );
2009 #elif defined(CPU_ARM)
2010 const unsigned char *_src;
2011 unsigned _mask, depth, trash;
2012
2013 _mask = mask;
2014 _src = src;
2015
2016 pat_ptr = &pat_stack[0];
2017
2018 /* precalculate the bit patterns with random shifts
2019 for all 8 pixels and put them on an extra "stack" */
2020 asm volatile
2021 (
2022 "mov r3, #8 \n" /* loop count */
2023
2024 ".wa_loop: \n" /** load pattern for pixel **/
2025 "mov r2, #0 \n" /* pattern for skipped pixel must be 0 */
2026 "movs %[mask], %[mask], lsr #1 \n" /* shift out msb of mask */
2027 "bcc .wa_skip \n" /* skip this pixel */
2028
2029 "ldrb r0, [%[src]] \n" /* load src byte */
2030 "ldrb r0, [%[trns], r0] \n" /* idxtable into pattern index */
2031 "ldr r2, [%[bpat], r0, lsl #2] \n" /* r2 = bitpattern[byte]; */
2032
2033 "add %[rnd], %[rnd], %[rnd], lsl #2 \n" /* multiply by 75 */
2034 "rsb %[rnd], %[rnd], %[rnd], lsl #4 \n"
2035 "add %[rnd], %[rnd], #74 \n" /* add another 74 */
2036 /* Since the lower bits are not very random: get bits 8..15 (need max. 5) */
2037 "and r1, %[rmsk], %[rnd], lsr #8 \n" /* ..and mask out unneeded bits */
2038
2039 "cmp r1, %[dpth] \n" /* random >= depth ? */
2040 "subhs r1, r1, %[dpth] \n" /* yes: random -= depth */
2041
2042 "mov r0, r2, lsl r1 \n" /** rotate pattern **/
2043 "sub r1, %[dpth], r1 \n"
2044 "orr r2, r0, r2, lsr r1 \n"
2045
2046 ".wa_skip: \n"
2047 "str r2, [%[patp]], #4 \n" /* push on pattern stack */
2048
2049 "add %[src], %[src], %[stri] \n" /* src += stride; */
2050 "subs r3, r3, #1 \n" /* loop 8 times (pixel block) */
2051 "bne .wa_loop \n"
2052 : /* outputs */
2053 [src] "+r"(_src),
2054 [patp]"+r"(pat_ptr),
2055 [rnd] "+r"(_gray_random_buffer),
2056 [mask]"+r"(_mask)
2057 : /* inputs */
2058 [stri]"r"(stride),
2059 [bpat]"r"(_gray_info.bitpattern),
2060 [trns]"r"(_gray_info.idxtable),
2061 [dpth]"r"(_gray_info.depth),
2062 [rmsk]"r"(_gray_info.randmask)
2063 : /* clobbers */
2064 "r0", "r1", "r2", "r3"
2065 );
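/* Roughly what the loop above computes per pixel, in C, for readers not
 * fluent in ARM assembly. This is only a sketch: the function and parameter
 * names below are invented for illustration (the real code reads _gray_info
 * and _gray_random_buffer and writes into pat_stack directly), and the types
 * are simplified. */

#include <stdint.h>

static void calc_patterns_sketch(const unsigned char *src, unsigned mask,
                                 int stride, unsigned depth, unsigned randmask,
                                 const uint32_t *bitpattern,
                                 const unsigned char *idxtable,
                                 unsigned *rand_state, uint32_t *pat_stack)
{
    for (int i = 0; i < 8; i++)             /* 8 pixels per block */
    {
        uint32_t pat = 0;                   /* pattern for a skipped pixel is 0 */

        if (mask & 1)                       /* lsb of mask selects this pixel */
        {
            pat = bitpattern[idxtable[*src]];   /* brightness -> bit pattern */

            *rand_state = *rand_state * 75 + 74;        /* small LCG step */
            unsigned r = (*rand_state >> 8) & randmask; /* low bits aren't random */
            if (r >= depth)
                r -= depth;                 /* random >= depth ? random -= depth */

            if (r != 0)                     /* rotate the depth-bit pattern by r */
                pat = (pat << r) | (pat >> (depth - r));
        }

        *pat_stack++ = pat;                 /* push onto the pattern "stack" */
        mask >>= 1;
        src += stride;                      /* next pixel in the block */
    }
}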
2066
2067 addr = address;
2068 _mask = mask;
2069 depth = _gray_info.depth;
2070
2071 /* set the bits for all 8 pixels in all bytes according to the
2072 * precalculated patterns on the pattern stack */
2073 asm volatile
2074 (
2075 "ldmdb %[patp], {r1 - r8} \n" /* pop all 8 patterns */
2076
2077 /** Rotate the four 8x8 bit "blocks" within r1..r8 **/
2078
2079 "mov %[rx], #0xF0 \n" /** Stage 1: 4 bit "comb" **/
2080 "orr %[rx], %[rx], %[rx], lsl #8 \n"
2081 "orr %[rx], %[rx], %[rx], lsl #16\n" /* bitmask = ...11110000 */
2082 "eor r0, r1, r5, lsl #4 \n"
2083 "and r0, r0, %[rx] \n"
2084 "eor r1, r1, r0 \n" /* r1 = ...e3e2e1e0a3a2a1a0 */
2085 "eor r5, r5, r0, lsr #4 \n" /* r5 = ...e7e6e5e4a7a6a5a4 */
2086 "eor r0, r2, r6, lsl #4 \n"
2087 "and r0, r0, %[rx] \n"
2088 "eor r2, r2, r0 \n" /* r2 = ...f3f2f1f0b3b2b1b0 */
2089 "eor r6, r6, r0, lsr #4 \n" /* r6 = ...f7f6f5f4f7f6f5f4 */
2090 "eor r0, r3, r7, lsl #4 \n"
2091 "and r0, r0, %[rx] \n"
2092 "eor r3, r3, r0 \n" /* r3 = ...g3g2g1g0c3c2c1c0 */
2093 "eor r7, r7, r0, lsr #4 \n" /* r7 = ...g7g6g5g4c7c6c5c4 */
2094 "eor r0, r4, r8, lsl #4 \n"
2095 "and r0, r0, %[rx] \n"
2096 "eor r4, r4, r0 \n" /* r4 = ...h3h2h1h0d3d2d1d0 */
2097 "eor r8, r8, r0, lsr #4 \n" /* r8 = ...h7h6h5h4d7d6d5d4 */
2098
2099 "mov %[rx], #0xCC \n" /** Stage 2: 2 bit "comb" **/
2100 "orr %[rx], %[rx], %[rx], lsl #8 \n"
2101 "orr %[rx], %[rx], %[rx], lsl #16\n" /* bitmask = ...11001100 */
2102 "eor r0, r1, r3, lsl #2 \n"
2103 "and r0, r0, %[rx] \n"
2104 "eor r1, r1, r0 \n" /* r1 = ...g1g0e1e0c1c0a1a0 */
2105 "eor r3, r3, r0, lsr #2 \n" /* r3 = ...g3g2e3e2c3c2a3a2 */
2106 "eor r0, r2, r4, lsl #2 \n"
2107 "and r0, r0, %[rx] \n"
2108 "eor r2, r2, r0 \n" /* r2 = ...h1h0f1f0d1d0b1b0 */
2109 "eor r4, r4, r0, lsr #2 \n" /* r4 = ...h3h2f3f2d3d2b3b2 */
2110 "eor r0, r5, r7, lsl #2 \n"
2111 "and r0, r0, %[rx] \n"
2112 "eor r5, r5, r0 \n" /* r5 = ...g5g4e5e4c5c4a5a4 */
2113 "eor r7, r7, r0, lsr #2 \n" /* r7 = ...g7g6e7e6c7c6a7a6 */
2114 "eor r0, r6, r8, lsl #2 \n"
2115 "and r0, r0, %[rx] \n"
2116 "eor r6, r6, r0 \n" /* r6 = ...h5h4f5f4d5d4b5b4 */
2117 "eor r8, r8, r0, lsr #2 \n" /* r8 = ...h7h6f7f6d7d6b7b6 */
2118
2119 "mov %[rx], #0xAA \n" /** Stage 3: 1 bit "comb" **/
2120 "orr %[rx], %[rx], %[rx], lsl #8 \n"
2121 "orr %[rx], %[rx], %[rx], lsl #16\n" /* bitmask = ...10101010 */
2122 "eor r0, r1, r2, lsl #1 \n"
2123 "and r0, r0, %[rx] \n"
2124 "eor r1, r1, r0 \n" /* r1 = ...h0g0f0e0d0c0b0a0 */
2125 "eor r2, r2, r0, lsr #1 \n" /* r2 = ...h1g1f1e1d1c1b1a1 */
2126 "eor r0, r3, r4, lsl #1 \n"
2127 "and r0, r0, %[rx] \n"
2128 "eor r3, r3, r0 \n" /* r3 = ...h2g2f2e2d2c2b2a2 */
2129 "eor r4, r4, r0, lsr #1 \n" /* r4 = ...h3g3f3e3d3c3b3a3 */
2130 "eor r0, r5, r6, lsl #1 \n"
2131 "and r0, r0, %[rx] \n"
2132 "eor r5, r5, r0 \n" /* r5 = ...h4g4f4e4d4c4b4a4 */
2133 "eor r6, r6, r0, lsr #1 \n" /* r6 = ...h5g5f5e5d5c5b5a5 */
2134 "eor r0, r7, r8, lsl #1 \n"
2135 "and r0, r0, %[rx] \n"
2136 "eor r7, r7, r0 \n" /* r7 = ...h6g6f6e6d6c6b6a6 */
2137 "eor r8, r8, r0, lsr #1 \n" /* r8 = ...h7g7f7e7d7c7b7a7 */
2138
2139 "sub r0, %[dpth], #1 \n" /** shift out unused low bytes **/
2140 "and r0, r0, #7 \n"
2141 "add pc, pc, r0, lsl #2 \n" /* jump into shift streak */
2142 "mov r8, r8, lsr #8 \n" /* r8: never reached */
2143 "mov r7, r7, lsr #8 \n"
2144 "mov r6, r6, lsr #8 \n"
2145 "mov r5, r5, lsr #8 \n"
2146 "mov r4, r4, lsr #8 \n"
2147 "mov r3, r3, lsr #8 \n"
2148 "mov r2, r2, lsr #8 \n"
2149 "mov r1, r1, lsr #8 \n"
2150
2151 "mvn %[mask], %[mask] \n" /* "set" mask -> "keep" mask */
2152 "ands %[mask], %[mask], #0xff \n"
2153 "beq .wa_sstart \n" /* short loop if no bits to keep */
2154
2155 "ldrb r0, [pc, r0] \n" /* jump into full loop */
2156 "add pc, pc, r0 \n"
2157 ".wa_ftable: \n"
2158 ".byte .wa_f1 - .wa_ftable - 4 \n" /* [jump tables are tricky] */
2159 ".byte .wa_f2 - .wa_ftable - 4 \n"
2160 ".byte .wa_f3 - .wa_ftable - 4 \n"
2161 ".byte .wa_f4 - .wa_ftable - 4 \n"
2162 ".byte .wa_f5 - .wa_ftable - 4 \n"
2163 ".byte .wa_f6 - .wa_ftable - 4 \n"
2164 ".byte .wa_f7 - .wa_ftable - 4 \n"
2165 ".byte .wa_f8 - .wa_ftable - 4 \n"
2166
2167 ".wa_floop: \n" /** full loop (bits to keep)**/
2168 ".wa_f8: \n"
2169 "ldrb r0, [%[addr]] \n" /* load old byte */
2170 "and r0, r0, %[mask] \n" /* mask out replaced bits */
2171 "orr r0, r0, r1 \n" /* set new bits */
2172 "strb r0, [%[addr]], %[psiz] \n" /* store byte */
2173 "mov r1, r1, lsr #8 \n" /* shift out used-up byte */
2174 ".wa_f7: \n"
2175 "ldrb r0, [%[addr]] \n"
2176 "and r0, r0, %[mask] \n"
2177 "orr r0, r0, r2 \n"
2178 "strb r0, [%[addr]], %[psiz] \n"
2179 "mov r2, r2, lsr #8 \n"
2180 ".wa_f6: \n"
2181 "ldrb r0, [%[addr]] \n"
2182 "and r0, r0, %[mask] \n"
2183 "orr r0, r0, r3 \n"
2184 "strb r0, [%[addr]], %[psiz] \n"
2185 "mov r3, r3, lsr #8 \n"
2186 ".wa_f5: \n"
2187 "ldrb r0, [%[addr]] \n"
2188 "and r0, r0, %[mask] \n"
2189 "orr r0, r0, r4 \n"
2190 "strb r0, [%[addr]], %[psiz] \n"
2191 "mov r4, r4, lsr #8 \n"
2192 ".wa_f4: \n"
2193 "ldrb r0, [%[addr]] \n"
2194 "and r0, r0, %[mask] \n"
2195 "orr r0, r0, r5 \n"
2196 "strb r0, [%[addr]], %[psiz] \n"
2197 "mov r5, r5, lsr #8 \n"
2198 ".wa_f3: \n"
2199 "ldrb r0, [%[addr]] \n"
2200 "and r0, r0, %[mask] \n"
2201 "orr r0, r0, r6 \n"
2202 "strb r0, [%[addr]], %[psiz] \n"
2203 "mov r6, r6, lsr #8 \n"
2204 ".wa_f2: \n"
2205 "ldrb r0, [%[addr]] \n"
2206 "and r0, r0, %[mask] \n"
2207 "orr r0, r0, r7 \n"
2208 "strb r0, [%[addr]], %[psiz] \n"
2209 "mov r7, r7, lsr #8 \n"
2210 ".wa_f1: \n"
2211 "ldrb r0, [%[addr]] \n"
2212 "and r0, r0, %[mask] \n"
2213 "orr r0, r0, r8 \n"
2214 "strb r0, [%[addr]], %[psiz] \n"
2215 "mov r8, r8, lsr #8 \n"
2216
2217 "subs %[dpth], %[dpth], #8 \n" /* next round if anything left */
2218 "bhi .wa_floop \n"
2219
2220 "b .wa_end \n"
2221
2222 ".wa_sstart: \n"
2223 "ldrb r0, [pc, r0] \n" /* jump into short loop*/
2224 "add pc, pc, r0 \n"
2225 ".wa_stable: \n"
2226 ".byte .wa_s1 - .wa_stable - 4 \n"
2227 ".byte .wa_s2 - .wa_stable - 4 \n"
2228 ".byte .wa_s3 - .wa_stable - 4 \n"
2229 ".byte .wa_s4 - .wa_stable - 4 \n"
2230 ".byte .wa_s5 - .wa_stable - 4 \n"
2231 ".byte .wa_s6 - .wa_stable - 4 \n"
2232 ".byte .wa_s7 - .wa_stable - 4 \n"
2233 ".byte .wa_s8 - .wa_stable - 4 \n"
2234
2235 ".wa_sloop: \n" /** short loop (nothing to keep) **/
2236 ".wa_s8: \n"
2237 "strb r1, [%[addr]], %[psiz] \n" /* store byte */
2238 "mov r1, r1, lsr #8 \n" /* shift out used-up byte */
2239 ".wa_s7: \n"
2240 "strb r2, [%[addr]], %[psiz] \n"
2241 "mov r2, r2, lsr #8 \n"
2242 ".wa_s6: \n"
2243 "strb r3, [%[addr]], %[psiz] \n"
2244 "mov r3, r3, lsr #8 \n"
2245 ".wa_s5: \n"
2246 "strb r4, [%[addr]], %[psiz] \n"
2247 "mov r4, r4, lsr #8 \n"
2248 ".wa_s4: \n"
2249 "strb r5, [%[addr]], %[psiz] \n"
2250 "mov r5, r5, lsr #8 \n"
2251 ".wa_s3: \n"
2252 "strb r6, [%[addr]], %[psiz] \n"
2253 "mov r6, r6, lsr #8 \n"
2254 ".wa_s2: \n"
2255 "strb r7, [%[addr]], %[psiz] \n"
2256 "mov r7, r7, lsr #8 \n"
2257 ".wa_s1: \n"
2258 "strb r8, [%[addr]], %[psiz] \n"
2259 "mov r8, r8, lsr #8 \n"
2260
2261 "subs %[dpth], %[dpth], #8 \n" /* next round if anything left */
2262 "bhi .wa_sloop \n"
2263
2264 ".wa_end: \n"
2265 : /* outputs */
2266 [addr]"+r"(addr),
2267 [mask]"+r"(_mask),
2268 [dpth]"+r"(depth),
2269 [rx] "=&r"(trash)
2270 : /* inputs */
2271 [psiz]"r"(_gray_info.plane_size),
2272 [patp]"[rx]"(pat_ptr)
2273 : /* clobbers */
2274 "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8"
2275 );
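/* The three "comb" stages above are a masked delta-swap transpose of an
 * 8x8 bit matrix, performed on four such matrices at once (one per byte
 * lane of r1..r8). An illustrative C sketch of the same operation on an
 * array of eight 32-bit words; the names here are invented, not the
 * plugin lib's API. After the three stages, within every byte lane,
 * bit b of t[p] is what bit p of t[b] was before. */

#include <stdint.h>

static void transpose_blocks_sketch(uint32_t t[8])
{
    static const struct { uint32_t mask; unsigned shift; } stage[] = {
        { 0xF0F0F0F0u, 4 },   /* stage 1: 4-bit comb */
        { 0xCCCCCCCCu, 2 },   /* stage 2: 2-bit comb */
        { 0xAAAAAAAAu, 1 },   /* stage 3: 1-bit comb */
    };

    for (unsigned s = 0; s < 3; s++)
    {
        unsigned d = stage[s].shift;        /* shift width == partner distance */

        for (unsigned i = 0; i < 8; i++)
        {
            if (i & d)
                continue;                   /* visit each (i, i+d) pair once */

            uint32_t x = (t[i] ^ (t[i + d] << d)) & stage[s].mask;
            t[i]     ^= x;                  /* swap the masked bit groups */
            t[i + d] ^= x >> d;
        }
    }
}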
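/* The .wa_ftable/.wa_stable byte tables and the "add pc, pc, r0" computed
 * jumps enter the unrolled loops part-way through, so the first pass writes
 * only the leftover planes. In C the same idea is usually written as a
 * switch whose cases fall through into a loop; a generic sketch of the
 * technique (not a re-derivation of the register bookkeeping above): */

static void copy_unrolled_sketch(unsigned char *dst, const unsigned char *src,
                                 unsigned count)
{
    unsigned rounds = (count + 7) / 8;  /* partial pass + full passes */

    if (count == 0)
        return;

    switch (count & 7)      /* jump into the unrolled body for the partial pass */
    {
        case 0: do { *dst++ = *src++;   /* every case falls through */
        case 7:      *dst++ = *src++;
        case 6:      *dst++ = *src++;
        case 5:      *dst++ = *src++;
        case 4:      *dst++ = *src++;
        case 3:      *dst++ = *src++;
        case 2:      *dst++ = *src++;
        case 1:      *dst++ = *src++;
                } while (--rounds > 0);
    }
}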
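/* Each step of the unrolled loops above combines one transposed byte with
 * the old plane byte and advances by one bitplane. A sketch of that single
 * step, with invented names: 'keep' is the inverted pixel mask, and the
 * short loop corresponds to the keep == 0 case, where the masking can be
 * skipped and a plain store suffices. */

static unsigned char *put_plane_byte_sketch(unsigned char *addr,
                                            unsigned char newbits,
                                            unsigned char keep,
                                            long plane_size)
{
    *addr = (*addr & keep) | newbits;   /* keep old bits, set new ones */
    return addr + plane_size;           /* next bitplane */
}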
2276 #else /* C version, for reference */
2277 #warning C version of _writearray() used
2278 unsigned char *end;