summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Andrew Mahone <andrew.mahone@gmail.com> 2009-07-03 10:03:17 +0000
committer: Andrew Mahone <andrew.mahone@gmail.com> 2009-07-03 10:03:17 +0000
commit: 10803a74806738a6958b3cf1807e0e771c6e5be6 (patch)
tree: c0dea244ac46ad7dff3321fd75311e8d490b8111
parent: 0e445fcef6a803e0b321964b3c739f6dc9f2a82d (diff)
download: rockbox-10803a74806738a6958b3cf1807e0e771c6e5be6.tar.gz
rockbox-10803a74806738a6958b3cf1807e0e771c6e5be6.zip
JPEG IDCT8 ARMv6 assembly, slight speedup vs ARMv5.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@21620 a1c6a512-1295-4272-9138-f99709370657
-rw-r--r-- apps/recorder/jpeg_idct_arm.S 230
1 files changed, 215 insertions, 15 deletions
diff --git a/apps/recorder/jpeg_idct_arm.S b/apps/recorder/jpeg_idct_arm.S
index 46ac479caa..75e3e4d2b3 100644
--- a/apps/recorder/jpeg_idct_arm.S
+++ b/apps/recorder/jpeg_idct_arm.S
@@ -429,6 +429,7 @@ jpeg_idct4h:
429#endif 429#endif
430 .size jpeg_idct4h, .-jpeg_idct4h 430 .size jpeg_idct4h, .-jpeg_idct4h
431 431
432#if ARM_ARCH < 6
432jpeg_idct8v: 433jpeg_idct8v:
433 stmdb sp!, { r4-r11, lr } 434 stmdb sp!, { r4-r11, lr }
434 add r2, r0, #128 435 add r2, r0, #128
@@ -641,13 +642,9 @@ jpeg_idct8h:
641 orrs r9, r6, r7 642 orrs r9, r6, r7
642 orreqs r9, r5, r4, lsr #16 643 orreqs r9, r5, r4, lsr #16
643 bne 2f 644 bne 2f
644#if ARM_ARCH < 6
645 mov r8, r8, asr #21 645 mov r8, r8, asr #21
646 cmp r8, #255 646 cmp r8, #255
647 mvnhi r8, r8, asr #31 647 mvnhi r8, r8, asr #31
648#else
649 usat r8, #8, r8, asr #21
650#endif
651#ifdef HAVE_LCD_COLOR 648#ifdef HAVE_LCD_COLOR
652 strb r8, [r1] 649 strb r8, [r1]
653 strb r8, [r1, #4] 650 strb r8, [r1, #4]
@@ -848,7 +845,6 @@ jpeg_idct8h:
848 add r10, r11, r4 /* o0 */ 845 add r10, r11, r4 /* o0 */
849 sub r11, r11, r4 /* o7 */ 846 sub r11, r11, r4 /* o7 */
850 /* output in order: r10 r12 r8 r6 r7 r9 r14 r11 */ 847 /* output in order: r10 r12 r8 r6 r7 r9 r14 r11 */
851#if ARM_ARCH < 6
852 mov r10, r10, asr #18 848 mov r10, r10, asr #18
853 cmp r10, #255 849 cmp r10, #255
854 mvnhi r10, r10, asr #31 850 mvnhi r10, r10, asr #31
@@ -873,16 +869,6 @@ jpeg_idct8h:
873 mov r11, r11, asr #18 869 mov r11, r11, asr #18
874 cmp r11, #255 870 cmp r11, #255
875 mvnhi r11, r11, asr #31 871 mvnhi r11, r11, asr #31
876#else
877 usat r10, #8, r10, asr #18
878 usat r12, #8, r12, asr #18
879 usat r8, #8, r8, asr #18
880 usat r6, #8, r6, asr #18
881 usat r7, #8, r7, asr #18
882 usat r9, #8, r9, asr #18
883 usat r14, #8, r14, asr #18
884 usat r11, #8, r11, asr #18
885#endif
886#ifdef HAVE_LCD_COLOR 872#ifdef HAVE_LCD_COLOR
887 strb r10, [r1] 873 strb r10, [r1]
888 strb r12, [r1, #4] 874 strb r12, [r1, #4]
@@ -908,3 +894,217 @@ jpeg_idct8h:
908 bcc 1b 894 bcc 1b
909 ldmia sp!, { r4-r11, pc } 895 ldmia sp!, { r4-r11, pc }
910 .size jpeg_idct8h, .-jpeg_idct8h 896 .size jpeg_idct8h, .-jpeg_idct8h
897#else
/*
 * jpeg_idct8v, ARMv6 variant (this copy lives in the #else arm of
 * "#if ARM_ARCH < 6").
 *
 * Each loop iteration reads eight 16-bit coefficients d0..d7 from [r0]
 * (r0 advances by 16 bytes), transforms them, and writes eight 16-bit
 * results 16 bytes apart starting at [r2] (r2 advances by 2 bytes).
 *
 *   r0  in   read pointer
 *   r1  in   end of input; the loop runs while r0 < r1 (unsigned)
 *   r2  --   write pointer, initialised to r0 + 128
 *   r3, r12  scratch; r4-r11 and lr are saved and restored
 *
 * Halfword pairs in comments are written (low, high).
 * NOTE(review): the pass direction and the caller's buffer layout are not
 * visible here -- confirm against the C caller.
 */
898jpeg_idct8v:
899 stmdb sp!, { r4-r11, lr }
900 add r2, r0, #128 /* output starts 128 bytes past the input pointer */
9011:
902 ldmia r0!, { r4-r7 } /* r4 = (d0, d1), r5 = (d2, d3), r6 = (d4, d5), r7 = (d6, d7) */
903 orrs r9, r6, r7 /* d4..d7 all zero? */
904 orreqs r9, r5, r4, lsr #16 /* ... and d1..d3 zero as well? */
905 bne 2f /* some non-d0 term is set: take the full transform */
/* Only d0 is non-zero: all eight outputs are d0 << 2. */
906 mov r4, r4, lsl #2
907 strh r4, [r2]
908 strh r4, [r2, #16]
909 strh r4, [r2, #32]
910 strh r4, [r2, #48]
911 strh r4, [r2, #64]
912 strh r4, [r2, #80]
913 strh r4, [r2, #96]
914 strh r4, [r2, #112]
915 cmp r0, r1 /* flags stay valid across the add (no S suffix) */
916 add r2, r2, #2 /* next 16-bit output slot */
917 bcc 1b
918 ldmia sp!, { r4-r11, pc }
9192:
920 ldrd r8, .Lpool8 /* r8 = (4433, -15137), r9 = (6270, 9633) */
921 mov r12, r4, lsl #16 /* r12 = d0 << 16 */
922 add r10, r5, r7 /* r10 = d2 + d6 */
923 add r12, r12, #8192 /* rounding term: 1024 after the asr #3 below, half of the 1 << 11 dropped at the end */
924 add r3, r12, r6, lsl #16 /* tmp0 */
925 sub r12, r12, r6, lsl #16 /* tmp1 */
926 pkhtb r4, r5, r4, asr #16 /* r4 = (tmp3[o], tmp2[o]) = (d1, d3) */
927 smulbb r14, r8, r10 /* r14 = z1[e] = (d2 + d6) * 4433 */
928 pkhtb r6, r6, r7, asr #16 /* r6 = (tmp0[o], tmp1[o]) = (d7, d5) */
929 smlatb r7, r8, r7, r14 /* r7 = tmp2[e] = z1 - d6 * 15137 */
930 smlabb r5, r9, r5, r14 /* r5 = tmp3[e] = z1 + d2 * 6270 */
931 pkhtb r9, r9, r9, asr #16 /* r9 = (9633, 9633) */
932 add r10, r5, r3, asr #3 /* r10 = tmp10 */
933 rsb r11, r5, r3, asr #3 /* r11 = tmp13 */
934 mov r3, r4, ror #16 /* r3 = (d3, d1), halves of r4 swapped */
935 rsb r14, r7, r12, asr #3 /* r14 = tmp12 */
936 add r12, r7, r12, asr #3 /* r12 = tmp11 */
937 sadd16 r8, r3, r6 /* z3, z4 */
/*
 * Spill to the 16 bytes just below sp; sp itself is not moved.
 * NOTE(review): this relies on nothing else writing below sp before the
 * ldmdb further down -- confirm no interrupt/exception path uses this stack.
 */
938 stmdb sp, { r10-r12, r14 } /* tmp10 tmp13 tmp11 tmp12 */
939 smuad r5, r9, r8 /* r5 = z5 = (z3[o] + z4[o]) * 9633 */
940 ldrd r10, .Lpool8+8 /* r10 = (-16069, -3196), r11 = (-7373, -20995) */
941 sadd16 r7, r4, r6 /* r7 = (z1, z2) */
/*
 * By the .Lpool8 layout the top half of r10 is -3196 and the bottom half is
 * -16069 (same little-endian packing the 4433 comment above relies on).
 */
942 smlatt r9, r10, r8, r5 /* r9 = z4 = z5 - z4 * 3196 */
943 smlabb r8, r10, r8, r5 /* r8 = z3 = z5 - z3 * 16069 */
944 smlabb r14, r11, r7, r9 /* r14 = z1 + z4 */
945 smlabb r12, r11, r7, r8 /* r12 = z1 + z3 */
946 smlatt r5, r11, r7, r9 /* r5 = z2 + z4 */
947 smlatt r7, r11, r7, r8 /* r7 = z2 + z3 */
948 ldrd r8, .Lpool8+16 /* r8 = (2446, 16819), r9 = (25172, 12299) */
949 smlabt r7, r9, r4, r7 /* r7 = tmp2 */
950 smlatb r14, r9, r4, r14 /* r14 = tmp3 */
/* Reload the spilled values; this overwrites r9, whose constants are no longer needed. */
951 ldmdb sp, { r4, r9-r11 } /* tmp10 tmp13 tmp11 tmp12 */
952 smlabb r12, r8, r6, r12 /* r12 = tmp0 */
953 smlatt r5, r8, r6, r5 /* r5 = tmp1 */
954 /* used: r4, r5, r7, r9-r12, r14 */
955 add r6, r4, r14 /* o0 */
956 sub r8, r4, r14 /* o7 */
957 add r14, r9, r12 /* o3 */
958 sub r12, r9, r12 /* o4 */
959 add r4, r10, r7 /* o1 */
960 sub r7, r10, r7 /* o6 */
961 add r9, r11, r5 /* o2 */
962 sub r10, r11, r5 /* o5 */
/* Drop 11 fraction bits from each output (registers listed in order o0..o7). */
963 mov r6, r6, asr #11
964 mov r4, r4, asr #11
965 mov r9, r9, asr #11
966 mov r14, r14, asr #11
967 mov r12, r12, asr #11
968 mov r10, r10, asr #11
969 mov r7, r7, asr #11
970 mov r8, r8, asr #11
/* Store o0..o7 as halfwords, 16 bytes apart. */
971 strh r6, [r2]
972 strh r4, [r2, #16]
973 strh r9, [r2, #32]
974 strh r14, [r2, #48]
975 strh r12, [r2, #64]
976 strh r10, [r2, #80]
977 strh r7, [r2, #96]
978 strh r8, [r2, #112]
979 cmp r0, r1 /* flags stay valid across the add (no S suffix) */
980 add r2, r2, #2 /* next 16-bit output slot */
981 bcc 1b
982 ldmia sp!, { r4-r11, pc }
983 .size jpeg_idct8v, .-jpeg_idct8v
984
/*
 * .Lpool8: twelve signed 16-bit multipliers shared by the ARMv6 jpeg_idct8v
 * and jpeg_idct8h. They are fetched two words at a time with ldrd at
 * offsets +0, +8 and +16, so the table must be at least 8-byte aligned;
 * the .align below asks for more than that.
 * The first halfword of each word is read as the "bottom" operand of the
 * smulxy/smlaxy instructions (little-endian packing, as the users' comments
 * assume).
 * NOTE(review): the magnitudes look like the 13-bit fixed-point constants
 * of libjpeg's jidctint.c -- confirm before relying on that.
 */
985 .align 4
986.Lpool8:
/* ldrd ..., .Lpool8 */
987 .short 4433
988 .short -15137
989 .short 6270
990 .short 9633
/* ldrd ..., .Lpool8+8 */
991 .short -16069
992 .short -3196
993 .short -7373
994 .short -20995
/* ldrd ..., .Lpool8+16 */
995 .short 2446
996 .short 16819
997 .short 25172
998 .short 12299
999
/*
 * jpeg_idct8h, ARMv6 variant (this copy lives in the #else arm of
 * "#if ARM_ARCH < 6").
 *
 * Each loop iteration reads eight 16-bit values d0..d7 from [r0]
 * (r0 advances by 16 bytes), transforms them, clamps each result to
 * 0..255 and stores eight bytes at [r1]: 4 bytes apart when
 * HAVE_LCD_COLOR is defined, otherwise contiguous.
 *
 *   r0  in   read pointer
 *   r1  in   write pointer; r3 is added to it after every iteration
 *   r2  in   end of input; the loop runs while r0 < r2 (unsigned)
 *   r3  in   output advance per iteration, in bytes
 *   r12      scratch; r4-r11 and lr are saved and restored
 *
 * Halfword pairs in comments are written (low, high).
 */
1000 .align 2
1001jpeg_idct8h:
1002 stmdb sp!, { r4-r11, lr }
10031:
/*
 * r14 = 4112 = 4096 + 16 is reloaded every iteration because the full
 * transform at 2: overwrites r14. Its high halfword is zero, so the sadd16
 * below biases d0 only. After the final scaling (>> 5 here, << 13 then
 * >> 18 at 2:) the bias amounts to +128.5.
 * NOTE(review): presumably the +128 level shift plus rounding -- confirm.
 */
1004 ldr r14, =4112
1005 ldmia r0!, { r4-r7 } /* r4 = (d0, d1), r5 = (d2, d3), r6 = (d4, d5), r7 = (d6, d7) */
1006 sadd16 r4, r4, r14 /* r4 = (d0 + 4112, d1) */
1007 orrs r9, r6, r7 /* d4..d7 all zero? */
1008 orreqs r9, r5, r4, lsr #16 /* ... and d1..d3 zero as well? */
1009 bne 2f /* some non-d0 term is set: take the full transform */
/* Only d0 is non-zero: all eight output bytes are the same clamped value. */
1010 sxth r4, r4 /* sign-extend the biased d0 */
1011 usat r4, #8, r4, asr #5 /* clamp (r4 >> 5) to 0..255 */
1012#ifdef HAVE_LCD_COLOR
1013 strb r4, [r1]
1014 strb r4, [r1, #4]
1015 strb r4, [r1, #8]
1016 strb r4, [r1, #12]
1017 strb r4, [r1, #16]
1018 strb r4, [r1, #20]
1019 strb r4, [r1, #24]
1020 strb r4, [r1, #28]
1021#else
1022 strb r4, [r1]
1023 strb r4, [r1, #1]
1024 strb r4, [r1, #2]
1025 strb r4, [r1, #3]
1026 strb r4, [r1, #4]
1027 strb r4, [r1, #5]
1028 strb r4, [r1, #6]
1029 strb r4, [r1, #7]
1030#endif
1031 add r1, r1, r3 /* advance the output pointer by r3 */
1032 cmp r0, r2
1033 bcc 1b
1034 ldmia sp!, { r4-r11, pc }
10352:
1036 ldrd r8, .Lpool8 /* r8 = (4433, -15137), r9 = (6270, 9633) */
1037 sadd16 r10, r5, r7 /* r10 = (d2 + d6, d3 + d7) */
1038 ssub16 r12, r4, r6 /* r12 = (d0 - d4, d1 - d5) */
1039 sadd16 r11, r4, r6 /* r11 = (d0 + d4, d1 + d5) */
1040 pkhtb r4, r5, r4, asr #16 /* r4 = (tmp3[o], tmp2[o]) = (d1, d3) */
1041 smulbb r14, r8, r10 /* r14 = z1[e] = (d2 + d6) * 4433 */
1042 pkhtb r6, r6, r7, asr #16 /* r6 = (tmp0[o], tmp1[o]) = (d7, d5) */
1043 smlatb r7, r8, r7, r14 /* r7 = tmp2[e] = z1 - d6 * 15137 */
1044 smlabb r5, r9, r5, r14 /* r5 = tmp3[e] = z1 + d2 * 6270 */
1045 sxth r12, r12 /* r12 = tmp1[e] = d0 - d4 */
1046 pkhtb r8, r11, r10, asr #16 /* r8 = (z3[o], z4[o]) */
1047 sxth r14, r11 /* r14 = tmp0[e] */
1048 pkhtb r9, r9, r9, asr #16 /* r9 = (9633, 9633) */
1049 add r10, r5, r14, lsl #13 /* r10 = tmp10 */
1050 rsb r11, r5, r14, lsl #13 /* r11 = tmp13 */
1051 rsb r14, r7, r12, lsl #13 /* r14 = tmp12 */
1052 add r12, r7, r12, lsl #13 /* r12 = tmp11 */
/*
 * Spill to the 16 bytes just below sp; sp itself is not moved.
 * NOTE(review): this relies on nothing else writing below sp before the
 * ldmdb further down -- confirm no interrupt/exception path uses this stack.
 */
1053 stmdb sp, { r10-r12, r14 } /* tmp10 tmp13 tmp11 tmp12 */
1054 smuad r5, r9, r8 /* r5 = z5 = (z3[o] + z4[o]) * 9633 */
1055 ldrd r10, .Lpool8+8 /* r10 = (-16069, -3196), r11 = (-7373, -20995) */
1056 sadd16 r7, r4, r6 /* r7 = (z1, z2) */
/*
 * By the .Lpool8 layout the top half of r10 is -3196 and the bottom half is
 * -16069 (same little-endian packing the 4433 comment above relies on).
 */
1057 smlatt r9, r10, r8, r5 /* r9 = z4 = z5 - z4 * 3196 */
1058 smlabb r8, r10, r8, r5 /* r8 = z3 = z5 - z3 * 16069 */
1059 smlabb r14, r11, r7, r9 /* r14 = z1 + z4 */
1060 smlabb r12, r11, r7, r8 /* r12 = z1 + z3 */
1061 smlatt r5, r11, r7, r9 /* r5 = z2 + z4 */
1062 smlatt r7, r11, r7, r8 /* r7 = z2 + z3 */
1063 ldrd r8, .Lpool8+16 /* r8 = (2446, 16819), r9 = (25172, 12299) */
1064 smlabt r7, r9, r4, r7 /* r7 = tmp2 */
1065 smlatb r14, r9, r4, r14 /* r14 = tmp3 */
/* Reload the spilled values; this overwrites r9, whose constants are no longer needed. */
1066 ldmdb sp, { r4, r9-r11 } /* tmp10 tmp13 tmp11 tmp12 */
1067 smlabb r12, r8, r6, r12 /* r12 = tmp0 */
1068 smlatt r5, r8, r6, r5 /* r5 = tmp1 */
1069 /* used: r4, r5, r7, r9-r12, r14 */
1070 add r6, r4, r14 /* o0 */
1071 sub r8, r4, r14 /* o7 */
1072 add r14, r9, r12 /* o3 */
1073 sub r12, r9, r12 /* o4 */
1074 add r4, r10, r7 /* o1 */
1075 sub r7, r10, r7 /* o6 */
1076 add r9, r11, r5 /* o2 */
1077 sub r10, r11, r5 /* o5 */
/* Scale down by 18 bits and clamp to 0..255 in one step (order o0..o7). */
1078 usat r6, #8, r6, asr #18
1079 usat r4, #8, r4, asr #18
1080 usat r9, #8, r9, asr #18
1081 usat r14, #8, r14, asr #18
1082 usat r12, #8, r12, asr #18
1083 usat r10, #8, r10, asr #18
1084 usat r7, #8, r7, asr #18
1085 usat r8, #8, r8, asr #18
1086#ifdef HAVE_LCD_COLOR
1087 strb r6, [r1]
1088 strb r4, [r1, #4]
1089 strb r9, [r1, #8]
1090 strb r14, [r1, #12]
1091 strb r12, [r1, #16]
1092 strb r10, [r1, #20]
1093 strb r7, [r1, #24]
1094 strb r8, [r1, #28]
1095#else
1096 strb r6, [r1]
1097 strb r4, [r1, #1]
1098 strb r9, [r1, #2]
1099 strb r14, [r1, #3]
1100 strb r12, [r1, #4]
1101 strb r10, [r1, #5]
1102 strb r7, [r1, #6]
1103 strb r8, [r1, #7]
1104#endif
1105 cmp r0, r2 /* flags stay valid across the add (no S suffix) */
1106 add r1, r1, r3 /* advance the output pointer by r3 */
1107 bcc 1b
1108 ldmia sp!, { r4-r11, pc }
1109 .size jpeg_idct8h, .-jpeg_idct8h
1110#endif