;-----------------------------------------------------------------------
; MULS_MATRIX_4x4_N050_ZERO_LATENCY
;
; Fully unrolled single-precision matrix multiply, hand-interleaved so
; that loads, multiplies, adds and spills of neighbouring results overlap.
;
; In:    a0 = start of dest matrix   (16 singles written via (a0)+)
;        a1 = start of src matrix    (read via M11..M14(a1) and (a1)+)
;        a2 = start of src2 matrix   (read via M11..M44(a2))
;        a3 = base used for the self-modifying stores; it should be PC,
;             but the assembler doesn't support that yet.
; Out:   16 singles stored at the original a0; a0 and a1 are advanced.
; Clobb: fp0-fp7, a0, a1.
; Stack: 16*4 bytes of scratch; sp is back at its entry value at rts.
;
; Scheme
;   Phase 1: each of the 16 results is formed from four products that
;            are summed pairwise into two partial sums.
;              - partial A (fp1 or fp5) is written into the immediate
;                operand of one of the "fmove.s #0,fpN" instructions in
;                phase 2 (self-modifying code, targets fp9..fp24 + 4).
;              - partial B (fp3 or fp7) is spilled to the stack scratch.
;   Phase 2: "fmove.s #imm,fpN" reloads partial A, "fadd.s (sp)+,fpN"
;            adds partial B, and the sums are stored through (a0)+.
;
; The "+4" in the patch operands presumably skips the opcode and
; extension words of fmove.s #imm,fpN so that the store lands on the
; 32-bit immediate - TODO confirm against the assembler's encoding.
;
; NOTE(review): the patch operands use fp9..fp24 while the labels below
;   are the local labels .fp9...fp24. Unless fp9..fp24 are defined
;   elsewhere, these references will not resolve to the local labels.
; NOTE(review): a1 is post-incremented 32 times in phase 1. If src is a
;   contiguous 4x4 array of singles this reads past its end, and the
;   odd results use the row a1 has already advanced to - verify intent.
; NOTE(review): after the sixteenth spill sp equals its entry value, so
;   the spilled data lies below sp until the second "sub.w" - confirm
;   that nothing (e.g. an interrupt on this stack) can overwrite it.
; NOTE(review): self-modifying code needs the instruction cache to see
;   the patched immediates - confirm for the target CPU.
;
; The trailing ";N" comments are the original author's running counter
; and are kept unchanged.
;-----------------------------------------------------------------------
MULS_MATRIX_4x4_N050_ZERO_LATENCY
        sub.w   #16*4,sp                ; reserve 16 singles of scratch

; ---- phase 1: products, pairwise sums, spills --------------------------

        ; result 1 operands (fp0-fp3, displacement reads of a1)
        fmove.s M11(a2),fp0             ;1
        fmul.s  M11(a1),fp0             ;2
        fmove.s M21(a2),fp1             ;2
        fmul.s  M12(a1),fp1             ;3
        fmove.s M31(a2),fp2             ;4
        fmul.s  M13(a1),fp2             ;5
        fmove.s M41(a2),fp3             ;6
        fmul.s  M14(a1),fp3             ;7
        ; result 2 operands (fp4-fp7, post-increment reads of a1)
        fmove.s M12(a2),fp4             ;8
        fmul.s  (a1)+,fp4               ;9
        fmove.s M22(a2),fp5             ;10
        fmul.s  (a1)+,fp5               ;11
        fmove.s M32(a2),fp6             ;12
        fmul.s  (a1)+,fp6               ;13
        fadd.x  fp0,fp1                 ;14
        fmove.s M42(a2),fp7             ;16
        fmul.s  (a1)+,fp7               ;17
        fadd.x  fp2,fp3                 ;18
        ; result 3 operands
        fmove.s M13(a2),fp0             ;19
        fmul.s  M11(a1),fp0             ;20
        fadd.x  fp4,fp5                 ;21
        fmove.s fp1,fp9+4(a3)           ;22 (fixed: stray second comma removed)
        fmove.s M23(a2),fp1             ;23
        fmul.s  M12(a1),fp1             ;24
        fmove.s M33(a2),fp2             ;25
        fmul.s  M13(a1),fp2             ;26
        fadd.x  fp6,fp7                 ;27
        fmove.s fp3,(sp)+               ;28
        fmove.s M43(a2),fp3             ;29
        fmul.s  M14(a1),fp3             ;30
        ; result 4 operands
        fmove.s M14(a2),fp4             ;31
        fmul.s  (a1)+,fp4               ;32
        fmove.s fp5,fp10+4(a3)          ;33
        fmove.s M24(a2),fp5             ;34
        fmul.s  (a1)+,fp5               ;35
        fmove.s M34(a2),fp6             ;36
        fmul.s  (a1)+,fp6               ;37
        fmove.s fp7,(sp)+               ;38
        fadd.x  fp0,fp1                 ;39
        fmove.s M44(a2),fp7             ;40
        fmul.s  (a1)+,fp7               ;41
        fadd.x  fp2,fp3                 ;42
        ; result 5 operands
        fmove.s M11(a2),fp0             ;43
        fmul.s  M11(a1),fp0             ;44
        fadd.x  fp4,fp5                 ;45
        fmove.s fp1,fp11+4(a3)          ;46
        fmove.s M21(a2),fp1             ;47
        fmul.s  M12(a1),fp1             ;48
        fmove.s M31(a2),fp2             ;49
        fmul.s  M13(a1),fp2             ;50
        fadd.x  fp6,fp7                 ;51
        fmove.s fp3,(sp)+               ;52
        fmove.s M41(a2),fp3             ;53
        fmul.s  M14(a1),fp3             ;54
        ; result 6 operands
        fmove.s M12(a2),fp4             ;55
        fmul.s  (a1)+,fp4               ;56
        fmove.s fp5,fp12+4(a3)          ;57
        fmove.s M22(a2),fp5             ;58
        fmul.s  (a1)+,fp5               ;59
        fmove.s M32(a2),fp6             ;60
        fmul.s  (a1)+,fp6               ;61
        fmove.s fp7,(sp)+               ;62
        fadd.x  fp0,fp1                 ;63
        fmove.s M42(a2),fp7             ;64
        fmul.s  (a1)+,fp7               ;65
        fadd.x  fp2,fp3                 ;66
        ; result 7 operands
        fmove.s M13(a2),fp0             ;67
        fmul.s  M11(a1),fp0             ;68
        fadd.x  fp4,fp5                 ;69
        fmove.s fp1,fp13+4(a3)          ;70
        fmove.s M23(a2),fp1             ;71
        fmul.s  M12(a1),fp1             ;72
        fmove.s M33(a2),fp2             ;73
        fmul.s  M13(a1),fp2             ;74
        fadd.x  fp6,fp7                 ;75
        fmove.s fp3,(sp)+               ;76
        fmove.s M43(a2),fp3             ;77
        fmul.s  M14(a1),fp3             ;78
        ; result 8 operands
        fmove.s M14(a2),fp4             ;79
        fmul.s  (a1)+,fp4               ;80
        fmove.s fp5,fp14+4(a3)          ;81
        fmove.s M24(a2),fp5             ;82
        fmul.s  (a1)+,fp5               ;83
        fmove.s M34(a2),fp6             ;84
        fmul.s  (a1)+,fp6               ;85
        fmove.s fp7,(sp)+               ;86
        fadd.x  fp0,fp1                 ;87
        fmove.s M44(a2),fp7             ;88
        fmul.s  (a1)+,fp7               ;89
        fadd.x  fp2,fp3                 ;90
        ; result 9 operands
        fmove.s M11(a2),fp0             ;91
        fmul.s  M11(a1),fp0             ;92
        fadd.x  fp4,fp5                 ;93
        fmove.s fp1,fp15+4(a3)          ;94
        fmove.s M21(a2),fp1             ;95
        fmul.s  M12(a1),fp1             ;96
        fmove.s M31(a2),fp2             ;97
        fmul.s  M13(a1),fp2             ;98
        fadd.x  fp6,fp7                 ;99
        fmove.s fp3,(sp)+               ;100
        fmove.s M41(a2),fp3             ;101
        fmul.s  M14(a1),fp3             ;102
        ; result 10 operands
        fmove.s M12(a2),fp4             ;103
        fmul.s  (a1)+,fp4               ;104
        fmove.s fp5,fp16+4(a3)          ;105
        fmove.s M22(a2),fp5             ;106
        fmul.s  (a1)+,fp5               ;107
        fmove.s M32(a2),fp6             ;108
        fmul.s  (a1)+,fp6               ;109
        fmove.s fp7,(sp)+               ;110
        fadd.x  fp0,fp1                 ;111
        fmove.s M42(a2),fp7             ;112
        fmul.s  (a1)+,fp7               ;113
        fadd.x  fp2,fp3                 ;114
        ; result 11 operands
        fmove.s M13(a2),fp0             ;115
        fmul.s  M11(a1),fp0             ;116
        fadd.x  fp4,fp5                 ;117
        fmove.s fp1,fp17+4(a3)          ;118
        fmove.s M23(a2),fp1             ;119
        fmul.s  M12(a1),fp1             ;120
        fmove.s M33(a2),fp2             ;121
        fmul.s  M13(a1),fp2             ;122
        fadd.x  fp6,fp7                 ;123
        fmove.s fp3,(sp)+               ;124
        fmove.s M43(a2),fp3             ;125
        fmul.s  M14(a1),fp3             ;126
        ; result 12 operands
        fmove.s M14(a2),fp4             ;127
        fmul.s  (a1)+,fp4               ;128
        fmove.s fp5,fp18+4(a3)          ;130
        fmove.s M24(a2),fp5             ;131
        fmul.s  (a1)+,fp5               ;132
        fmove.s M34(a2),fp6             ;133
        fmul.s  (a1)+,fp6               ;134
        fmove.s fp7,(sp)+               ;135
        fadd.x  fp0,fp1                 ;136
        fmove.s M44(a2),fp7             ;137
        fmul.s  (a1)+,fp7               ;138
        fadd.x  fp2,fp3                 ;139
        ; result 13 operands
        fmove.s M11(a2),fp0             ;140
        fmul.s  M11(a1),fp0             ;141
        fadd.x  fp4,fp5                 ;142
        fmove.s fp1,fp19+4(a3)          ;143
        fmove.s M21(a2),fp1             ;144
        fmul.s  M12(a1),fp1             ;145
        fmove.s M31(a2),fp2             ;146
        fmul.s  M13(a1),fp2             ;147
        fadd.x  fp6,fp7                 ;148
        fmove.s fp3,(sp)+               ;149
        fmove.s M41(a2),fp3             ;150
        fmul.s  M14(a1),fp3             ;151
        ; result 14 operands
        fmove.s M12(a2),fp4             ;152
        fmul.s  (a1)+,fp4               ;153
        fmove.s fp5,fp20+4(a3)          ;154
        fmove.s M22(a2),fp5             ;155
        fmul.s  (a1)+,fp5               ;156
        fmove.s M32(a2),fp6             ;157
        fmul.s  (a1)+,fp6               ;158
        fmove.s fp7,(sp)+               ;159
        fadd.x  fp0,fp1                 ;160
        fmove.s M42(a2),fp7             ;161
        fmul.s  (a1)+,fp7               ;162
        fadd.x  fp2,fp3                 ;163
        ; result 15 operands
        fmove.s M13(a2),fp0             ;164
        fmul.s  M11(a1),fp0             ;164
        fadd.x  fp4,fp5                 ;165
        fmove.s fp1,fp21+4(a3)          ;166
        fmove.s M23(a2),fp1             ;167
        fmul.s  M12(a1),fp1             ;168
        fmove.s M33(a2),fp2             ;169
        fmul.s  M13(a1),fp2             ;170
        fadd.x  fp6,fp7                 ;171
        fmove.s fp3,(sp)+               ;172
        fmove.s M43(a2),fp3             ;173
        fmul.s  M14(a1),fp3             ;174
        ; result 16 operands
        fmove.s M14(a2),fp4             ;175
        fmul.s  (a1)+,fp4               ;175
        fmove.s fp5,fp22+4(a3)          ;176
        fmove.s M24(a2),fp5             ;177
        fmul.s  (a1)+,fp5               ;178
        fmove.s M34(a2),fp6             ;179
        fmul.s  (a1)+,fp6               ;180
        fmove.s fp7,(sp)+               ;181
        fadd.x  fp0,fp1                 ;182
        fmove.s M44(a2),fp7             ;183
        fmul.s  (a1)+,fp7               ;184
        ; drain: remaining pairwise sums and spills for results 15 and 16
        fadd.x  fp2,fp3                 ;185
        fadd.x  fp4,fp5                 ;186
        fmove.s fp1,fp23+4(a3)          ;187
        fadd.x  fp6,fp7                 ;188
        fmove.s fp3,(sp)+               ;189
        fmove.s fp5,fp24+4(a3)          ;190
        fmove.s fp7,(sp)+               ;191

; ---- phase 2: combine the two partial sums and store -------------------

        sub.w   #16*4,sp                ;192 rewind sp to the first spilled single

        ; results 1..8: the #0 immediates are the phase-1 patch targets
.fp9    fmove.s #0,fp0                  ;fused
        fadd.s  (sp)+,fp0               ;193
.fp10   fmove.s #0,fp1                  ;fused
        fadd.s  (sp)+,fp1               ;194
.fp11   fmove.s #0,fp2                  ;fused
        fadd.s  (sp)+,fp2               ;195
.fp12   fmove.s #0,fp3                  ;fused
        fadd.s  (sp)+,fp3               ;196
.fp13   fmove.s #0,fp4                  ;fused
        fadd.s  (sp)+,fp4               ;197
.fp14   fmove.s #0,fp5                  ;fused
        fadd.s  (sp)+,fp5               ;198
.fp15   fmove.s #0,fp6                  ;fused
        fadd.s  (sp)+,fp6               ;199
.fp16   fmove.s #0,fp7                  ;fused
        fadd.s  (sp)+,fp7               ;200
        fmove.s fp0,(a0)+               ;201
        fmove.s fp1,(a0)+               ;202
        fmove.s fp2,(a0)+               ;203
        fmove.s fp3,(a0)+               ;204
        fmove.s fp4,(a0)+               ;205
        fmove.s fp5,(a0)+               ;206
        fmove.s fp6,(a0)+               ;207
        fmove.s fp7,(a0)+               ;208

        ; results 9..16
.fp17   fmove.s #0,fp0                  ;fused
        fadd.s  (sp)+,fp0               ;209
.fp18   fmove.s #0,fp1                  ;fused
        fadd.s  (sp)+,fp1               ;210
.fp19   fmove.s #0,fp2                  ;fused
        fadd.s  (sp)+,fp2               ;211
.fp20   fmove.s #0,fp3                  ;fused
        fadd.s  (sp)+,fp3               ;212
.fp21   fmove.s #0,fp4                  ;fused
        fadd.s  (sp)+,fp4               ;213
.fp22   fmove.s #0,fp5                  ;fused
        fadd.s  (sp)+,fp5               ;214
.fp23   fmove.s #0,fp6                  ;fused
        fadd.s  (sp)+,fp6               ;215
.fp24   fmove.s #0,fp7                  ;fused
        fadd.s  (sp)+,fp7               ;216
        fmove.s fp0,(a0)+               ;217
        fmove.s fp1,(a0)+               ;218
        fmove.s fp2,(a0)+               ;219
        fmove.s fp3,(a0)+               ;220
        fmove.s fp4,(a0)+               ;221
        fmove.s fp5,(a0)+               ;222
        fmove.s fp6,(a0)+               ;223
        fmove.s fp7,(a0)+               ;224
        rts                             ;225
|