diff options
Diffstat (limited to 'contrib/gcc/config/i386/athlon.md')
-rw-r--r-- | contrib/gcc/config/i386/athlon.md | 378 |
1 files changed, 336 insertions, 42 deletions
diff --git a/contrib/gcc/config/i386/athlon.md b/contrib/gcc/config/i386/athlon.md index 6d92b948b4db..a52f9bc28b4b 100644 --- a/contrib/gcc/config/i386/athlon.md +++ b/contrib/gcc/config/i386/athlon.md @@ -29,6 +29,8 @@ (const_string "vector")] (const_string "direct"))) +(define_attr "amdfam10_decode" "direct,vector,double" + (const_string "direct")) ;; ;; decode0 decode1 decode2 ;; \ | / @@ -131,18 +133,22 @@ ;; Jump instructions are executed in the branch unit completely transparent to us (define_insn_reservation "athlon_branch" 0 - (and (eq_attr "cpu" "athlon,k8,generic64") + (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10") (eq_attr "type" "ibr")) "athlon-direct,athlon-ieu") (define_insn_reservation "athlon_call" 0 (and (eq_attr "cpu" "athlon,k8,generic64") (eq_attr "type" "call,callv")) "athlon-vector,athlon-ieu") +(define_insn_reservation "athlon_call_amdfam10" 0 + (and (eq_attr "cpu" "amdfam10") + (eq_attr "type" "call,callv")) + "athlon-double,athlon-ieu") ;; Latency of push operation is 3 cycles, but ESP value is available ;; earlier (define_insn_reservation "athlon_push" 2 - (and (eq_attr "cpu" "athlon,k8,generic64") + (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10") (eq_attr "type" "push")) "athlon-direct,athlon-agu,athlon-store") (define_insn_reservation "athlon_pop" 4 @@ -153,12 +159,16 @@ (and (eq_attr "cpu" "k8,generic64") (eq_attr "type" "pop")) "athlon-double,(athlon-ieu+athlon-load)") +(define_insn_reservation "athlon_pop_amdfam10" 3 + (and (eq_attr "cpu" "amdfam10") + (eq_attr "type" "pop")) + "athlon-direct,(athlon-ieu+athlon-load)") (define_insn_reservation "athlon_leave" 3 (and (eq_attr "cpu" "athlon") (eq_attr "type" "leave")) "athlon-vector,(athlon-ieu+athlon-load)") (define_insn_reservation "athlon_leave_k8" 3 - (and (eq_attr "cpu" "k8,generic64") + (and (eq_attr "cpu" "k8,generic64,amdfam10") (eq_attr "type" "leave")) "athlon-double,(athlon-ieu+athlon-load)") @@ -167,6 +177,11 @@ (and (eq_attr "cpu" "athlon,k8,generic64") (eq_attr "type" "lea")) "athlon-direct,athlon-agu,nothing") +;; Lea executes in AGU unit with 1 cycle latency on AMDFAM10 +(define_insn_reservation "athlon_lea_amdfam10" 1 + (and (eq_attr "cpu" "amdfam10") + (eq_attr "type" "lea")) + "athlon-direct,athlon-agu,nothing") ;; Mul executes in special multiplier unit attached to IEU0 (define_insn_reservation "athlon_imul" 5 @@ -176,29 +191,35 @@ "athlon-vector,athlon-ieu0,athlon-mult,nothing,nothing,athlon-ieu0") ;; ??? Widening multiply is vector or double. (define_insn_reservation "athlon_imul_k8_DI" 4 - (and (eq_attr "cpu" "k8,generic64") + (and (eq_attr "cpu" "k8,generic64,amdfam10") (and (eq_attr "type" "imul") (and (eq_attr "mode" "DI") (eq_attr "memory" "none,unknown")))) "athlon-direct0,athlon-ieu0,athlon-mult,nothing,athlon-ieu0") (define_insn_reservation "athlon_imul_k8" 3 - (and (eq_attr "cpu" "k8,generic64") + (and (eq_attr "cpu" "k8,generic64,amdfam10") (and (eq_attr "type" "imul") (eq_attr "memory" "none,unknown"))) "athlon-direct0,athlon-ieu0,athlon-mult,athlon-ieu0") +(define_insn_reservation "athlon_imul_amdfam10_HI" 4 + (and (eq_attr "cpu" "amdfam10") + (and (eq_attr "type" "imul") + (and (eq_attr "mode" "HI") + (eq_attr "memory" "none,unknown")))) + "athlon-vector,athlon-ieu0,athlon-mult,nothing,athlon-ieu0") (define_insn_reservation "athlon_imul_mem" 8 (and (eq_attr "cpu" "athlon") (and (eq_attr "type" "imul") (eq_attr "memory" "load,both"))) "athlon-vector,athlon-load,athlon-ieu,athlon-mult,nothing,nothing,athlon-ieu") (define_insn_reservation "athlon_imul_mem_k8_DI" 7 - (and (eq_attr "cpu" "k8,generic64") + (and (eq_attr "cpu" "k8,generic64,amdfam10") (and (eq_attr "type" "imul") (and (eq_attr "mode" "DI") (eq_attr "memory" "load,both")))) "athlon-vector,athlon-load,athlon-ieu,athlon-mult,nothing,athlon-ieu") (define_insn_reservation "athlon_imul_mem_k8" 6 - (and (eq_attr "cpu" "k8,generic64") + (and (eq_attr "cpu" "k8,generic64,amdfam10") (and (eq_attr "type" "imul") (eq_attr "memory" "load,both"))) "athlon-vector,athlon-load,athlon-ieu,athlon-mult,athlon-ieu") @@ -209,21 +230,23 @@ ;; other instructions. ;; ??? Experiments show that the idiv can overlap with roughly 6 cycles ;; of the other code +;; Using the same heuristics for amdfam10 as K8 with idiv (define_insn_reservation "athlon_idiv" 6 - (and (eq_attr "cpu" "athlon,k8,generic64") + (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10") (and (eq_attr "type" "idiv") (eq_attr "memory" "none,unknown"))) "athlon-vector,(athlon-ieu0*6+(athlon-fpsched,athlon-fvector))") (define_insn_reservation "athlon_idiv_mem" 9 - (and (eq_attr "cpu" "athlon,k8,generic64") + (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10") (and (eq_attr "type" "idiv") (eq_attr "memory" "load,both"))) "athlon-vector,((athlon-load,athlon-ieu0*6)+(athlon-fpsched,athlon-fvector))") ;; The parallelism of string instructions is not documented. Model it same way ;; as idiv to create smaller automata. This probably does not matter much. +;; Using the same heuristics for amdfam10 as K8 with idiv (define_insn_reservation "athlon_str" 6 - (and (eq_attr "cpu" "athlon,k8,generic64") + (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10") (and (eq_attr "type" "str") (eq_attr "memory" "load,both,store"))) "athlon-vector,athlon-load,athlon-ieu0*6") @@ -234,34 +257,62 @@ (and (eq_attr "unit" "integer,unknown") (eq_attr "memory" "none,unknown")))) "athlon-direct,athlon-ieu") +(define_insn_reservation "athlon_idirect_amdfam10" 1 + (and (eq_attr "cpu" "amdfam10") + (and (eq_attr "amdfam10_decode" "direct") + (and (eq_attr "unit" "integer,unknown") + (eq_attr "memory" "none,unknown")))) + "athlon-direct,athlon-ieu") (define_insn_reservation "athlon_ivector" 2 (and (eq_attr "cpu" "athlon,k8,generic64") (and (eq_attr "athlon_decode" "vector") (and (eq_attr "unit" "integer,unknown") (eq_attr "memory" "none,unknown")))) "athlon-vector,athlon-ieu,athlon-ieu") +(define_insn_reservation "athlon_ivector_amdfam10" 2 + (and (eq_attr "cpu" "amdfam10") + (and (eq_attr "amdfam10_decode" "vector") + (and (eq_attr "unit" "integer,unknown") + (eq_attr "memory" "none,unknown")))) + "athlon-vector,athlon-ieu,athlon-ieu") + (define_insn_reservation "athlon_idirect_loadmov" 3 - (and (eq_attr "cpu" "athlon,k8,generic64") + (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10") (and (eq_attr "type" "imov") (eq_attr "memory" "load"))) "athlon-direct,athlon-load") + (define_insn_reservation "athlon_idirect_load" 4 (and (eq_attr "cpu" "athlon,k8,generic64") (and (eq_attr "athlon_decode" "direct") (and (eq_attr "unit" "integer,unknown") (eq_attr "memory" "load")))) "athlon-direct,athlon-load,athlon-ieu") +(define_insn_reservation "athlon_idirect_load_amdfam10" 4 + (and (eq_attr "cpu" "amdfam10") + (and (eq_attr "amdfam10_decode" "direct") + (and (eq_attr "unit" "integer,unknown") + (eq_attr "memory" "load")))) + "athlon-direct,athlon-load,athlon-ieu") (define_insn_reservation "athlon_ivector_load" 6 (and (eq_attr "cpu" "athlon,k8,generic64") (and (eq_attr "athlon_decode" "vector") (and (eq_attr "unit" "integer,unknown") (eq_attr "memory" "load")))) "athlon-vector,athlon-load,athlon-ieu,athlon-ieu") +(define_insn_reservation "athlon_ivector_load_amdfam10" 6 + (and (eq_attr "cpu" "amdfam10") + (and (eq_attr "amdfam10_decode" "vector") + (and (eq_attr "unit" "integer,unknown") + (eq_attr "memory" "load")))) + "athlon-vector,athlon-load,athlon-ieu,athlon-ieu") + (define_insn_reservation "athlon_idirect_movstore" 1 - (and (eq_attr "cpu" "athlon,k8,generic64") + (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10") (and (eq_attr "type" "imov") (eq_attr "memory" "store"))) "athlon-direct,athlon-agu,athlon-store") + (define_insn_reservation "athlon_idirect_both" 4 (and (eq_attr "cpu" "athlon,k8,generic64") (and (eq_attr "athlon_decode" "direct") @@ -270,6 +321,15 @@ "athlon-direct,athlon-load, athlon-ieu,athlon-store, athlon-store") +(define_insn_reservation "athlon_idirect_both_amdfam10" 4 + (and (eq_attr "cpu" "amdfam10") + (and (eq_attr "amdfam10_decode" "direct") + (and (eq_attr "unit" "integer,unknown") + (eq_attr "memory" "both")))) + "athlon-direct,athlon-load, + athlon-ieu,athlon-store, + athlon-store") + (define_insn_reservation "athlon_ivector_both" 6 (and (eq_attr "cpu" "athlon,k8,generic64") (and (eq_attr "athlon_decode" "vector") @@ -279,6 +339,16 @@ athlon-ieu, athlon-ieu, athlon-store") +(define_insn_reservation "athlon_ivector_both_amdfam10" 6 + (and (eq_attr "cpu" "amdfam10") + (and (eq_attr "amdfam10_decode" "vector") + (and (eq_attr "unit" "integer,unknown") + (eq_attr "memory" "both")))) + "athlon-vector,athlon-load, + athlon-ieu, + athlon-ieu, + athlon-store") + (define_insn_reservation "athlon_idirect_store" 1 (and (eq_attr "cpu" "athlon,k8,generic64") (and (eq_attr "athlon_decode" "direct") @@ -286,6 +356,14 @@ (eq_attr "memory" "store")))) "athlon-direct,(athlon-ieu+athlon-agu), athlon-store") +(define_insn_reservation "athlon_idirect_store_amdfam10" 1 + (and (eq_attr "cpu" "amdfam10") + (and (eq_attr "amdfam10_decode" "direct") + (and (eq_attr "unit" "integer,unknown") + (eq_attr "memory" "store")))) + "athlon-direct,(athlon-ieu+athlon-agu), + athlon-store") + (define_insn_reservation "athlon_ivector_store" 2 (and (eq_attr "cpu" "athlon,k8,generic64") (and (eq_attr "athlon_decode" "vector") @@ -293,6 +371,13 @@ (eq_attr "memory" "store")))) "athlon-vector,(athlon-ieu+athlon-agu),athlon-ieu, athlon-store") +(define_insn_reservation "athlon_ivector_store_amdfam10" 2 + (and (eq_attr "cpu" "amdfam10") + (and (eq_attr "amdfam10_decode" "vector") + (and (eq_attr "unit" "integer,unknown") + (eq_attr "memory" "store")))) + "athlon-vector,(athlon-ieu+athlon-agu),athlon-ieu, + athlon-store") ;; Athlon floatin point unit (define_insn_reservation "athlon_fldxf" 12 @@ -302,7 +387,7 @@ (eq_attr "mode" "XF")))) "athlon-vector,athlon-fpload2,athlon-fvector*9") (define_insn_reservation "athlon_fldxf_k8" 13 - (and (eq_attr "cpu" "k8,generic64") + (and (eq_attr "cpu" "k8,generic64,amdfam10") (and (eq_attr "type" "fmov") (and (eq_attr "memory" "load") (eq_attr "mode" "XF")))) @@ -314,7 +399,7 @@ (eq_attr "memory" "load"))) "athlon-direct,athlon-fpload,athlon-fany") (define_insn_reservation "athlon_fld_k8" 2 - (and (eq_attr "cpu" "k8,generic64") + (and (eq_attr "cpu" "k8,generic64,amdfam10") (and (eq_attr "type" "fmov") (eq_attr "memory" "load"))) "athlon-direct,athlon-fploadk8,athlon-fstore") @@ -326,7 +411,7 @@ (eq_attr "mode" "XF")))) "athlon-vector,(athlon-fpsched+athlon-agu),(athlon-store2+(athlon-fvector*7))") (define_insn_reservation "athlon_fstxf_k8" 8 - (and (eq_attr "cpu" "k8,generic64") + (and (eq_attr "cpu" "k8,generic64,amdfam10") (and (eq_attr "type" "fmov") (and (eq_attr "memory" "store,both") (eq_attr "mode" "XF")))) @@ -337,16 +422,16 @@ (eq_attr "memory" "store,both"))) "athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)") (define_insn_reservation "athlon_fst_k8" 2 - (and (eq_attr "cpu" "k8,generic64") + (and (eq_attr "cpu" "k8,generic64,amdfam10") (and (eq_attr "type" "fmov") (eq_attr "memory" "store,both"))) "athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)") (define_insn_reservation "athlon_fist" 4 - (and (eq_attr "cpu" "athlon,k8,generic64") + (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10") (eq_attr "type" "fistp,fisttp")) "athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)") (define_insn_reservation "athlon_fmov" 2 - (and (eq_attr "cpu" "athlon,k8,generic64") + (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10") (eq_attr "type" "fmov")) "athlon-direct,athlon-fpsched,athlon-faddmul") (define_insn_reservation "athlon_fadd_load" 4 @@ -355,12 +440,12 @@ (eq_attr "memory" "load"))) "athlon-direct,athlon-fpload,athlon-fadd") (define_insn_reservation "athlon_fadd_load_k8" 6 - (and (eq_attr "cpu" "k8,generic64") + (and (eq_attr "cpu" "k8,generic64,amdfam10") (and (eq_attr "type" "fop") (eq_attr "memory" "load"))) "athlon-direct,athlon-fploadk8,athlon-fadd") (define_insn_reservation "athlon_fadd" 4 - (and (eq_attr "cpu" "athlon,k8,generic64") + (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10") (eq_attr "type" "fop")) "athlon-direct,athlon-fpsched,athlon-fadd") (define_insn_reservation "athlon_fmul_load" 4 @@ -369,16 +454,16 @@ (eq_attr "memory" "load"))) "athlon-direct,athlon-fpload,athlon-fmul") (define_insn_reservation "athlon_fmul_load_k8" 6 - (and (eq_attr "cpu" "k8,generic64") + (and (eq_attr "cpu" "k8,generic64,amdfam10") (and (eq_attr "type" "fmul") (eq_attr "memory" "load"))) "athlon-direct,athlon-fploadk8,athlon-fmul") (define_insn_reservation "athlon_fmul" 4 - (and (eq_attr "cpu" "athlon,k8,generic64") + (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10") (eq_attr "type" "fmul")) "athlon-direct,athlon-fpsched,athlon-fmul") (define_insn_reservation "athlon_fsgn" 2 - (and (eq_attr "cpu" "athlon,k8,generic64") + (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10") (eq_attr "type" "fsgn")) "athlon-direct,athlon-fpsched,athlon-fmul") (define_insn_reservation "athlon_fdiv_load" 24 @@ -387,7 +472,7 @@ (eq_attr "memory" "load"))) "athlon-direct,athlon-fpload,athlon-fmul") (define_insn_reservation "athlon_fdiv_load_k8" 13 - (and (eq_attr "cpu" "k8,generic64") + (and (eq_attr "cpu" "k8,generic64,amdfam10") (and (eq_attr "type" "fdiv") (eq_attr "memory" "load"))) "athlon-direct,athlon-fploadk8,athlon-fmul") @@ -396,16 +481,16 @@ (eq_attr "type" "fdiv")) "athlon-direct,athlon-fpsched,athlon-fmul") (define_insn_reservation "athlon_fdiv_k8" 11 - (and (eq_attr "cpu" "k8,generic64") + (and (eq_attr "cpu" "k8,generic64,amdfam10") (eq_attr "type" "fdiv")) "athlon-direct,athlon-fpsched,athlon-fmul") (define_insn_reservation "athlon_fpspc_load" 103 - (and (eq_attr "cpu" "athlon,k8,generic64") + (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10") (and (eq_attr "type" "fpspc") (eq_attr "memory" "load"))) "athlon-vector,athlon-fpload,athlon-fvector") (define_insn_reservation "athlon_fpspc" 100 - (and (eq_attr "cpu" "athlon,k8,generic64") + (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10") (eq_attr "type" "fpspc")) "athlon-vector,athlon-fpsched,athlon-fvector") (define_insn_reservation "athlon_fcmov_load" 7 @@ -418,12 +503,12 @@ (eq_attr "type" "fcmov")) "athlon-vector,athlon-fpsched,athlon-fvector") (define_insn_reservation "athlon_fcmov_load_k8" 17 - (and (eq_attr "cpu" "k8,generic64") + (and (eq_attr "cpu" "k8,generic64,amdfam10") (and (eq_attr "type" "fcmov") (eq_attr "memory" "load"))) "athlon-vector,athlon-fploadk8,athlon-fvector") (define_insn_reservation "athlon_fcmov_k8" 15 - (and (eq_attr "cpu" "k8,generic64") + (and (eq_attr "cpu" "k8,generic64,amdfam10") (eq_attr "type" "fcmov")) "athlon-vector,athlon-fpsched,athlon-fvector") ;; fcomi is vector decoded by uses only one pipe. @@ -434,13 +519,13 @@ (eq_attr "memory" "load")))) "athlon-vector,athlon-fpload,athlon-fadd") (define_insn_reservation "athlon_fcomi_load_k8" 5 - (and (eq_attr "cpu" "k8,generic64") + (and (eq_attr "cpu" "k8,generic64,amdfam10") (and (eq_attr "type" "fcmp") (and (eq_attr "athlon_decode" "vector") (eq_attr "memory" "load")))) "athlon-vector,athlon-fploadk8,athlon-fadd") (define_insn_reservation "athlon_fcomi" 3 - (and (eq_attr "cpu" "athlon,k8,generic64") + (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10") (and (eq_attr "athlon_decode" "vector") (eq_attr "type" "fcmp"))) "athlon-vector,athlon-fpsched,athlon-fadd") @@ -450,18 +535,18 @@ (eq_attr "memory" "load"))) "athlon-direct,athlon-fpload,athlon-fadd") (define_insn_reservation "athlon_fcom_load_k8" 4 - (and (eq_attr "cpu" "k8,generic64") + (and (eq_attr "cpu" "k8,generic64,amdfam10") (and (eq_attr "type" "fcmp") (eq_attr "memory" "load"))) "athlon-direct,athlon-fploadk8,athlon-fadd") (define_insn_reservation "athlon_fcom" 2 - (and (eq_attr "cpu" "athlon,k8,generic64") + (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10") (eq_attr "type" "fcmp")) "athlon-direct,athlon-fpsched,athlon-fadd") ;; Never seen by the scheduler because we still don't do post reg-stack ;; scheduling. ;(define_insn_reservation "athlon_fxch" 2 -; (and (eq_attr "cpu" "athlon,k8,generic64") +; (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10") ; (eq_attr "type" "fxch")) ; "athlon-direct,athlon-fpsched,athlon-fany") @@ -516,6 +601,23 @@ (and (eq_attr "type" "mmxmov,ssemov") (eq_attr "memory" "load"))) "athlon-direct,athlon-fploadk8,athlon-fstore") +;; On AMDFAM10 all double, single and integer packed and scalar SSEx data +;; loads generated are direct path, latency of 2 and do not use any FP +;; executions units. No seperate entries for movlpx/movhpx loads, which +;; are direct path, latency of 4 and use the FADD/FMUL FP execution units, +;; as they will not be generated. +(define_insn_reservation "athlon_sseld_amdfam10" 2 + (and (eq_attr "cpu" "amdfam10") + (and (eq_attr "type" "ssemov") + (eq_attr "memory" "load"))) + "athlon-direct,athlon-fploadk8") +;; On AMDFAM10 MMX data loads generated are direct path, latency of 4 +;; and can use any FP executions units +(define_insn_reservation "athlon_mmxld_amdfam10" 4 + (and (eq_attr "cpu" "amdfam10") + (and (eq_attr "type" "mmxmov") + (eq_attr "memory" "load"))) + "athlon-direct,athlon-fploadk8, athlon-fany") (define_insn_reservation "athlon_mmxssest" 3 (and (eq_attr "cpu" "k8,generic64") (and (eq_attr "type" "mmxmov,ssemov") @@ -533,6 +635,25 @@ (and (eq_attr "type" "mmxmov,ssemov") (eq_attr "memory" "store,both"))) "athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)") +;; On AMDFAM10 all double, single and integer packed SSEx data stores +;; generated are all double path, latency of 2 and use the FSTORE FP +;; execution unit. No entries seperate for movupx/movdqu, which are +;; vector path, latency of 3 and use the FSTORE*2 FP execution unit, +;; as they will not be generated. +(define_insn_reservation "athlon_ssest_amdfam10" 2 + (and (eq_attr "cpu" "amdfam10") + (and (eq_attr "type" "ssemov") + (and (eq_attr "mode" "V4SF,V2DF,TI") + (eq_attr "memory" "store,both")))) + "athlon-double,(athlon-fpsched+athlon-agu),((athlon-fstore+athlon-store)*2)") +;; On AMDFAM10 all double, single and integer scalar SSEx and MMX +;; data stores generated are all direct path, latency of 2 and use +;; the FSTORE FP execution unit +(define_insn_reservation "athlon_mmxssest_short_amdfam10" 2 + (and (eq_attr "cpu" "amdfam10") + (and (eq_attr "type" "mmxmov,ssemov") + (eq_attr "memory" "store,both"))) + "athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)") (define_insn_reservation "athlon_movaps_k8" 2 (and (eq_attr "cpu" "k8,generic64") (and (eq_attr "type" "ssemov") @@ -578,6 +699,11 @@ (and (eq_attr "type" "sselog,sselog1") (eq_attr "memory" "load"))) "athlon-double,athlon-fpload2k8,(athlon-fmul*2)") +(define_insn_reservation "athlon_sselog_load_amdfam10" 4 + (and (eq_attr "cpu" "amdfam10") + (and (eq_attr "type" "sselog,sselog1") + (eq_attr "memory" "load"))) + "athlon-direct,athlon-fploadk8,(athlon-fadd|athlon-fmul)") (define_insn_reservation "athlon_sselog" 3 (and (eq_attr "cpu" "athlon") (eq_attr "type" "sselog,sselog1")) @@ -586,6 +712,11 @@ (and (eq_attr "cpu" "k8,generic64") (eq_attr "type" "sselog,sselog1")) "athlon-double,athlon-fpsched,athlon-fmul") +(define_insn_reservation "athlon_sselog_amdfam10" 2 + (and (eq_attr "cpu" "amdfam10") + (eq_attr "type" "sselog,sselog1")) + "athlon-direct,athlon-fpsched,(athlon-fadd|athlon-fmul)") + ;; ??? pcmp executes in addmul, probably not worthwhile to bother about that. (define_insn_reservation "athlon_ssecmp_load" 2 (and (eq_attr "cpu" "athlon") @@ -594,13 +725,13 @@ (eq_attr "memory" "load")))) "athlon-direct,athlon-fpload,athlon-fadd") (define_insn_reservation "athlon_ssecmp_load_k8" 4 - (and (eq_attr "cpu" "k8,generic64") + (and (eq_attr "cpu" "k8,generic64,amdfam10") (and (eq_attr "type" "ssecmp") (and (eq_attr "mode" "SF,DF,DI,TI") (eq_attr "memory" "load")))) "athlon-direct,athlon-fploadk8,athlon-fadd") (define_insn_reservation "athlon_ssecmp" 2 - (and (eq_attr "cpu" "athlon,k8,generic64") + (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10") (and (eq_attr "type" "ssecmp") (eq_attr "mode" "SF,DF,DI,TI"))) "athlon-direct,athlon-fpsched,athlon-fadd") @@ -614,6 +745,11 @@ (and (eq_attr "type" "ssecmp") (eq_attr "memory" "load"))) "athlon-double,athlon-fpload2k8,(athlon-fadd*2)") +(define_insn_reservation "athlon_ssecmpvector_load_amdfam10" 4 + (and (eq_attr "cpu" "amdfam10") + (and (eq_attr "type" "ssecmp") + (eq_attr "memory" "load"))) + "athlon-direct,athlon-fploadk8,athlon-fadd") (define_insn_reservation "athlon_ssecmpvector" 3 (and (eq_attr "cpu" "athlon") (eq_attr "type" "ssecmp")) @@ -622,6 +758,10 @@ (and (eq_attr "cpu" "k8,generic64") (eq_attr "type" "ssecmp")) "athlon-double,athlon-fpsched,(athlon-fadd*2)") +(define_insn_reservation "athlon_ssecmpvector_amdfam10" 2 + (and (eq_attr "cpu" "amdfam10") + (eq_attr "type" "ssecmp")) + "athlon-direct,athlon-fpsched,athlon-fadd") (define_insn_reservation "athlon_ssecomi_load" 4 (and (eq_attr "cpu" "athlon") (and (eq_attr "type" "ssecomi") @@ -632,10 +772,20 @@ (and (eq_attr "type" "ssecomi") (eq_attr "memory" "load"))) "athlon-vector,athlon-fploadk8,athlon-fadd") +(define_insn_reservation "athlon_ssecomi_load_amdfam10" 5 + (and (eq_attr "cpu" "amdfam10") + (and (eq_attr "type" "ssecomi") + (eq_attr "memory" "load"))) + "athlon-direct,athlon-fploadk8,athlon-fadd") (define_insn_reservation "athlon_ssecomi" 4 (and (eq_attr "cpu" "athlon,k8,generic64") (eq_attr "type" "ssecmp")) "athlon-vector,athlon-fpsched,athlon-fadd") +(define_insn_reservation "athlon_ssecomi_amdfam10" 3 + (and (eq_attr "cpu" "amdfam10") +;; It seems athlon_ssecomi has a bug in the attr_type, fixed for amdfam10 + (eq_attr "type" "ssecomi")) + "athlon-direct,athlon-fpsched,athlon-fadd") (define_insn_reservation "athlon_sseadd_load" 4 (and (eq_attr "cpu" "athlon") (and (eq_attr "type" "sseadd") @@ -643,13 +793,13 @@ (eq_attr "memory" "load")))) "athlon-direct,athlon-fpload,athlon-fadd") (define_insn_reservation "athlon_sseadd_load_k8" 6 - (and (eq_attr "cpu" "k8,generic64") + (and (eq_attr "cpu" "k8,generic64,amdfam10") (and (eq_attr "type" "sseadd") (and (eq_attr "mode" "SF,DF,DI") (eq_attr "memory" "load")))) "athlon-direct,athlon-fploadk8,athlon-fadd") (define_insn_reservation "athlon_sseadd" 4 - (and (eq_attr "cpu" "athlon,k8,generic64") + (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10") (and (eq_attr "type" "sseadd") (eq_attr "mode" "SF,DF,DI"))) "athlon-direct,athlon-fpsched,athlon-fadd") @@ -663,6 +813,11 @@ (and (eq_attr "type" "sseadd") (eq_attr "memory" "load"))) "athlon-double,athlon-fpload2k8,(athlon-fadd*2)") +(define_insn_reservation "athlon_sseaddvector_load_amdfam10" 6 + (and (eq_attr "cpu" "amdfam10") + (and (eq_attr "type" "sseadd") + (eq_attr "memory" "load"))) + "athlon-direct,athlon-fploadk8,athlon-fadd") (define_insn_reservation "athlon_sseaddvector" 5 (and (eq_attr "cpu" "athlon") (eq_attr "type" "sseadd")) @@ -671,6 +826,10 @@ (and (eq_attr "cpu" "k8,generic64") (eq_attr "type" "sseadd")) "athlon-double,athlon-fpsched,(athlon-fadd*2)") +(define_insn_reservation "athlon_sseaddvector_amdfam10" 4 + (and (eq_attr "cpu" "amdfam10") + (eq_attr "type" "sseadd")) + "athlon-direct,athlon-fpsched,athlon-fadd") ;; Conversions behaves very irregularly and the scheduling is critical here. ;; Take each instruction separately. Assume that the mode is always set to the @@ -684,12 +843,25 @@ (and (eq_attr "mode" "DF") (eq_attr "memory" "load"))))) "athlon-direct,athlon-fploadk8,athlon-fstore") +(define_insn_reservation "athlon_ssecvt_cvtss2sd_load_amdfam10" 7 + (and (eq_attr "cpu" "amdfam10") + (and (eq_attr "type" "ssecvt") + (and (eq_attr "amdfam10_decode" "double") + (and (eq_attr "mode" "DF") + (eq_attr "memory" "load"))))) + "athlon-double,athlon-fploadk8,(athlon-faddmul+athlon-fstore)") (define_insn_reservation "athlon_ssecvt_cvtss2sd" 2 (and (eq_attr "cpu" "athlon,k8,generic64") (and (eq_attr "type" "ssecvt") (and (eq_attr "athlon_decode" "direct") (eq_attr "mode" "DF")))) "athlon-direct,athlon-fpsched,athlon-fstore") +(define_insn_reservation "athlon_ssecvt_cvtss2sd_amdfam10" 7 + (and (eq_attr "cpu" "amdfam10") + (and (eq_attr "type" "ssecvt") + (and (eq_attr "amdfam10_decode" "vector") + (eq_attr "mode" "DF")))) + "athlon-vector,athlon-fpsched,athlon-faddmul,(athlon-fstore*2)") ;; cvtps2pd. Model same way the other double decoded FP conversions. (define_insn_reservation "athlon_ssecvt_cvtps2pd_load_k8" 5 (and (eq_attr "cpu" "k8,athlon,generic64") @@ -698,12 +870,25 @@ (and (eq_attr "mode" "V2DF,V4SF,TI") (eq_attr "memory" "load"))))) "athlon-double,athlon-fpload2k8,(athlon-fstore*2)") +(define_insn_reservation "athlon_ssecvt_cvtps2pd_load_amdfam10" 4 + (and (eq_attr "cpu" "amdfam10") + (and (eq_attr "type" "ssecvt") + (and (eq_attr "amdfam10_decode" "direct") + (and (eq_attr "mode" "V2DF,V4SF,TI") + (eq_attr "memory" "load"))))) + "athlon-direct,athlon-fploadk8,athlon-fstore") (define_insn_reservation "athlon_ssecvt_cvtps2pd_k8" 3 (and (eq_attr "cpu" "k8,athlon,generic64") (and (eq_attr "type" "ssecvt") (and (eq_attr "athlon_decode" "double") (eq_attr "mode" "V2DF,V4SF,TI")))) "athlon-double,athlon-fpsched,athlon-fstore,athlon-fstore") +(define_insn_reservation "athlon_ssecvt_cvtps2pd_amdfam10" 2 + (and (eq_attr "cpu" "amdfam10") + (and (eq_attr "type" "ssecvt") + (and (eq_attr "amdfam10_decode" "direct") + (eq_attr "mode" "V2DF,V4SF,TI")))) + "athlon-direct,athlon-fpsched,athlon-fstore") ;; cvtsi2sd mem,reg is directpath path (cvtsi2sd reg,reg is doublepath) ;; cvtsi2sd has troughput 1 and is executed in store unit with latency of 6 (define_insn_reservation "athlon_sseicvt_cvtsi2sd_load" 6 @@ -713,6 +898,13 @@ (and (eq_attr "mode" "SF,DF") (eq_attr "memory" "load"))))) "athlon-direct,athlon-fploadk8,athlon-fstore") +(define_insn_reservation "athlon_sseicvt_cvtsi2sd_load_amdfam10" 9 + (and (eq_attr "cpu" "amdfam10") + (and (eq_attr "type" "sseicvt") + (and (eq_attr "amdfam10_decode" "double") + (and (eq_attr "mode" "SF,DF") + (eq_attr "memory" "load"))))) + "athlon-double,athlon-fploadk8,(athlon-faddmul+athlon-fstore)") ;; cvtsi2ss mem, reg is doublepath (define_insn_reservation "athlon_sseicvt_cvtsi2ss_load" 9 (and (eq_attr "cpu" "athlon") @@ -728,6 +920,13 @@ (and (eq_attr "mode" "SF,DF") (eq_attr "memory" "load"))))) "athlon-double,athlon-fploadk8,(athlon-fstore*2)") +(define_insn_reservation "athlon_sseicvt_cvtsi2ss_load_amdfam10" 9 + (and (eq_attr "cpu" "amdfam10") + (and (eq_attr "type" "sseicvt") + (and (eq_attr "amdfam10_decode" "double") + (and (eq_attr "mode" "SF,DF") + (eq_attr "memory" "load"))))) + "athlon-double,athlon-fploadk8,(athlon-faddmul+athlon-fstore)") ;; cvtsi2sd reg,reg is double decoded (vector on Athlon) (define_insn_reservation "athlon_sseicvt_cvtsi2sd_k8" 11 (and (eq_attr "cpu" "k8,athlon,generic64") @@ -736,6 +935,13 @@ (and (eq_attr "mode" "SF,DF") (eq_attr "memory" "none"))))) "athlon-double,athlon-fploadk8,athlon-fstore") +(define_insn_reservation "athlon_sseicvt_cvtsi2sd_amdfam10" 14 + (and (eq_attr "cpu" "amdfam10") + (and (eq_attr "type" "sseicvt") + (and (eq_attr "amdfam10_decode" "vector") + (and (eq_attr "mode" "SF,DF") + (eq_attr "memory" "none"))))) + "athlon-vector,athlon-fploadk8,(athlon-faddmul+athlon-fstore)") ;; cvtsi2ss reg, reg is doublepath (define_insn_reservation "athlon_sseicvt_cvtsi2ss" 14 (and (eq_attr "cpu" "athlon,k8,generic64") @@ -744,6 +950,13 @@ (and (eq_attr "mode" "SF,DF") (eq_attr "memory" "none"))))) "athlon-vector,athlon-fploadk8,(athlon-fvector*2)") +(define_insn_reservation "athlon_sseicvt_cvtsi2ss_amdfam10" 14 + (and (eq_attr "cpu" "amdfam10") + (and (eq_attr "type" "sseicvt") + (and (eq_attr "amdfam10_decode" "vector") + (and (eq_attr "mode" "SF,DF") + (eq_attr "memory" "none"))))) + "athlon-vector,athlon-fploadk8,(athlon-faddmul+athlon-fstore)") ;; cvtsd2ss mem,reg is doublepath, troughput unknown, latency 9 (define_insn_reservation "athlon_ssecvt_cvtsd2ss_load_k8" 9 (and (eq_attr "cpu" "k8,athlon,generic64") @@ -752,6 +965,13 @@ (and (eq_attr "mode" "SF") (eq_attr "memory" "load"))))) "athlon-double,athlon-fploadk8,(athlon-fstore*3)") +(define_insn_reservation "athlon_ssecvt_cvtsd2ss_load_amdfam10" 9 + (and (eq_attr "cpu" "amdfam10") + (and (eq_attr "type" "ssecvt") + (and (eq_attr "amdfam10_decode" "double") + (and (eq_attr "mode" "SF") + (eq_attr "memory" "load"))))) + "athlon-double,athlon-fploadk8,(athlon-faddmul+athlon-fstore)") ;; cvtsd2ss reg,reg is vectorpath, troughput unknown, latency 12 (define_insn_reservation "athlon_ssecvt_cvtsd2ss" 12 (and (eq_attr "cpu" "athlon,k8,generic64") @@ -760,6 +980,13 @@ (and (eq_attr "mode" "SF") (eq_attr "memory" "none"))))) "athlon-vector,athlon-fpsched,(athlon-fvector*3)") +(define_insn_reservation "athlon_ssecvt_cvtsd2ss_amdfam10" 8 + (and (eq_attr "cpu" "amdfam10") + (and (eq_attr "type" "ssecvt") + (and (eq_attr "amdfam10_decode" "vector") + (and (eq_attr "mode" "SF") + (eq_attr "memory" "none"))))) + "athlon-vector,athlon-fpsched,athlon-faddmul,(athlon-fstore*2)") (define_insn_reservation "athlon_ssecvt_cvtpd2ps_load_k8" 8 (and (eq_attr "cpu" "athlon,k8,generic64") (and (eq_attr "type" "ssecvt") @@ -767,6 +994,13 @@ (and (eq_attr "mode" "V4SF,V2DF,TI") (eq_attr "memory" "load"))))) "athlon-double,athlon-fpload2k8,(athlon-fstore*3)") +(define_insn_reservation "athlon_ssecvt_cvtpd2ps_load_amdfam10" 9 + (and (eq_attr "cpu" "amdfam10") + (and (eq_attr "type" "ssecvt") + (and (eq_attr "amdfam10_decode" "double") + (and (eq_attr "mode" "V4SF,V2DF,TI") + (eq_attr "memory" "load"))))) + "athlon-double,athlon-fploadk8,(athlon-faddmul+athlon-fstore)") ;; cvtpd2ps mem,reg is vectorpath, troughput unknown, latency 10 ;; ??? Why it is fater than cvtsd2ss? (define_insn_reservation "athlon_ssecvt_cvtpd2ps" 8 @@ -776,6 +1010,13 @@ (and (eq_attr "mode" "V4SF,V2DF,TI") (eq_attr "memory" "none"))))) "athlon-vector,athlon-fpsched,athlon-fvector*2") +(define_insn_reservation "athlon_ssecvt_cvtpd2ps_amdfam10" 7 + (and (eq_attr "cpu" "amdfam10") + (and (eq_attr "type" "ssecvt") + (and (eq_attr "amdfam10_decode" "double") + (and (eq_attr "mode" "V4SF,V2DF,TI") + (eq_attr "memory" "none"))))) + "athlon-double,athlon-fpsched,(athlon-faddmul+athlon-fstore)") ;; cvtsd2si mem,reg is doublepath, troughput 1, latency 9 (define_insn_reservation "athlon_secvt_cvtsX2si_load" 9 (and (eq_attr "cpu" "athlon,k8,generic64") @@ -784,6 +1025,13 @@ (and (eq_attr "mode" "SI,DI") (eq_attr "memory" "load"))))) "athlon-vector,athlon-fploadk8,athlon-fvector") +(define_insn_reservation "athlon_secvt_cvtsX2si_load_amdfam10" 10 + (and (eq_attr "cpu" "amdfam10") + (and (eq_attr "type" "sseicvt") + (and (eq_attr "amdfam10_decode" "double") + (and (eq_attr "mode" "SI,DI") + (eq_attr "memory" "load"))))) + "athlon-double,athlon-fploadk8,(athlon-fadd+athlon-fstore)") ;; cvtsd2si reg,reg is doublepath, troughput 1, latency 9 (define_insn_reservation "athlon_ssecvt_cvtsX2si" 9 (and (eq_attr "cpu" "athlon") @@ -799,6 +1047,29 @@ (and (eq_attr "mode" "SI,DI") (eq_attr "memory" "none"))))) "athlon-double,athlon-fpsched,athlon-fstore") +(define_insn_reservation "athlon_ssecvt_cvtsX2si_amdfam10" 8 + (and (eq_attr "cpu" "amdfam10") + (and (eq_attr "type" "sseicvt") + (and (eq_attr "amdfam10_decode" "double") + (and (eq_attr "mode" "SI,DI") + (eq_attr "memory" "none"))))) + "athlon-double,athlon-fpsched,(athlon-fadd+athlon-fstore)") +;; cvtpd2dq reg,mem is doublepath, troughput 1, latency 9 on amdfam10 +(define_insn_reservation "athlon_sseicvt_cvtpd2dq_load_amdfam10" 9 + (and (eq_attr "cpu" "amdfam10") + (and (eq_attr "type" "sseicvt") + (and (eq_attr "amdfam10_decode" "double") + (and (eq_attr "mode" "TI") + (eq_attr "memory" "load"))))) + "athlon-double,athlon-fploadk8,(athlon-faddmul+athlon-fstore)") +;; cvtpd2dq reg,mem is doublepath, troughput 1, latency 7 on amdfam10 +(define_insn_reservation "athlon_sseicvt_cvtpd2dq_amdfam10" 7 + (and (eq_attr "cpu" "amdfam10") + (and (eq_attr "type" "sseicvt") + (and (eq_attr "amdfam10_decode" "double") + (and (eq_attr "mode" "TI") + (eq_attr "memory" "none"))))) + "athlon-double,athlon-fpsched,(athlon-faddmul+athlon-fstore)") (define_insn_reservation "athlon_ssemul_load" 4 @@ -808,13 +1079,13 @@ (eq_attr "memory" "load")))) "athlon-direct,athlon-fpload,athlon-fmul") (define_insn_reservation "athlon_ssemul_load_k8" 6 - (and (eq_attr "cpu" "k8,generic64") + (and (eq_attr "cpu" "k8,generic64,amdfam10") (and (eq_attr "type" "ssemul") (and (eq_attr "mode" "SF,DF") (eq_attr "memory" "load")))) "athlon-direct,athlon-fploadk8,athlon-fmul") (define_insn_reservation "athlon_ssemul" 4 - (and (eq_attr "cpu" "athlon,k8,generic64") + (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10") (and (eq_attr "type" "ssemul") (eq_attr "mode" "SF,DF"))) "athlon-direct,athlon-fpsched,athlon-fmul") @@ -828,6 +1099,11 @@ (and (eq_attr "type" "ssemul") (eq_attr "memory" "load"))) "athlon-double,athlon-fpload2k8,(athlon-fmul*2)") +(define_insn_reservation "athlon_ssemulvector_load_amdfam10" 6 + (and (eq_attr "cpu" "amdfam10") + (and (eq_attr "type" "ssemul") + (eq_attr "memory" "load"))) + "athlon-direct,athlon-fploadk8,athlon-fmul") (define_insn_reservation "athlon_ssemulvector" 5 (and (eq_attr "cpu" "athlon") (eq_attr "type" "ssemul")) @@ -836,6 +1112,10 @@ (and (eq_attr "cpu" "k8,generic64") (eq_attr "type" "ssemul")) "athlon-double,athlon-fpsched,(athlon-fmul*2)") +(define_insn_reservation "athlon_ssemulvector_amdfam10" 4 + (and (eq_attr "cpu" "amdfam10") + (eq_attr "type" "ssemul")) + "athlon-direct,athlon-fpsched,athlon-fmul") ;; divsd timings. divss is faster (define_insn_reservation "athlon_ssediv_load" 20 (and (eq_attr "cpu" "athlon") @@ -844,13 +1124,13 @@ (eq_attr "memory" "load")))) "athlon-direct,athlon-fpload,athlon-fmul*17") (define_insn_reservation "athlon_ssediv_load_k8" 22 - (and (eq_attr "cpu" "k8,generic64") + (and (eq_attr "cpu" "k8,generic64,amdfam10") (and (eq_attr "type" "ssediv") (and (eq_attr "mode" "SF,DF") (eq_attr "memory" "load")))) "athlon-direct,athlon-fploadk8,athlon-fmul*17") (define_insn_reservation "athlon_ssediv" 20 - (and (eq_attr "cpu" "athlon,k8,generic64") + (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10") (and (eq_attr "type" "ssediv") (eq_attr "mode" "SF,DF"))) "athlon-direct,athlon-fpsched,athlon-fmul*17") @@ -864,6 +1144,11 @@ (and (eq_attr "type" "ssediv") (eq_attr "memory" "load"))) "athlon-double,athlon-fpload2k8,athlon-fmul*34") +(define_insn_reservation "athlon_ssedivvector_load_amdfam10" 22 + (and (eq_attr "cpu" "amdfam10") + (and (eq_attr "type" "ssediv") + (eq_attr "memory" "load"))) + "athlon-direct,athlon-fploadk8,athlon-fmul*17") (define_insn_reservation "athlon_ssedivvector" 39 (and (eq_attr "cpu" "athlon") (eq_attr "type" "ssediv")) @@ -872,3 +1157,12 @@ (and (eq_attr "cpu" "k8,generic64") (eq_attr "type" "ssediv")) "athlon-double,athlon-fmul*34") +(define_insn_reservation "athlon_ssedivvector_amdfam10" 20 + (and (eq_attr "cpu" "amdfam10") + (eq_attr "type" "ssediv")) + "athlon-direct,athlon-fmul*17") +(define_insn_reservation "athlon_sseins_amdfam10" 5 + (and (eq_attr "cpu" "amdfam10") + (and (eq_attr "type" "sseins") + (eq_attr "mode" "TI"))) + "athlon-vector,athlon-fpsched,athlon-faddmul") |