您的位置:首页 > 其它

Study notes on the GCC-3.4.6 source (44)

2010-05-18 09:55 423 查看

4.2.3.3. Determine cost of moving data between registers

Next, init_reg_sets_1 evaluates the cost of moving data between registers of different classes, and then between registers and memory.

init_reg_sets_1 (continue)

477 /* Initialize the move cost table. Find every subset of each class
478 and take the maximum cost of moving any subset to any other. */
479
480 for (m = 0; m < (unsigned int) MAX_MACHINE_MODE; m++)
481 if (allocatable_regs_of_mode [m])
482 {
483 for (i = 0; i < N_REG_CLASSES; i++)
484 if (contains_reg_of_mode [i][m])
485 for (j = 0; j < N_REG_CLASSES; j++)
486 {
487 int cost;
488 enum reg_class *p1, *p2;
489
490 if (!contains_reg_of_mode [j][m])
491 {
492 move_cost[m][i][j] = 65536;
493 may_move_in_cost[m][i][j] = 65536;
494 may_move_out_cost[m][i][j] = 65536;
495 }
496 else
497 {
498 cost = REGISTER_MOVE_COST (m, i, j);
499
500 for (p2 = &reg_class_subclasses[j][0];
501 *p2 != LIM_REG_CLASSES;
502 p2++)
503 if (*p2 != i && contains_reg_of_mode [*p2][m])
504 cost = MAX (cost, move_cost [m][i][*p2]);
505
506 for (p1 = &reg_class_subclasses[i][0];
507 *p1 != LIM_REG_CLASSES;
508 p1++)
509 if (*p1 != j && contains_reg_of_mode [*p1][m])
510 cost = MAX (cost, move_cost [m][*p1][j]);
511
512 move_cost[m][i][j] = cost;
513
514 if (reg_class_subset_p (i, j))
515 may_move_in_cost[m][i][j] = 0;
516 else
517 may_move_in_cost[m][i][j] = cost;
518
519 if (reg_class_subset_p (j, i))
520 may_move_out_cost[m][i][j] = 0;
521 else
522 may_move_out_cost[m][i][j] = cost;
523 }
524 }
525 else
526 for (j = 0; j < N_REG_CLASSES; j++)
527 {
528 move_cost[m][i][j] = 65536;
529 may_move_in_cost[m][i][j] = 65536;
530 may_move_out_cost[m][i][j] = 65536;
531 }
532 }
533 }

The cost of moving data between registers naturally differs from target to target. Some moves are not possible because the target does not allow them, and some require the value to be saved temporarily in memory. Above, move_cost records the maximum cost of moving from a register in one class to a register in another class; a pair in which either class contains no register of mode m is given the cost 65536. For a permitted move, macro REGISTER_MOVE_COST evaluates the cost.

2645 #define REGISTER_MOVE_COST(MODE, CLASS1, CLASS2) \ in i386.h
2646 ix86_register_move_cost ((MODE), (CLASS1), (CLASS2))

14880 int
14881 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1, in i386.c
14882 enum reg_class class2)
14883 {
14884 /* In case we require secondary memory, compute cost of the store followed
14885 by load. In order to avoid bad register allocation choices, we need
14886 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
14887
14888 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
14889 {
14890 int cost = 1;
14891
14892 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
14893 MEMORY_MOVE_COST (mode, class1, 1));
14894 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
14895 MEMORY_MOVE_COST (mode, class2, 1));
14896
14897 /* In case of copying from general_purpose_register we may emit multiple
14898 stores followed by single load causing memory size mismatch stall.
14899 Count this as arbitrarily high cost of 20. */
14900 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
14901 cost += 20;
14902
14903 /* In the case of FP/MMX moves, the registers actually overlap, and we
14904 have to switch modes in order to treat them differently. */
14905 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
14906 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
14907 cost += 20;
14908
14909 return cost;
14910 }
14911
14912 /* Moves between SSE/MMX and integer unit are expensive. */
14913 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
14914 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
14915 return ix86_cost->mmxsse_to_integer;
14916 if (MAYBE_FLOAT_CLASS_P (class1))
14917 return ix86_cost->fp_move;
14918 if (MAYBE_SSE_CLASS_P (class1))
14919 return ix86_cost->sse_move;
14920 if (MAYBE_MMX_CLASS_P (class1))
14921 return ix86_cost->mmx_move;
14922 return 2;
14923 }

Above, at line 14888, ix86_secondary_memory_needed checks whether memory is needed to move data between the two register classes.

14852 int in i386.c
14853 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
14854 enum machine_mode mode, int strict)
14855 {
14856 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
14857 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
14858 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
14859 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
14860 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
14861 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
14862 {
14863 if (strict)
14864 abort ();
14865 else
14866 return 1;
14867 }
14868 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
14869 || ((SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
14870 || MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
14871 && ((mode != SImode && (mode != DImode || !TARGET_64BIT))
14872 || (!TARGET_INTER_UNIT_MOVES && !optimize_size))));
14873 }

Note that the last parameter of the function (strict) is 0 here, and this is the only place the function is called. Macro MAYBE_FLOAT_CLASS_P checks whether the given register class intersects the FLOAT_REGS class, and macro FLOAT_CLASS_P checks whether the given register class is contained in the FLOAT_REGS class. The other macros are similar. The definitions of FLOAT_CLASS_P and MAYBE_FLOAT_CLASS_P are as below:

1318 #define MAYBE_FLOAT_CLASS_P(CLASS) \ in i386.h
1319 reg_classes_intersect_p ((CLASS), FLOAT_REGS)

1310 #define FLOAT_CLASS_P(CLASS) \
1311 reg_class_subset_p ((CLASS), FLOAT_REGS)

2545 int
2546 reg_classes_intersect_p (enum reg_class c1, enum reg_class c2) in regclass.c
2547 {
2548 HARD_REG_SET c;
2549
2550 if (c1 == c2) return 1;
2551
2552 if (c1 == ALL_REGS || c2 == ALL_REGS)
2553 return 1;
2554
2555 COPY_HARD_REG_SET (c, reg_class_contents[(int) c1]);
2556 AND_HARD_REG_SET (c, reg_class_contents[(int) c2]);
2557
2558 GO_IF_HARD_REG_SUBSET (c, reg_class_contents[(int) NO_REGS], lose);
2559 return 1;
2560
2561 lose:
2562 return 0;
2563 }

2529 int
2530 reg_class_subset_p (enum reg_class c1, enum reg_class c2) in regclass.c
2531 {
2532 if (c1 == c2) return 1;
2533
2534 if (c2 == ALL_REGS)
2535 win:
2536 return 1;
2537 GO_IF_HARD_REG_SUBSET (reg_class_contents[(int) c1],
2538 reg_class_contents[(int) c2],
2539 win);
2540 return 0;
2541 }

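So in ix86_register_move_cost, at line 14888, ix86_secondary_memory_needed returns true in the following cases. First, if either class only partly overlaps the float, SSE or MMX registers (that is, the MAYBE_ test and the subset test disagree for class1 or class2), the function conservatively returns 1, because strict is 0. Otherwise it returns true if exactly one of class1 and class2 is a float class, or if the two classes differ in being SSE or MMX classes and either the mode is not SImode (nor DImode on a 64-bit target) or inter-unit moves are disabled and we are not optimizing for size. In all these cases we need memory for the move between these registers. The system first needs to store the value from register1 into memory, and then load this value from memory into register2; as the encoding of the value in these two registers is different, a direct copy is not appropriate. So we need to count the cost twice, once for moving out of register1 and once for moving into register2. And to be conservative, we just take the more expensive operation (load or store) of each class into account. Now let’s see how MEMORY_MOVE_COST works.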

2656 #define MEMORY_MOVE_COST(MODE, CLASS, IN) \ in i386.h
2657 ix86_memory_move_cost ((MODE), (CLASS), (IN))

14984 int in i386.c
14985 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
14986 {
14987 if (FLOAT_CLASS_P (class))
14988 {
14989 int index;
14990 switch (mode)
14991 {
14992 case SFmode:
14993 index = 0;
14994 break;
14995 case DFmode:
14996 index = 1;
14997 break;
14998 case XFmode:
14999 index = 2;
15000 break;
15001 default:
15002 return 100;
15003 }
15004 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
15005 }
15006 if (SSE_CLASS_P (class))
15007 {
15008 int index;
15009 switch (GET_MODE_SIZE (mode))
15010 {
15011 case 4:
15012 index = 0;
15013 break;
15014 case 8:
15015 index = 1;
15016 break;
15017 case 16:
15018 index = 2;
15019 break;
15020 default:
15021 return 100;
15022 }
15023 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
15024 }
15025 if (MMX_CLASS_P (class))
15026 {
15027 int index;
15028 switch (GET_MODE_SIZE (mode))
15029 {
15030 case 4:
15031 index = 0;
15032 break;
15033 case 8:
15034 index = 1;
15035 break;
15036 default:
15037 return 100;
15038 }
15039 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
15040 }
15041 switch (GET_MODE_SIZE (mode))
15042 {
15043 case 1:
15044 if (in)
15045 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
15046 : ix86_cost->movzbl_load);
15047 else
15048 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
15049 : ix86_cost->int_store[0] + 4);
15050 break;
15051 case 2:
15052 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
15053 default:
15054 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
15055 if (mode == TFmode)
15056 mode = XFmode;
15057 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
15058 * (((int) GET_MODE_SIZE (mode)
15059 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
15060 }
15061 }

The key data structure here is ix86_cost, which records the costs of operations on a particular processor. For the x86 machine, we get the following definition:

416 static const in i386.c
417 struct processor_costs pentium4_cost = {
418 1, /* cost of an add instruction */
419 1, /* cost of a lea instruction */
420 4, /* variable shift costs */
421 4, /* constant shift costs */
422 {15, 15, 15, 15, 15}, /* cost of starting a multiply */
423 0, /* cost of multiply per each bit set */
424 {56, 56, 56, 56, 56}, /* cost of a divide/mod */
425 1, /* cost of movsx */
426 1, /* cost of movzx */
427 16, /* "large" insn */
428 6, /* MOVE_RATIO */
429 2, /* cost for loading QImode using movzbl */
430 {4, 5, 4}, /* cost of loading integer registers
431 in QImode, HImode and SImode.
432 Relative to reg-reg move (2). */
433 {2, 3, 2}, /* cost of storing integer registers */
434 2, /* cost of reg,reg fld/fst */
435 {2, 2, 6}, /* cost of loading fp registers
436 in SFmode, DFmode and XFmode */
437 {4, 4, 6}, /* cost of loading integer registers */
438 2, /* cost of moving MMX register */
439 {2, 2}, /* cost of loading MMX registers
440 in SImode and DImode */
441 {2, 2}, /* cost of storing MMX registers
442 in SImode and DImode */
443 12, /* cost of moving SSE register */
444 {12, 12, 12}, /* cost of loading SSE registers
445 in SImode, DImode and TImode */
446 {2, 2, 8}, /* cost of storing SSE registers
447 in SImode, DImode and TImode */
448 10, /* MMX or SSE register to integer */
449 64, /* size of prefetch block */
450 6, /* number of parallel prefetches */
451 2, /* Branch cost */
452 5, /* cost of FADD and FSUB insns. */
453 7, /* cost of FMUL instruction. */
454 43, /* cost of FDIV instruction. */
455 2, /* cost of FABS instruction. */
456 2, /* cost of FCHS instruction. */
457 43, /* cost of FSQRT instruction. */
458 };
459
460 const struct processor_costs *ix86_cost = &pentium_cost;

And the definition of processor_costs is:

39 struct processor_costs { in i386.h
40 const int add; /* cost of an add instruction */
41 const int lea; /* cost of a lea instruction */
42 const int shift_var; /* variable shift costs */
43 const int shift_const; /* constant shift costs */
44 const int mult_init[5]; /* cost of starting a multiply
45 in QImode, HImode, SImode, DImode, TImode*/
46 const int mult_bit; /* cost of multiply per each bit set */
47 const int divide[5]; /* cost of a divide/mod
48 in QImode, HImode, SImode, DImode, TImode*/
49 int movsx; /* The cost of movsx operation. */
50 int movzx; /* The cost of movzx operation. */
51 const int large_insn; /* insns larger than this cost more */
52 const int move_ratio; /* The threshold of number of scalar
53 memory-to-memory move insns. */
54 const int movzbl_load; /* cost of loading using movzbl */
55 const int int_load[3]; /* cost of loading integer registers
56 in QImode, HImode and SImode relative
57 to reg-reg move (2). */
58 const int int_store[3]; /* cost of storing integer register
59 in QImode, HImode and SImode */
60 const int fp_move; /* cost of reg,reg fld/fst */
61 const int fp_load[3]; /* cost of loading FP register
62 in SFmode, DFmode and XFmode */
63 const int fp_store[3]; /* cost of storing FP register
64 in SFmode, DFmode and XFmode */
65 const int mmx_move; /* cost of moving MMX register. */
66 const int mmx_load[2]; /* cost of loading MMX register
67 in SImode and DImode */
68 const int mmx_store[2]; /* cost of storing MMX register
69 in SImode and DImode */
70 const int sse_move; /* cost of moving SSE register. */
71 const int sse_load[3]; /* cost of loading SSE register
72 in SImode, DImode and TImode*/
73 const int sse_store[3]; /* cost of storing SSE register
74 in SImode, DImode and TImode*/
75 const int mmxsse_to_integer; /* cost of moving mmxsse register to
76 integer and vice versa. */
77 const int prefetch_block; /* bytes moved to cache for prefetch. */
78 const int simultaneous_prefetches; /* number of parallel prefetch
79 operations. */
80 const int branch_cost; /* Default value for BRANCH_COST. */
81 const int fadd; /* cost of FADD and FSUB instructions. */
82 const int fmul; /* cost of FMUL instruction. */
83 const int fdiv; /* cost of FDIV instruction. */
84 const int fabs; /* cost of FABS instruction. */
85 const int fchs; /* cost of FCHS instruction. */
86 const int fsqrt; /* cost of FSQRT instruction. */
87 };

From this definition, we can see that most of the data must be predefined and supplied beforehand. To write a compiler, we must know the CPU well.
Then at lines 14892 and 14894 in ix86_register_move_cost, for each of the two classes it takes the larger of the memory store and load costs and adds it to the result; the returned cost is then saved into move_cost by init_reg_sets_1.
At line 14900, macro CLASS_MAX_NREGS finds the maximum number of consecutive registers of CLASS needed to represent MODE. From line 14913 on, for the case where no secondary memory is needed, the cost is taken from ix86_cost. Moving data between registers of the integer class has the smallest cost – 2. Notice that if ix86_secondary_memory_needed returns false at line 14888 and the condition at line 14913 is nevertheless satisfied, an MMX or SSE register must be in use to hold an integer (this is the case in which the expression at line 14868 in ix86_secondary_memory_needed evaluates to false even though the SSE/MMX membership of the two classes differs).
After REGISTER_MOVE_COST has given the move cost between two register classes, init_reg_sets_1 selects the maximum cost over the subclasses of the classes involved as the move cost for the class pair.
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: