1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
|
// Performance counter class for PS2
// Nov 2001 - Amit Bakshi
#ifndef PS2PERF_HPP
#define PS2PERF_HPP
/************************************************************************************/
// nov6/2001 amb - Performance Counter class
// based on David Coombes' (david_coombes@playstation.sony.com) code
// sample usage :
//
// void TestSinCos()
// {
// ps2Perf perf("mySinCos");
//
// for(int i =0; i < 16; i++)
// {
// perf.BeginSample();
//
// float s,c;
// mySinCos( i, &s , &c );
//
// perf.EndSample();
// }
// perf.PrintStats();
// };
/************************************************************************************/
// NOTE(review): presumably the number of EE core clock cycles in one
// millisecond (i.e. a 294.912 MHz clock), to be used as a divisor when
// converting cycle counts to milliseconds -- confirm against the hardware
// documentation.  It is not referenced in the visible part of this header.
#define ClocksToMs 294912
// Returns the current value of coprocessor-0 register $9, fetched with a
// single `mfc0` instruction.
// NOTE(review): $9 is presumably the free-running cycle Count register --
// confirm against the EE Core manual.
inline unsigned int GetCycleCounter(void)
{
    unsigned int cycles;

    __asm__ __volatile__ ("mfc0 %0,$9" : "=r" (cycles) );

    return cycles;
}
// Writes zero (register $0) into coprocessor-0 register $9, i.e. the register
// that GetCycleCounter() reads.
// amb nov6/2001 - caution: do not call this in the middle of the code;
// it will mess up the PDDI stats.
inline void ResetCycleCounter(void)
{
    __asm__ __volatile__ ("mtc0 $0,$9 ");
}
// ps2Perf - gathers min / max / last / average readings of the two hardware
// performance counters around a piece of code.
//
// Each BeginSample()/EndSample() pair measures ONE event per counter.  The
// argument-less overloads pick the event as (count % PC0_NO_EVENT), so
// successive samples rotate through the event ids and PC0_NO_EVENT samples
// are needed to touch every event once.  PrintStats() prints one row per
// event that has at least one sample.
//
// The class relies on strncpy/memcpy (<string.h>) and printf (<stdio.h>)
// being declared by the including translation unit.
class ps2Perf
{
public:
    // what : label printed by PrintStats().  At most sizeof(desc)-1 characters
    //        are kept; a null pointer is treated as an empty label.
    ps2Perf(const char* what)
    {
        // strncpy() leaves the destination unterminated when the source is
        // sizeof(desc)-1 characters or longer, so terminate explicitly.
        strncpy(desc, what ? what : "", sizeof(desc) - 1);
        desc[sizeof(desc) - 1] = '\0';
        Reset();
    }

    // Halts the counters so they do not keep running after the object dies.
    ~ps2Perf()
    {
        StopCounters();
    }

    // Discards every accumulated sample and restarts the event rotation.
    inline void Reset()
    {
        count = 0;
        for (int i = 0; i < PC0_NO_EVENT; i++)
        {
            // Seed min with the largest unsigned value so that the first real
            // sample always replaces it (a 16-bit seed would stick for any
            // reading above 65535).
            pc0[i].min = pc1[i].min = ~0u;
            pc0[i].max = pc1[i].max = 0;
            pc0[i].cur = pc1[i].cur = 0;
            pc0[i].tot = pc1[i].tot = 0;
            pc0[i].num = pc1[i].num = 0;
        }
    }

    // Starts a sample for the next event in the rotation.
    inline void BeginSample()
    {
        int evt = count % PC0_NO_EVENT;
        BeginSample(evt);
    }

    // Zeroes both counters and starts them counting event `evt`
    // (see PCOUNT0_EVENT / PCOUNT1_EVENT; the same id is used for both).
    inline void BeginSample(int evt)
    {
        // Clear the unused bit-fields so no indeterminate bits end up in the
        // control word written to the hardware.
        pccr.pad0 = 0;
        pccr.pad1 = 0;
        pccr.pad2 = 0;
        pccr.cl0 = 0x8; // only user mode
        pccr.event0 = evt;
        pccr.cl1 = 0x8; // only user mode
        pccr.event1 = evt;
        pccr.cte = 1;

        // Copy the bit-field image into a plain word; memcpy avoids reading
        // the struct through an incompatible pointer type.
        unsigned int control = 0;
        memcpy(&control, &pccr, sizeof(control));

        asm __volatile__(
            ".set noreorder\n\t"
            ".set noat\n\t"
            "mtps $0,0\n\t"     // halt performance counters
            "sync.p\n\t"
            "mtpc $0,0\n\t"     // set perfcounter 0 to zero
            "mtpc $0,1\n\t"     // set perfcounter 1 to zero
            "sync.p\n\t"
            "mtps %0,0\n\t"     // load the control word built above (cte = 1)
            ".set reorder\n\t"
            ".set at\n\t"
            : // no output
            : "r"(control)
        );
    }

    // Ends the sample started by BeginSample() and advances the rotation.
    inline void EndSample()
    {
        int evt = count % PC0_NO_EVENT;
        EndSample(evt);
        count++;
    }

    // Reads both counters, folds the readings into the statistics of event
    // `evt`, then halts the counters.  An `evt` outside [0, PC0_NO_EVENT) is
    // not recorded (it would index past the end of the tables); the counters
    // are still halted.
    inline void EndSample(int evt)
    {
        unsigned int ret_pc0 = 0;
        unsigned int ret_pc1 = 0;

        asm __volatile__(
            ".set noreorder\n\t"
            ".set noat\n\t"
            "mfpc %0,0\n\t"
            "mfpc %1,1\n\t"
            ".set reorder\n\t"
            ".set at\n\t"
            : "=r"(ret_pc0), "=r"(ret_pc1)
        );

        if (evt >= 0 && evt < PC0_NO_EVENT)
        {
            Accumulate(pc0[evt], ret_pc0);
            Accumulate(pc1[evt], ret_pc1);
        }

        StopCounters();
    }

    // Halts both counters by writing zero to the control word.
    inline void StopCounters()
    {
        asm __volatile__ ("mtps $0,0");
    }

    // Prints a header followed by one "label, min, max, cur, ave" row for
    // every event that has been sampled at least once.
    void PrintStats()
    {
        // The row macros are wrapped in do/while(0) so they behave as single
        // statements; values are unsigned, hence %u.
#define PRINT_STAT_0(label,i) \
        do { if (pc0[i].num) printf(label "%6u, %6u, %6u, %6u\n", \
            pc0[i].min, pc0[i].max, pc0[i].cur, pc0[i].tot/pc0[i].num); } while (0)
#define PRINT_STAT_1(label,i) \
        do { if (pc1[i].num) printf(label "%6u, %6u, %6u, %6u\n", \
            pc1[i].min, pc1[i].max, pc1[i].cur, pc1[i].tot/pc1[i].num); } while (0)

        printf("==== %s ====(total iterations)%u (per event)%u (frame%%) ===========\n",desc,count,(count/PC0_NO_EVENT));
        printf("Event , min, max, cur, ave \n");

        // Counter 0 (PC0_RESERVED has no row).
        PRINT_STAT_0("Processor cycle ,",                PC0_CPU_CYCLE      );
        PRINT_STAT_0("Single instructions issue ,",      PC0_SINGLE_ISSUE   );
        PRINT_STAT_0("Branch issued ,",                  PC0_BRANCH_ISSUED  );
        PRINT_STAT_0("BTAC miss ,",                      PC0_BTAC_MISS      );
        PRINT_STAT_0("ITLB miss ,",                      PC0_ITLB_MISS      );
        PRINT_STAT_0("Instruction cache miss ,",         PC0_ICACHE_MISS    );
        PRINT_STAT_0("DTLB accessed ,",                  PC0_DTLB_ACCESSED  );
        PRINT_STAT_0("Non-blocking load ,",              PC0_NONBLOCK_LOAD  );
        PRINT_STAT_0("WBB single request ,",             PC0_WBB_SINGLE_REQ );
        PRINT_STAT_0("WBB burst request ,",              PC0_WBB_BURST_REQ  );
        PRINT_STAT_0("CPU address bus busy ,",           PC0_ADDR_BUS_BUSY  );
        PRINT_STAT_0("Instruction completed ,",          PC0_INST_COMP      );
        PRINT_STAT_0("Non-BDS instruction completed ,",  PC0_NON_BDS_COMP   );
        PRINT_STAT_0("COP2 instruction completed ,",     PC0_COP2_COMP      );
        PRINT_STAT_0("Load completed ,",                 PC0_LOAD_COMP      );

        // Counter 1.
        PRINT_STAT_1("Low-order branch issued ,",        PC1_LOW_BRANCH_ISSUED   );
        PRINT_STAT_1("Processor cycle ,",                PC1_CPU_CYCLE           );
        PRINT_STAT_1("Dual instructions issue ,",        PC1_DUAL_ISSUE          );
        PRINT_STAT_1("Branch miss-predicted ,",          PC1_BRANCH_MISS_PREDICT );
        PRINT_STAT_1("TLB miss ,",                       PC1_TLB_MISS            );
        PRINT_STAT_1("DTLB miss ,",                      PC1_DTLB_MISS           );
        PRINT_STAT_1("Data cache miss ,",                PC1_DCACHE_MISS         );
        PRINT_STAT_1("WBB single request unavailable,",  PC1_WBB_SINGLE_UNAVAIL  );
        PRINT_STAT_1("WBB burst request unavailable ,",  PC1_WBB_BURST_UNAVAIL   );
        PRINT_STAT_1("WBB burst request almost full ,",  PC1_WBB_BURST_ALMOST    );
        PRINT_STAT_1("WBB burst request full ,",         PC1_WBB_BURST_FULL      );
        PRINT_STAT_1("CPU data bus busy ,",              PC1_DATA_BUS_BUSY       );
        PRINT_STAT_1("Instruction completed ,",          PC1_INST_COMP           );
        PRINT_STAT_1("Non-BDS instruction completed ,",  PC1_NON_BDS_COMP        );
        PRINT_STAT_1("COP1 instruction completed ,",     PC1_COP1_COMP           );
        PRINT_STAT_1("Store completed ,",                PC1_STORE_COMP          );

#undef PRINT_STAT_0
#undef PRINT_STAT_1
    }

private:
    enum PCOUNT0_EVENT // Performance Counter 0 Events
    {
        PC0_RESERVED       = 0,
        PC0_CPU_CYCLE      = 1,  // Processor cycle
        PC0_SINGLE_ISSUE   = 2,  // Single instructions issue
        PC0_BRANCH_ISSUED  = 3,  // Branch issued
        PC0_BTAC_MISS      = 4,  // BTAC miss
        PC0_ITLB_MISS      = 5,  // ITLB miss
        PC0_ICACHE_MISS    = 6,  // Instruction cache miss
        PC0_DTLB_ACCESSED  = 7,  // DTLB accessed
        PC0_NONBLOCK_LOAD  = 8,  // Non-blocking load
        PC0_WBB_SINGLE_REQ = 9,  // WBB single request
        PC0_WBB_BURST_REQ  = 10, // WBB burst request
        PC0_ADDR_BUS_BUSY  = 11, // CPU address bus busy
        PC0_INST_COMP      = 12, // Instruction completed
        PC0_NON_BDS_COMP   = 13, // Non-BDS instruction completed
        PC0_COP2_COMP      = 14, // COP2 instruction completed
        PC0_LOAD_COMP      = 15, // Load completed
        PC0_NO_EVENT       = 16  // No event
    };

    enum PCOUNT1_EVENT // Performance Counter 1 Events
    {
        PC1_LOW_BRANCH_ISSUED   = 0,  // Low-order branch issued
        PC1_CPU_CYCLE           = 1,  // Processor cycle
        PC1_DUAL_ISSUE          = 2,  // Dual instructions issue
        PC1_BRANCH_MISS_PREDICT = 3,  // Branch miss-predicted
        PC1_TLB_MISS            = 4,  // TLB miss
        PC1_DTLB_MISS           = 5,  // DTLB miss
        PC1_DCACHE_MISS         = 6,  // Data cache miss
        PC1_WBB_SINGLE_UNAVAIL  = 7,  // WBB single request unavailable
        PC1_WBB_BURST_UNAVAIL   = 8,  // WBB burst request unavailable
        PC1_WBB_BURST_ALMOST    = 9,  // WBB burst request almost full
        PC1_WBB_BURST_FULL      = 10, // WBB burst request full
        PC1_DATA_BUS_BUSY       = 11, // CPU data bus busy
        PC1_INST_COMP           = 12, // Instruction completed
        PC1_NON_BDS_COMP        = 13, // Non-BDS instruction completed
        PC1_COP1_COMP           = 14, // COP1 instruction completed
        PC1_STORE_COMP          = 15, // Store completed
        PC1_NO_EVENT            = 16  // No event
    };

    // Running statistics of one event: smallest, largest, summed and most
    // recent reading plus the number of readings.  `tot` is an unsigned int
    // accumulator and wraps if the summed readings exceed its range.
    struct count_t
    {
        unsigned int min,max,tot,cur,num;
    };

    // Folds one counter reading into the statistics of a single event.
    static inline void Accumulate(count_t& stat, unsigned int value)
    {
        if (value < stat.min) stat.min = value;
        if (value > stat.max) stat.max = value;
        stat.cur = value;
        stat.tot += value;
        stat.num++;
    }

    // nov6/2001 amb - see p82 EE Core User's Manual 4.0
    // Bit-field image of the counter control word written by BeginSample().
    struct pccr_t
    {
        unsigned pad0:1;    // unused
        unsigned cl0:4;     // events in which mode (eg user/kernel/super/exception)
        unsigned event0:5;  // event to count in counter 0 (see PCOUNT0_EVENT)
        unsigned pad1:1;    // unused
        unsigned cl1:4;     // events in which mode (eg user/kernel/super/exception)
        unsigned event1:5;  // event to count in counter 1 (see PCOUNT1_EVENT)
        unsigned pad2:11;   // unused
        unsigned cte:1;     // counter enable
    };

    char desc[32];              // NUL-terminated label shown by PrintStats()
    pccr_t pccr;                // control word of the sample in progress
    count_t pc0[PC0_NO_EVENT];  // per-event statistics, counter 0 (16 events)
    count_t pc1[PC1_NO_EVENT];  // per-event statistics, counter 1 (16 events)
    unsigned int count;         // completed EndSample() calls; drives the rotation
};
#endif // PS2PERF_HPP
|