1 : /*
2 : * This code largely moved from arch/i386/kernel/time.c.
3 : * See comments there for proper credits.
4 : *
5 : * 2004-06-25 Jesper Juhl
6 : * moved mark_offset_tsc below cpufreq_delayed_get to avoid gcc 3.4
7 : * failing to inline.
8 : */
9 :
10 : #include <linux/spinlock.h>
11 : #include <linux/init.h>
12 : #include <linux/timex.h>
13 : #include <linux/errno.h>
14 : #include <linux/cpufreq.h>
15 : #include <linux/string.h>
16 : #include <linux/jiffies.h>
17 :
18 : #include <asm/timer.h>
19 : #include <asm/io.h>
/* processor.h for the tsc_disable flag */
21 : #include <asm/processor.h>
22 :
23 : #include "io_ports.h"
24 : #include "mach_timer.h"
25 :
26 : #include <asm/hpet.h>
27 : #include <asm/i8253.h>
28 :
29 : #ifdef CONFIG_HPET_TIMER
30 : static unsigned long hpet_usec_quotient;
31 : static unsigned long hpet_last;
32 : static struct timer_opts timer_tsc;
33 : #endif
34 :
35 : static inline void cpufreq_delayed_get(void);
36 :
37 : int tsc_disable __devinitdata = 0;
38 :
39 : static int use_tsc;
40 : /* Number of usecs that the last interrupt was delayed */
41 : static int delay_at_last_interrupt;
42 :
43 : static unsigned long last_tsc_low; /* lsb 32 bits of Time Stamp Counter */
44 : static unsigned long last_tsc_high; /* msb 32 bits of Time Stamp Counter */
45 : static unsigned long long monotonic_base;
46 : static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED;
47 :
48 : /* convert from cycles(64bits) => nanoseconds (64bits)
49 : * basic equation:
50 : * ns = cycles / (freq / ns_per_sec)
51 : * ns = cycles * (ns_per_sec / freq)
52 : * ns = cycles * (10^9 / (cpu_mhz * 10^6))
53 : * ns = cycles * (10^3 / cpu_mhz)
54 : *
55 : * Then we use scaling math (suggested by george@mvista.com) to get:
56 : * ns = cycles * (10^3 * SC / cpu_mhz) / SC
57 : * ns = cycles * cyc2ns_scale / SC
58 : *
59 : * And since SC is a constant power of two, we can convert the div
60 : * into a shift.
61 : * -johnstul@us.ibm.com "math is hard, lets go shopping!"
62 : */
/*
 * Fixed-point multiplier used to turn TSC cycles into nanoseconds:
 *   cyc2ns_scale = (10^3 << CYC2NS_SCALE_FACTOR) / cpu_mhz
 */
static unsigned long cyc2ns_scale;
#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */

/*
 * Recompute cyc2ns_scale for a CPU running at @cpu_mhz MHz.
 *
 * Callers pass cpu_khz/1000, which is 0 whenever cpu_khz is unknown or
 * below 1000. Dividing by that would fault, so a zero frequency leaves
 * the previously computed scale untouched.
 */
static inline void set_cyc2ns_scale(unsigned long cpu_mhz)
{
	if (!cpu_mhz)
		return;

	cyc2ns_scale = (1000 << CYC2NS_SCALE_FACTOR) / cpu_mhz;
}

/*
 * Convert @cyc TSC cycles to nanoseconds using the cached scale:
 *   ns = cyc * cyc2ns_scale / 2^CYC2NS_SCALE_FACTOR
 * The division is done as a right shift.
 */
static inline unsigned long long cycles_2_ns(unsigned long long cyc)
{
	return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR;
}
75 :
76 : static int count2; /* counter for mark_offset_tsc() */
77 :
/* Cached *multiplier* to convert TSC counts to microseconds.
 * (see the equation in get_offset_tsc() below).
 * Equal to 2^32 * (1 / (clocks per usec) ).
 * Initialized in init_tsc() from the TSC calibration result.
 */
83 : static unsigned long fast_gettimeoffset_quotient;
84 :
85 : static unsigned long get_offset_tsc(void)
86 8755 : {
87 8755 : register unsigned long eax, edx;
88 :
89 : /* Read the Time Stamp Counter */
90 :
91 8755 : rdtsc(eax,edx);
92 :
93 : /* .. relative to previous jiffy (32 bits is enough) */
94 8755 : eax -= last_tsc_low; /* tsc_low delta */
95 :
96 : /*
97 : * Time offset = (tsc_low delta) * fast_gettimeoffset_quotient
98 : * = (tsc_low delta) * (usecs_per_clock)
99 : * = (tsc_low delta) * (usecs_per_jiffy / clocks_per_jiffy)
100 : *
101 : * Using a mull instead of a divl saves up to 31 clock cycles
102 : * in the critical path.
103 : */
104 :
105 8755 : __asm__("mull %2"
106 : :"=a" (eax), "=d" (edx)
107 : :"rm" (fast_gettimeoffset_quotient),
108 : "0" (eax));
109 :
110 : /* our adjusted time offset in microseconds */
111 8755 : return delay_at_last_interrupt + edx;
112 : }
113 :
114 : static unsigned long long monotonic_clock_tsc(void)
115 0 : {
116 0 : unsigned long long last_offset, this_offset, base;
117 0 : unsigned seq;
118 :
119 : /* atomically read monotonic base & last_offset */
120 0 : do {
121 0 : seq = read_seqbegin(&monotonic_lock);
122 0 : last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
123 0 : base = monotonic_base;
124 0 : } while (read_seqretry(&monotonic_lock, seq));
125 :
126 : /* Read the Time Stamp Counter */
127 0 : rdtscll(this_offset);
128 :
129 : /* return the value in ns */
130 0 : return base + cycles_2_ns(this_offset - last_offset);
131 : }
132 :
133 : /*
134 : * Scheduler clock - returns current time in nanosec units.
135 : */
136 : unsigned long long sched_clock(void)
137 18395 : {
138 18395 : unsigned long long this_offset;
139 :
140 : /*
141 : * In the NUMA case we dont use the TSC as they are not
142 : * synchronized across all CPUs.
143 : */
144 : #ifndef CONFIG_NUMA
145 18395 : if (!use_tsc)
146 : #endif
147 : /* no locking but a rare wrong value is not a big deal */
148 0 : return jiffies_64 * (1000000000 / HZ);
149 :
150 : /* Read the Time Stamp Counter */
151 18395 : rdtscll(this_offset);
152 :
153 : /* return the value in ns */
154 18395 : return cycles_2_ns(this_offset);
155 : }
156 :
/*
 * Busy-wait until at least @loops ticks of the low TSC word have gone
 * by.  The body always runs once before the first comparison.
 */
static void delay_tsc(unsigned long loops)
{
	unsigned long start, cur;

	rdtscl(start);
	for (;;) {
		rep_nop();
		rdtscl(cur);
		/* unsigned difference stays correct across a counter wrap */
		if ((cur - start) >= loops)
			break;
	}
}
168 :
#ifdef CONFIG_HPET_TIMER
/*
 * Tick hook used when the HPET drives the timer interrupt: snapshots
 * the TSC, compensates for lost ticks using the HPET comparator,
 * advances the monotonic base and records how late the interrupt was
 * serviced (delay_at_last_interrupt, in usecs).
 */
static void mark_offset_tsc_hpet(void)
{
	unsigned long long tsc_prev, tsc_now;
	unsigned long tick_start, scratch, hpet_now;

	write_seqlock(&monotonic_lock);
	tsc_prev = ((unsigned long long)last_tsc_high << 32) | last_tsc_low;

	/*
	 * The HPET counter and the TSC have to be sampled as close
	 * together as possible, so both reads are done back to back.
	 * To avoid any inconsistencies interrupts need to be disabled
	 * locally; they are, since the timer irq has the SA_INTERRUPT
	 * flag set. -arca
	 */
	hpet_now = hpet_readl(HPET_COUNTER);
	rdtsc(last_tsc_low, last_tsc_high);

	/*
	 * Lost tick compensation: tick_start is the comparator value one
	 * tick back.  If it lies more than a tick past the previous
	 * sample (and there is a previous sample), account the extra
	 * ticks.
	 */
	tick_start = hpet_readl(HPET_T0_CMP) - hpet_tick;
	if (unlikely((hpet_last != 0) && ((tick_start - hpet_last) > hpet_tick))) {
		int lost_ticks = (tick_start - hpet_last) / hpet_tick;
		jiffies_64 += lost_ticks;
	}
	hpet_last = hpet_now;

	/* advance the monotonic base by the cycles since the last mark */
	tsc_now = ((unsigned long long)last_tsc_high << 32) | last_tsc_low;
	monotonic_base += cycles_2_ns(tsc_now - tsc_prev);
	write_sequnlock(&monotonic_lock);

	/*
	 * delay_at_last_interrupt:
	 * Time offset = (hpet delta) * ( usecs per HPET clock )
	 *             = (hpet delta) * ( usecs per tick / HPET clocks per tick)
	 *             = (hpet delta) * ( hpet_usec_quotient ) / (2^32)
	 * Where,
	 * hpet_usec_quotient = (2^32 * usecs per tick)/HPET clocks per tick
	 */
	delay_at_last_interrupt = hpet_now - tick_start;
	ASM_MUL64_REG(scratch, delay_at_last_interrupt,
			hpet_usec_quotient, delay_at_last_interrupt);
}
#endif
218 :
219 :
220 : #ifdef CONFIG_CPU_FREQ
221 : #include <linux/workqueue.h>
222 :
223 : static unsigned int cpufreq_delayed_issched = 0;
224 : static unsigned int cpufreq_init = 0;
225 : static struct work_struct cpufreq_delayed_get_work;
226 :
227 : static void handle_cpufreq_delayed_get(void *v)
228 : {
229 : unsigned int cpu;
230 : for_each_online_cpu(cpu) {
231 : cpufreq_get(cpu);
232 : }
233 : cpufreq_delayed_issched = 0;
234 : }
235 :
236 : /* if we notice lost ticks, schedule a call to cpufreq_get() as it tries
237 : * to verify the CPU frequency the timing core thinks the CPU is running
238 : * at is still correct.
239 : */
240 : static inline void cpufreq_delayed_get(void)
241 : {
242 : if (cpufreq_init && !cpufreq_delayed_issched) {
243 : cpufreq_delayed_issched = 1;
244 : printk(KERN_DEBUG "Losing some ticks... checking if CPU frequency changed.\n");
245 : schedule_work(&cpufreq_delayed_get_work);
246 : }
247 : }
248 :
249 : /* If the CPU frequency is scaled, TSC-based delays will need a different
250 : * loops_per_jiffy value to function properly.
251 : */
252 :
253 : static unsigned int ref_freq = 0;
254 : static unsigned long loops_per_jiffy_ref = 0;
255 :
256 : #ifndef CONFIG_SMP
257 : static unsigned long fast_gettimeoffset_ref = 0;
258 : static unsigned int cpu_khz_ref = 0;
259 : #endif
260 :
261 : static int
262 : time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
263 : void *data)
264 : {
265 : struct cpufreq_freqs *freq = data;
266 :
267 : if (val != CPUFREQ_RESUMECHANGE)
268 : write_seqlock_irq(&xtime_lock);
269 : if (!ref_freq) {
270 : ref_freq = freq->old;
271 : loops_per_jiffy_ref = cpu_data[freq->cpu].loops_per_jiffy;
272 : #ifndef CONFIG_SMP
273 : fast_gettimeoffset_ref = fast_gettimeoffset_quotient;
274 : cpu_khz_ref = cpu_khz;
275 : #endif
276 : }
277 :
278 : if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) ||
279 : (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) ||
280 : (val == CPUFREQ_RESUMECHANGE)) {
281 : if (!(freq->flags & CPUFREQ_CONST_LOOPS))
282 : cpu_data[freq->cpu].loops_per_jiffy = cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new);
283 : #ifndef CONFIG_SMP
284 : if (cpu_khz)
285 : cpu_khz = cpufreq_scale(cpu_khz_ref, ref_freq, freq->new);
286 : if (use_tsc) {
287 : if (!(freq->flags & CPUFREQ_CONST_LOOPS)) {
288 : fast_gettimeoffset_quotient = cpufreq_scale(fast_gettimeoffset_ref, freq->new, ref_freq);
289 : set_cyc2ns_scale(cpu_khz/1000);
290 : }
291 : }
292 : #endif
293 : }
294 :
295 : if (val != CPUFREQ_RESUMECHANGE)
296 : write_sequnlock_irq(&xtime_lock);
297 :
298 : return 0;
299 : }
300 :
301 : static struct notifier_block time_cpufreq_notifier_block = {
302 : .notifier_call = time_cpufreq_notifier
303 : };
304 :
305 :
306 : static int __init cpufreq_tsc(void)
307 : {
308 : int ret;
309 : INIT_WORK(&cpufreq_delayed_get_work, handle_cpufreq_delayed_get, NULL);
310 : ret = cpufreq_register_notifier(&time_cpufreq_notifier_block,
311 : CPUFREQ_TRANSITION_NOTIFIER);
312 : if (!ret)
313 : cpufreq_init = 1;
314 : return ret;
315 : }
316 : core_initcall(cpufreq_tsc);
317 :
318 : #else /* CONFIG_CPU_FREQ */
/* built without CONFIG_CPU_FREQ: there is no frequency to re-check */
static inline void cpufreq_delayed_get(void)
{
}
320 : #endif
321 :
322 : int recalibrate_cpu_khz(void)
323 0 : {
324 : #ifndef CONFIG_SMP
325 : unsigned int cpu_khz_old = cpu_khz;
326 :
327 : if (cpu_has_tsc) {
328 : init_cpu_khz();
329 : cpu_data[0].loops_per_jiffy =
330 : cpufreq_scale(cpu_data[0].loops_per_jiffy,
331 : cpu_khz_old,
332 : cpu_khz);
333 : return 0;
334 : } else
335 : return -ENODEV;
336 : #else
337 0 : return -ENODEV;
338 : #endif
339 : }
340 : EXPORT_SYMBOL(recalibrate_cpu_khz);
341 :
342 : static void mark_offset_tsc(void)
343 43304 : {
344 43304 : unsigned long lost,delay;
345 43304 : unsigned long delta = last_tsc_low;
346 43304 : int count;
347 43304 : int countmp;
348 43304 : static int count1 = 0;
349 43304 : unsigned long long this_offset, last_offset;
350 43304 : static int lost_count = 0;
351 :
352 43304 : write_seqlock(&monotonic_lock);
353 43304 : last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
354 : /*
355 : * It is important that these two operations happen almost at
356 : * the same time. We do the RDTSC stuff first, since it's
357 : * faster. To avoid any inconsistencies, we need interrupts
358 : * disabled locally.
359 : */
360 :
361 : /*
362 : * Interrupts are just disabled locally since the timer irq
363 : * has the SA_INTERRUPT flag set. -arca
364 : */
365 :
366 : /* read Pentium cycle counter */
367 :
368 43304 : rdtsc(last_tsc_low, last_tsc_high);
369 :
370 43304 : spin_lock(&i8253_lock);
371 43304 : outb_p(0x00, PIT_MODE); /* latch the count ASAP */
372 :
373 43304 : count = inb_p(PIT_CH0); /* read the latched count */
374 43304 : count |= inb(PIT_CH0) << 8;
375 :
376 : /*
377 : * VIA686a test code... reset the latch if count > max + 1
378 : * from timer_pit.c - cjb
379 : */
380 43304 : if (count > LATCH) {
381 0 : outb_p(0x34, PIT_MODE);
382 0 : outb_p(LATCH & 0xff, PIT_CH0);
383 0 : outb(LATCH >> 8, PIT_CH0);
384 0 : count = LATCH - 1;
385 : }
386 :
387 43304 : spin_unlock(&i8253_lock);
388 :
389 43304 : if (pit_latch_buggy) {
390 : /* get center value of last 3 time lutch */
391 0 : if ((count2 >= count && count >= count1)
392 : || (count1 >= count && count >= count2)) {
393 0 : count2 = count1; count1 = count;
394 0 : } else if ((count1 >= count2 && count2 >= count)
395 : || (count >= count2 && count2 >= count1)) {
396 0 : countmp = count;count = count2;
397 0 : count2 = count1;count1 = countmp;
398 : } else {
399 0 : count2 = count1; count1 = count; count = count1;
400 : }
401 : }
402 :
403 : /* lost tick compensation */
404 43304 : delta = last_tsc_low - delta;
405 : {
406 43304 : register unsigned long eax, edx;
407 43304 : eax = delta;
408 43304 : __asm__("mull %2"
409 : :"=a" (eax), "=d" (edx)
410 : :"rm" (fast_gettimeoffset_quotient),
411 : "0" (eax));
412 43304 : delta = edx;
413 : }
414 43304 : delta += delay_at_last_interrupt;
415 43304 : lost = delta/(1000000/HZ);
416 43304 : delay = delta%(1000000/HZ);
417 43304 : if (lost >= 2) {
418 2 : jiffies_64 += lost-1;
419 :
420 : /* sanity check to ensure we're not always losing ticks */
421 2 : if (lost_count++ > 100) {
422 0 : printk(KERN_WARNING "Losing too many ticks!\n");
423 0 : printk(KERN_WARNING "TSC cannot be used as a timesource. \n");
424 0 : printk(KERN_WARNING "Possible reasons for this are:\n");
425 0 : printk(KERN_WARNING " You're running with Speedstep,\n");
426 0 : printk(KERN_WARNING " You don't have DMA enabled for your hard disk (see hdparm),\n");
427 0 : printk(KERN_WARNING " Incorrect TSC synchronization on an SMP system (see dmesg).\n");
428 0 : printk(KERN_WARNING "Falling back to a sane timesource now.\n");
429 :
430 0 : clock_fallback();
431 : }
432 : /* ... but give the TSC a fair chance */
433 2 : if (lost_count > 25)
434 43302 : cpufreq_delayed_get();
435 : } else
436 43302 : lost_count = 0;
437 : /* update the monotonic base value */
438 43304 : this_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
439 43304 : monotonic_base += cycles_2_ns(this_offset - last_offset);
440 43304 : write_sequnlock(&monotonic_lock);
441 :
442 : /* calculate delay_at_last_interrupt */
443 43304 : count = ((LATCH-1) - count) * TICK_SIZE;
444 43304 : delay_at_last_interrupt = (count + LATCH/2) / LATCH;
445 :
446 : /* catch corner case where tick rollover occured
447 : * between tsc and pit reads (as noted when
448 : * usec delta is > 90% # of usecs/tick)
449 : */
450 43304 : if (lost && abs(delay - delay_at_last_interrupt) > (900000/HZ))
451 0 : jiffies_64++;
452 : }
453 :
454 : static int __init init_tsc(char* override)
455 1 : {
456 :
457 : /* check clock override */
458 1 : if (override[0] && strncmp(override,"tsc",3)) {
459 : #ifdef CONFIG_HPET_TIMER
460 : if (is_hpet_enabled()) {
461 : printk(KERN_ERR "Warning: clock= override failed. Defaulting to tsc\n");
462 : } else
463 : #endif
464 : {
465 0 : return -ENODEV;
466 : }
467 : }
468 :
469 : /*
470 : * If we have APM enabled or the CPU clock speed is variable
471 : * (CPU stops clock on HLT or slows clock to save power)
472 : * then the TSC timestamps may diverge by up to 1 jiffy from
473 : * 'real time' but nothing will break.
474 : * The most frequent case is that the CPU is "woken" from a halt
475 : * state by the timer interrupt itself, so we get 0 error. In the
476 : * rare cases where a driver would "wake" the CPU and request a
477 : * timestamp, the maximum error is < 1 jiffy. But timestamps are
478 : * still perfectly ordered.
479 : * Note that the TSC counter will be reset if APM suspends
480 : * to disk; this won't break the kernel, though, 'cuz we're
481 : * smart. See arch/i386/kernel/apm.c.
482 : */
483 : /*
484 : * Firstly we have to do a CPU check for chips with
485 : * a potentially buggy TSC. At this point we haven't run
486 : * the ident/bugs checks so we must run this hook as it
487 : * may turn off the TSC flag.
488 : *
489 : * NOTE: this doesn't yet handle SMP 486 machines where only
490 : * some CPU's have a TSC. Thats never worked and nobody has
491 : * moaned if you have the only one in the world - you fix it!
492 : */
493 :
494 1 : count2 = LATCH; /* initialize counter for mark_offset_tsc() */
495 :
496 1 : if (cpu_has_tsc) {
497 1 : unsigned long tsc_quotient;
498 : #ifdef CONFIG_HPET_TIMER
499 : if (is_hpet_enabled() && hpet_use_timer) {
500 : unsigned long result, remain;
501 : printk("Using TSC for gettimeofday\n");
502 : tsc_quotient = calibrate_tsc_hpet(NULL);
503 : timer_tsc.mark_offset = &mark_offset_tsc_hpet;
504 : /*
505 : * Math to calculate hpet to usec multiplier
506 : * Look for the comments at get_offset_tsc_hpet()
507 : */
508 : ASM_DIV64_REG(result, remain, hpet_tick,
509 : 0, KERNEL_TICK_USEC);
510 : if (remain > (hpet_tick >> 1))
511 : result++; /* rounding the result */
512 :
513 : hpet_usec_quotient = result;
514 : } else
515 : #endif
516 : {
517 1 : tsc_quotient = calibrate_tsc();
518 : }
519 :
520 1 : if (tsc_quotient) {
521 1 : fast_gettimeoffset_quotient = tsc_quotient;
522 1 : use_tsc = 1;
523 : /*
524 : * We could be more selective here I suspect
525 : * and just enable this for the next intel chips ?
526 : */
527 : /* report CPU clock rate in Hz.
528 : * The formula is (10^6 * 2^32) / (2^32 * 1 / (clocks/us)) =
529 : * clock/second. Our precision is about 100 ppm.
530 : */
531 1 : { unsigned long eax=0, edx=1000;
532 1 : __asm__("divl %2"
533 : :"=a" (cpu_khz), "=d" (edx)
534 : :"r" (tsc_quotient),
535 : "0" (eax), "1" (edx));
536 1 : printk("Detected %u.%03u MHz processor.\n",
537 : cpu_khz / 1000, cpu_khz % 1000);
538 : }
539 1 : set_cyc2ns_scale(cpu_khz/1000);
540 1 : return 0;
541 : }
542 : }
543 0 : return -ENODEV;
544 : }
545 :
546 : static int tsc_resume(void)
547 0 : {
548 0 : write_seqlock(&monotonic_lock);
549 : /* Assume this is the last mark offset time */
550 0 : rdtsc(last_tsc_low, last_tsc_high);
551 : #ifdef CONFIG_HPET_TIMER
552 : if (is_hpet_enabled() && hpet_use_timer)
553 : hpet_last = hpet_readl(HPET_COUNTER);
554 : #endif
555 0 : write_sequnlock(&monotonic_lock);
556 0 : return 0;
557 : }
558 :
559 : #ifndef CONFIG_X86_TSC
560 : /* disable flag for tsc. Takes effect by clearing the TSC cpu flag
561 : * in cpu/common.c */
562 : static int __init tsc_setup(char *str)
563 : {
564 : tsc_disable = 1;
565 : return 1;
566 : }
567 : #else
568 : static int __init tsc_setup(char *str)
569 0 : {
570 0 : printk(KERN_WARNING "notsc: Kernel compiled with CONFIG_X86_TSC, "
571 : "cannot disable TSC.\n");
572 0 : return 1;
573 : }
574 : #endif
575 : __setup("notsc", tsc_setup);
576 :
577 :
578 :
579 : /************************************************************/
580 :
581 : /* tsc timer_opts struct */
582 : static struct timer_opts timer_tsc = {
583 : .name = "tsc",
584 : .mark_offset = mark_offset_tsc,
585 : .get_offset = get_offset_tsc,
586 : .monotonic_clock = monotonic_clock_tsc,
587 : .delay = delay_tsc,
588 : .read_timer = read_timer_tsc,
589 : .resume = tsc_resume,
590 : };
591 :
592 : struct init_timer_opts __initdata timer_tsc_init = {
593 : .init = init_tsc,
594 : .opts = &timer_tsc,
595 : };
|