Ruby 3.2.1p31 (2023-02-08 revision 31819e82c88c6f8ecfaeb162519bfa26a14b21fd)
thread_pthread.c
1/* -*-c-*- */
2/**********************************************************************
3
4 thread_pthread.c -
5
6 $Author$
7
8 Copyright (C) 2004-2007 Koichi Sasada
9
10**********************************************************************/
11
12#ifdef THREAD_SYSTEM_DEPENDENT_IMPLEMENTATION
13
14#include "gc.h"
15#include "mjit.h"
16
17#ifdef HAVE_SYS_RESOURCE_H
18#include <sys/resource.h>
19#endif
20#ifdef HAVE_THR_STKSEGMENT
21#include <thread.h>
22#endif
23#if defined(HAVE_FCNTL_H)
24#include <fcntl.h>
25#elif defined(HAVE_SYS_FCNTL_H)
26#include <sys/fcntl.h>
27#endif
28#ifdef HAVE_SYS_PRCTL_H
29#include <sys/prctl.h>
30#endif
31#if defined(HAVE_SYS_TIME_H)
32#include <sys/time.h>
33#endif
34#if defined(__HAIKU__)
35#include <kernel/OS.h>
36#endif
37#ifdef __linux__
38#include <sys/syscall.h> /* for SYS_gettid */
39#endif
40#include <time.h>
41#include <signal.h>
42
43#if defined __APPLE__
44# include <AvailabilityMacros.h>
45#endif
46
47#if defined(HAVE_SYS_EVENTFD_H) && defined(HAVE_EVENTFD)
48# define USE_EVENTFD (1)
49# include <sys/eventfd.h>
50#else
51# define USE_EVENTFD (0)
52#endif
53
54#if defined(SIGVTALRM) && !defined(__CYGWIN__) && !defined(__EMSCRIPTEN__)
55# define USE_UBF_LIST 1
56#endif
57
58/*
59 * UBF_TIMER and ubf_list both use SIGVTALRM.
60 *
61 * UBF_TIMER has NOTHING to do with thread timeslices (TIMER_INTERRUPT_MASK)
62 *
63 * UBF_TIMER is to close TOCTTOU signal race on programs where we
64 * cannot rely on GVL contention (vm->gvl.timer) to perform wakeups
65 * while a thread is doing blocking I/O on sockets or pipes. With
66 * rb_thread_call_without_gvl and similar functions:
67 *
68 * (1) Check interrupts.
69 * (2) release GVL.
70 * (2a) signal received
71 * (3) call func with data1 (blocks for a long time without ubf_timer)
72 * (4) acquire GVL.
73 * Other Ruby threads can not run in parallel any more.
74 * (5) Check interrupts.
75 *
76 * We need UBF_TIMER to break out of (3) if (2a) happens.
77 *
78 * ubf_list wakeups may be triggered on gvl_yield.
79 *
80 * If we have vm->gvl.timer (on GVL contention), we don't need UBF_TIMER
81 * as it can perform the same tasks while doing timeslices.
82 */
83#define UBF_TIMER_NONE 0
84#define UBF_TIMER_POSIX 1
85#define UBF_TIMER_PTHREAD 2
86
87#ifndef UBF_TIMER
88# if defined(HAVE_TIMER_SETTIME) && defined(HAVE_TIMER_CREATE) && \
89 defined(CLOCK_MONOTONIC) && defined(USE_UBF_LIST)
90 /* preferred */
91# define UBF_TIMER UBF_TIMER_POSIX
92# elif defined(USE_UBF_LIST)
93 /* safe, but inefficient */
94# define UBF_TIMER UBF_TIMER_PTHREAD
95# else
96 /* we'll be racy without SIGVTALRM for ubf_list */
97# define UBF_TIMER UBF_TIMER_NONE
98# endif
99#endif
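/*
 * Editorial sketch (not part of thread_pthread.c): how a C extension reaches
 * step (3) above.  nogvl_read(), ext_blocking_read() and struct nogvl_read_arg
 * are hypothetical names; rb_thread_call_without_gvl() and RUBY_UBF_IO are the
 * public API from ruby/thread.h, and RUBY_UBF_IO is believed to be routed to
 * ubf_select() defined later in this file.
 */
#if 0
#include <ruby.h>
#include <ruby/thread.h>
#include <unistd.h>

struct nogvl_read_arg { int fd; char buf[4096]; ssize_t ret; };

static void *
nogvl_read(void *p)                  /* step (3): blocks without the GVL */
{
    struct nogvl_read_arg *a = p;
    a->ret = read(a->fd, a->buf, sizeof(a->buf));
    return NULL;
}

static VALUE
ext_blocking_read(VALUE self, VALUE fd)
{
    struct nogvl_read_arg a;
    a.fd = NUM2INT(fd);
    a.ret = -1;
    /* if a signal arrives at (2a), UBF_TIMER keeps re-sending SIGVTALRM
     * until read() returns and interrupts are checked again at (5) */
    rb_thread_call_without_gvl(nogvl_read, &a, RUBY_UBF_IO, NULL);
    return LONG2NUM((long)a.ret);
}
#endif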
100
101struct rb_internal_thread_event_hook {
102 rb_internal_thread_event_callback callback;
103 rb_event_flag_t event;
104 void *user_data;
105
106 struct rb_internal_thread_event_hook *next;
107};
108
109static rb_internal_thread_event_hook_t *rb_internal_thread_event_hooks = NULL;
110static pthread_rwlock_t rb_internal_thread_event_hooks_rw_lock = PTHREAD_RWLOCK_INITIALIZER;
111
112#define RB_INTERNAL_THREAD_HOOK(event) if (rb_internal_thread_event_hooks) { rb_thread_execute_hooks(event); }
113
114rb_internal_thread_event_hook_t *
115rb_internal_thread_add_event_hook(rb_internal_thread_event_callback callback, rb_event_flag_t internal_event, void *user_data)
116{
117 rb_internal_thread_event_hook_t *hook = ALLOC_N(rb_internal_thread_event_hook_t, 1);
118 hook->callback = callback;
119 hook->user_data = user_data;
120 hook->event = internal_event;
121
122 int r;
123 if ((r = pthread_rwlock_wrlock(&rb_internal_thread_event_hooks_rw_lock))) {
124 rb_bug_errno("pthread_rwlock_wrlock", r);
125 }
126
127 hook->next = rb_internal_thread_event_hooks;
128 ATOMIC_PTR_EXCHANGE(rb_internal_thread_event_hooks, hook);
129
130 if ((r = pthread_rwlock_unlock(&rb_internal_thread_event_hooks_rw_lock))) {
131 rb_bug_errno("pthread_rwlock_unlock", r);
132 }
133 return hook;
134}
135
136bool
137rb_internal_thread_remove_event_hook(rb_internal_thread_event_hook_t * hook)
138{
139 int r;
140 if ((r = pthread_rwlock_wrlock(&rb_internal_thread_event_hooks_rw_lock))) {
141 rb_bug_errno("pthread_rwlock_wrlock", r);
142 }
143
144 bool success = FALSE;
145
146 if (rb_internal_thread_event_hooks == hook) {
147 ATOMIC_PTR_EXCHANGE(rb_internal_thread_event_hooks, hook->next);
148 success = TRUE;
149 }
150 else {
151 rb_internal_thread_event_hook_t *h = rb_internal_thread_event_hooks;
152
153 do {
154 if (h->next == hook) {
155 h->next = hook->next;
156 success = TRUE;
157 break;
158 }
159 } while ((h = h->next));
160 }
161
162 if ((r = pthread_rwlock_unlock(&rb_internal_thread_event_hooks_rw_lock))) {
163 rb_bug_errno("pthread_rwlock_unlock", r);
164 }
165
166 if (success) {
167 ruby_xfree(hook);
168 }
169 return success;
170}
171
172static void
173rb_thread_execute_hooks(rb_event_flag_t event)
174{
175 int r;
176 if ((r = pthread_rwlock_rdlock(&rb_internal_thread_event_hooks_rw_lock))) {
177 rb_bug_errno("pthread_rwlock_rdlock", r);
178 }
179
180 if (rb_internal_thread_event_hooks) {
181 rb_internal_thread_event_hook_t *h = rb_internal_thread_event_hooks;
182 do {
183 if (h->event & event) {
184 (*h->callback)(event, NULL, h->user_data);
185 }
186 } while((h = h->next));
187 }
188 if ((r = pthread_rwlock_unlock(&rb_internal_thread_event_hooks_rw_lock))) {
189 rb_bug_errno("pthread_rwlock_unlock", r);
190 }
191}
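/*
 * Editorial sketch (not part of thread_pthread.c): registering one of the
 * hooks fired by RB_INTERNAL_THREAD_HOOK() above.  count_resumes(),
 * install_resume_counter() and resumed_events are hypothetical names; the
 * add/remove functions and RUBY_INTERNAL_THREAD_EVENT_RESUMED are the API
 * defined in this file (exposed via ruby/thread.h as of 3.2).  event_data is
 * always NULL here, as rb_thread_execute_hooks() shows, so it is typed
 * loosely below; real code should match rb_internal_thread_event_callback
 * exactly.
 */
#if 0
#include <ruby/thread.h>

static unsigned long resumed_events;   /* racy counter, fine for a sketch */

static void
count_resumes(rb_event_flag_t event, const void *event_data, void *user_data)
{
    (void)event;
    (void)event_data;                  /* NULL in this implementation */
    ++*(unsigned long *)user_data;
}

static void
install_resume_counter(void)
{
    rb_internal_thread_event_hook_t *hook =
        rb_internal_thread_add_event_hook(
            (rb_internal_thread_event_callback)count_resumes,
            RUBY_INTERNAL_THREAD_EVENT_RESUMED,
            &resumed_events);
    /* ... later, from the same process: */
    rb_internal_thread_remove_event_hook(hook);
}
#endif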
192
193enum rtimer_state {
194 /* alive, after timer_create: */
195 RTIMER_DISARM,
196 RTIMER_ARMING,
197 RTIMER_ARMED,
198
199 RTIMER_DEAD
200};
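/*
 * Editorial note on the UBF_TIMER_POSIX state machine: ubf_timer_create()
 * moves DEAD -> DISARM once the timer exists; ubf_timer_arm() moves
 * DISARM -> ARMING -> ARMED around timer_settime(); ubf_timer_disarm()
 * moves ARMED -> DISARM; ubf_timer_destroy() forces any state back to DEAD.
 * All transitions go through timer_state_cas()/timer_state_exchange() below,
 * so the signal-handler path can arm the timer without taking a lock.
 */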
201
202#if UBF_TIMER == UBF_TIMER_POSIX
203static const struct itimerspec zero;
204static struct {
205 rb_atomic_t state_; /* rtimer_state */
206 rb_pid_t owner;
207 timer_t timerid;
208} timer_posix = {
209 /* .state = */ RTIMER_DEAD,
210};
211
212#define TIMER_STATE_DEBUG 0
213
214static const char *
215rtimer_state_name(enum rtimer_state state)
216{
217 switch (state) {
218 case RTIMER_DISARM: return "disarm";
219 case RTIMER_ARMING: return "arming";
220 case RTIMER_ARMED: return "armed";
221 case RTIMER_DEAD: return "dead";
222 default: rb_bug("unreachable");
223 }
224}
225
226static enum rtimer_state
227timer_state_exchange(enum rtimer_state state)
228{
229 enum rtimer_state prev = ATOMIC_EXCHANGE(timer_posix.state_, state);
230 if (TIMER_STATE_DEBUG) fprintf(stderr, "state (exc): %s->%s\n", rtimer_state_name(prev), rtimer_state_name(state));
231 return prev;
232}
233
234static enum rtimer_state
235timer_state_cas(enum rtimer_state expected_prev, enum rtimer_state state)
236{
237 enum rtimer_state prev = ATOMIC_CAS(timer_posix.state_, expected_prev, state);
238
239 if (TIMER_STATE_DEBUG) {
240 if (prev == expected_prev) {
241 fprintf(stderr, "state (cas): %s->%s\n", rtimer_state_name(prev), rtimer_state_name(state));
242 }
243 else {
244 fprintf(stderr, "state (cas): %s (expected:%s)\n", rtimer_state_name(prev), rtimer_state_name(expected_prev));
245 }
246 }
247
248 return prev;
249}
250
251#elif UBF_TIMER == UBF_TIMER_PTHREAD
252static void *timer_pthread_fn(void *);
253static struct {
254 int low[2];
255 rb_atomic_t armed; /* boolean */
256 rb_pid_t owner;
257 pthread_t thid;
258} timer_pthread = {
259 { -1, -1 },
260};
261#endif
262
263static const rb_hrtime_t *sigwait_timeout(rb_thread_t *, int sigwait_fd,
264 const rb_hrtime_t *,
265 int *drained_p);
266static void ubf_timer_disarm(void);
267static void threadptr_trap_interrupt(rb_thread_t *);
268static void ubf_wakeup_all_threads(void);
269static int ubf_threads_empty(void);
270
271#define TIMER_THREAD_CREATED_P() (signal_self_pipe.owner_process == getpid())
272
273/* for testing, and in case we come across a platform w/o pipes: */
274#define BUSY_WAIT_SIGNALS (0)
275
276/*
277 * sigwait_th is the thread which owns sigwait_fd and sleeps on it
278 * (using ppoll). MJIT worker can be sigwait_th==0, so we initialize
279 * it to THREAD_INVALID at startup and fork time. It is the ONLY thread
280 * allowed to read from sigwait_fd, otherwise starvation can occur.
281 */
282#define THREAD_INVALID ((const rb_thread_t *)-1)
283static const rb_thread_t *sigwait_th;
284
285#ifdef HAVE_SCHED_YIELD
286#define native_thread_yield() (void)sched_yield()
287#else
288#define native_thread_yield() ((void)0)
289#endif
290
291#if defined(HAVE_PTHREAD_CONDATTR_SETCLOCK) && \
292 defined(CLOCK_REALTIME) && defined(CLOCK_MONOTONIC) && \
293 defined(HAVE_CLOCK_GETTIME)
294static pthread_condattr_t condattr_mono;
295static pthread_condattr_t *condattr_monotonic = &condattr_mono;
296#else
297static const void *const condattr_monotonic = NULL;
298#endif
299
300/* 100ms. 10ms is too small for user level thread scheduling
301 * on recent Linux (tested on 2.6.35)
302 */
303#define TIME_QUANTUM_MSEC (100)
304#define TIME_QUANTUM_USEC (TIME_QUANTUM_MSEC * 1000)
305#define TIME_QUANTUM_NSEC (TIME_QUANTUM_USEC * 1000)
306
307static rb_hrtime_t native_cond_timeout(rb_nativethread_cond_t *, rb_hrtime_t);
308static int native_cond_timedwait(rb_nativethread_cond_t *cond, pthread_mutex_t *mutex, const rb_hrtime_t *abs);
309
310/*
311 * Designate the next sched.timer thread, favor the last thread in
312 * the readyq since it will be in readyq longest
313 */
314static int
315designate_timer_thread(struct rb_thread_sched *sched)
316{
317 rb_thread_t *last;
318
319 last = ccan_list_tail(&sched->readyq, rb_thread_t, sched.node.readyq);
320
321 if (last) {
322 rb_native_cond_signal(&last->nt->cond.readyq);
323 return TRUE;
324 }
325 else {
326 return FALSE;
327 }
328}
329
330/*
331 * We become designated timer thread to kick vm->gvl.owner
332 * periodically. Continue on old timeout if it expired.
333 */
334static void
335do_gvl_timer(struct rb_thread_sched *sched, rb_thread_t *th)
336{
337 rb_vm_t *vm = GET_VM();
338 static rb_hrtime_t abs;
339
340 sched->timer = th;
341
342 /* take over wakeups from UBF_TIMER */
343 ubf_timer_disarm();
344
345 if (sched->timer_err == ETIMEDOUT) {
346 abs = native_cond_timeout(&th->nt->cond.readyq, TIME_QUANTUM_NSEC);
347 }
348 sched->timer_err = native_cond_timedwait(&th->nt->cond.readyq, &sched->lock, &abs);
349
350 ubf_wakeup_all_threads();
351 ruby_sigchld_handler(vm);
352
353 if (UNLIKELY(rb_signal_buff_size())) {
354 if (th == vm->ractor.main_thread) {
355 RUBY_VM_SET_TRAP_INTERRUPT(th->ec);
356 }
357 else {
358 threadptr_trap_interrupt(vm->ractor.main_thread);
359 }
360 }
361
362 /*
363 * Timeslice. Warning: the process may fork while this
364 * thread is contending for GVL:
365 */
366 const rb_thread_t *running;
367 if ((running = sched->running) != 0) {
368 // strictly speaking, accessing "running" is not thread-safe
369 RUBY_VM_SET_TIMER_INTERRUPT(running->ec);
370 }
371 sched->timer = 0;
372}
373
374static void
375thread_sched_to_ready_common(struct rb_thread_sched *sched, rb_thread_t *th)
376{
377 ccan_list_add_tail(&sched->readyq, &th->sched.node.readyq);
378}
379
380static void
381thread_sched_to_running_common(struct rb_thread_sched *sched, rb_thread_t *th)
382{
383 RB_INTERNAL_THREAD_HOOK(RUBY_INTERNAL_THREAD_EVENT_READY);
384 if (sched->running) {
385 VM_ASSERT(th->unblock.func == 0 &&
386 "we must not be in ubf_list and GVL readyq at the same time");
387
388 // waiting -> ready
389 thread_sched_to_ready_common(sched, th);
390
391 // wait for running chance
392 do {
393 if (!sched->timer) {
394 do_gvl_timer(sched, th);
395 }
396 else {
397 rb_native_cond_wait(&th->nt->cond.readyq, &sched->lock);
398 }
399 } while (sched->running);
400
401 ccan_list_del_init(&th->sched.node.readyq);
402
403 if (sched->need_yield) {
404 sched->need_yield = 0;
405 rb_native_cond_signal(&sched->switch_cond);
406 }
407 }
408 else { /* reset timer if uncontended */
409 sched->timer_err = ETIMEDOUT;
410 }
411
412 // ready -> running
413 sched->running = th;
414
415 RB_INTERNAL_THREAD_HOOK(RUBY_INTERNAL_THREAD_EVENT_RESUMED);
416
417 if (!sched->timer) {
418 if (!designate_timer_thread(sched) && !ubf_threads_empty()) {
419 rb_thread_wakeup_timer_thread(-1);
420 }
421 }
422}
423
424static void
425thread_sched_to_running(struct rb_thread_sched *sched, rb_thread_t *th)
426{
427 rb_native_mutex_lock(&sched->lock);
428 thread_sched_to_running_common(sched, th);
429 rb_native_mutex_unlock(&sched->lock);
430}
431
432static rb_thread_t *
433thread_sched_to_waiting_common(struct rb_thread_sched *sched)
434{
435 rb_thread_t *next;
436 sched->running = NULL;
437 next = ccan_list_top(&sched->readyq, rb_thread_t, sched.node.readyq);
438 if (next) rb_native_cond_signal(&next->nt->cond.readyq);
439
440 return next;
441}
442
443static void
444thread_sched_to_waiting(struct rb_thread_sched *sched)
445{
446 RB_INTERNAL_THREAD_HOOK(RUBY_INTERNAL_THREAD_EVENT_SUSPENDED);
447 rb_native_mutex_lock(&sched->lock);
448 thread_sched_to_waiting_common(sched);
449 rb_native_mutex_unlock(&sched->lock);
450}
451
452static void
453thread_sched_yield(struct rb_thread_sched *sched, rb_thread_t *th)
454{
455 rb_thread_t *next;
456
457 /*
458 * Perhaps other threads are stuck in blocking region w/o GVL, too,
459 * (perhaps looping in io_close_fptr) so we kick them:
460 */
461 ubf_wakeup_all_threads();
462 rb_native_mutex_lock(&sched->lock);
463 next = thread_sched_to_waiting_common(sched);
464
465    /* Another thread is processing a GVL yield. */
466 if (UNLIKELY(sched->wait_yield)) {
467 while (sched->wait_yield)
468 rb_native_cond_wait(&sched->switch_wait_cond, &sched->lock);
469 }
470 else if (next) {
471 /* Wait until another thread task takes GVL. */
472 sched->need_yield = 1;
473 sched->wait_yield = 1;
474 while (sched->need_yield)
475 rb_native_cond_wait(&sched->switch_cond, &sched->lock);
476 sched->wait_yield = 0;
477 rb_native_cond_broadcast(&sched->switch_wait_cond);
478 }
479 else {
480 rb_native_mutex_unlock(&sched->lock);
481 native_thread_yield();
482 rb_native_mutex_lock(&sched->lock);
483 rb_native_cond_broadcast(&sched->switch_wait_cond);
484 }
485 thread_sched_to_running_common(sched, th);
486 rb_native_mutex_unlock(&sched->lock);
487}
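/*
 * Editorial note: need_yield/wait_yield form a handshake for GVL yields.
 * The yielding thread wakes the next ready thread, sets need_yield, and
 * sleeps on switch_cond until that thread has actually taken the GVL
 * (thread_sched_to_running_common() above clears need_yield and signals
 * switch_cond); any concurrent yielder waits on switch_wait_cond so only
 * one handshake is in flight at a time.
 */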
488
489void
490rb_thread_sched_init(struct rb_thread_sched *sched)
491{
492 rb_native_mutex_initialize(&sched->lock);
493 rb_native_cond_initialize(&sched->switch_cond);
494 rb_native_cond_initialize(&sched->switch_wait_cond);
495 ccan_list_head_init(&sched->readyq);
496 sched->running = NULL;
497 sched->timer = 0;
498 sched->timer_err = ETIMEDOUT;
499 sched->need_yield = 0;
500 sched->wait_yield = 0;
501}
502
503#if 0
504// TODO
505
506static void clear_thread_cache_altstack(void);
507
508static void
509rb_thread_sched_destroy(struct rb_thread_sched *sched)
510{
511 /*
512 * only called once at VM shutdown (not atfork), another thread
513 * may still grab vm->gvl.lock when calling gvl_release at
514 * the end of thread_start_func_2
515 */
516 if (0) {
517 rb_native_cond_destroy(&sched->switch_wait_cond);
518 rb_native_cond_destroy(&sched->switch_cond);
519 rb_native_mutex_destroy(&sched->lock);
520 }
521 clear_thread_cache_altstack();
522}
523#endif
524
525#if defined(HAVE_WORKING_FORK)
526static void thread_cache_reset(void);
527static void
528thread_sched_atfork(struct rb_thread_sched *sched)
529{
530 thread_cache_reset();
531 rb_thread_sched_init(sched);
532 thread_sched_to_running(sched, GET_THREAD());
533}
534#endif
535
536#define NATIVE_MUTEX_LOCK_DEBUG 0
537
538static void
539mutex_debug(const char *msg, void *lock)
540{
541 if (NATIVE_MUTEX_LOCK_DEBUG) {
542 int r;
543 static pthread_mutex_t dbglock = PTHREAD_MUTEX_INITIALIZER;
544
545 if ((r = pthread_mutex_lock(&dbglock)) != 0) {exit(EXIT_FAILURE);}
546 fprintf(stdout, "%s: %p\n", msg, lock);
547 if ((r = pthread_mutex_unlock(&dbglock)) != 0) {exit(EXIT_FAILURE);}
548 }
549}
550
551void
552rb_native_mutex_lock(pthread_mutex_t *lock)
553{
554 int r;
555 mutex_debug("lock", lock);
556 if ((r = pthread_mutex_lock(lock)) != 0) {
557 rb_bug_errno("pthread_mutex_lock", r);
558 }
559}
560
561void
562rb_native_mutex_unlock(pthread_mutex_t *lock)
563{
564 int r;
565 mutex_debug("unlock", lock);
566 if ((r = pthread_mutex_unlock(lock)) != 0) {
567 rb_bug_errno("pthread_mutex_unlock", r);
568 }
569}
570
571int
572rb_native_mutex_trylock(pthread_mutex_t *lock)
573{
574 int r;
575 mutex_debug("trylock", lock);
576 if ((r = pthread_mutex_trylock(lock)) != 0) {
577 if (r == EBUSY) {
578 return EBUSY;
579 }
580 else {
581 rb_bug_errno("pthread_mutex_trylock", r);
582 }
583 }
584 return 0;
585}
586
587void
588rb_native_mutex_initialize(pthread_mutex_t *lock)
589{
590 int r = pthread_mutex_init(lock, 0);
591 mutex_debug("init", lock);
592 if (r != 0) {
593 rb_bug_errno("pthread_mutex_init", r);
594 }
595}
596
597void
598rb_native_mutex_destroy(pthread_mutex_t *lock)
599{
600 int r = pthread_mutex_destroy(lock);
601 mutex_debug("destroy", lock);
602 if (r != 0) {
603 rb_bug_errno("pthread_mutex_destroy", r);
604 }
605}
606
607void
608rb_native_cond_initialize(rb_nativethread_cond_t *cond)
609{
610 int r = pthread_cond_init(cond, condattr_monotonic);
611 if (r != 0) {
612 rb_bug_errno("pthread_cond_init", r);
613 }
614}
615
616void
617rb_native_cond_destroy(rb_nativethread_cond_t *cond)
618{
619 int r = pthread_cond_destroy(cond);
620 if (r != 0) {
621 rb_bug_errno("pthread_cond_destroy", r);
622 }
623}
624
625/*
626 * In OS X 10.7 (Lion), pthread_cond_signal and pthread_cond_broadcast return
627 * EAGAIN after retrying 8192 times. You can see them in the following page:
628 *
629 * http://www.opensource.apple.com/source/Libc/Libc-763.11/pthreads/pthread_cond.c
630 *
631 * The following rb_native_cond_signal and rb_native_cond_broadcast functions
632 * need to retry until the pthread functions stop returning EAGAIN.
633 */
634
635void
636rb_native_cond_signal(rb_nativethread_cond_t *cond)
637{
638 int r;
639 do {
640 r = pthread_cond_signal(cond);
641 } while (r == EAGAIN);
642 if (r != 0) {
643 rb_bug_errno("pthread_cond_signal", r);
644 }
645}
646
647void
648rb_native_cond_broadcast(rb_nativethread_cond_t *cond)
649{
650 int r;
651 do {
652 r = pthread_cond_broadcast(cond);
653 } while (r == EAGAIN);
654 if (r != 0) {
655 rb_bug_errno("rb_native_cond_broadcast", r);
656 }
657}
658
659void
660rb_native_cond_wait(rb_nativethread_cond_t *cond, pthread_mutex_t *mutex)
661{
662 int r = pthread_cond_wait(cond, mutex);
663 if (r != 0) {
664 rb_bug_errno("pthread_cond_wait", r);
665 }
666}
667
668static int
669native_cond_timedwait(rb_nativethread_cond_t *cond, pthread_mutex_t *mutex, const rb_hrtime_t *abs)
670{
671 int r;
672 struct timespec ts;
673
674 /*
675     * Old Linux kernels may return EINTR even though POSIX says
676     * "These functions shall not return an error code of [EINTR]".
677 * http://pubs.opengroup.org/onlinepubs/009695399/functions/pthread_cond_timedwait.html
678 * Let's hide it from arch generic code.
679 */
680 do {
681 rb_hrtime2timespec(&ts, abs);
682 r = pthread_cond_timedwait(cond, mutex, &ts);
683 } while (r == EINTR);
684
685 if (r != 0 && r != ETIMEDOUT) {
686 rb_bug_errno("pthread_cond_timedwait", r);
687 }
688
689 return r;
690}
691
692void
693rb_native_cond_timedwait(rb_nativethread_cond_t *cond, pthread_mutex_t *mutex, unsigned long msec)
694{
695 rb_hrtime_t hrmsec = native_cond_timeout(cond, RB_HRTIME_PER_MSEC * msec);
696 native_cond_timedwait(cond, mutex, &hrmsec);
697}
698
699static rb_hrtime_t
700native_cond_timeout(rb_nativethread_cond_t *cond, const rb_hrtime_t rel)
701{
702 if (condattr_monotonic) {
703 return rb_hrtime_add(rb_hrtime_now(), rel);
704 }
705 else {
706 struct timespec ts;
707
708 rb_timespec_now(&ts);
709 return rb_hrtime_add(rb_timespec2hrtime(&ts), rel);
710 }
711}
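/*
 * Editorial note: the intended pattern is the one in rb_native_cond_timedwait()
 * above: convert a relative wait into an absolute deadline once with
 * native_cond_timeout(), then pass that same deadline to
 * native_cond_timedwait(), whose EINTR retry loop therefore cannot extend the
 * total wait.  The deadline is against CLOCK_MONOTONIC when condattr_monotonic
 * was accepted at startup, and against the realtime clock otherwise.
 */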
712
713#define native_cleanup_push pthread_cleanup_push
714#define native_cleanup_pop pthread_cleanup_pop
715
716#ifdef RB_THREAD_LOCAL_SPECIFIER
717static RB_THREAD_LOCAL_SPECIFIER rb_thread_t *ruby_native_thread;
718#else
719static pthread_key_t ruby_native_thread_key;
720#endif
721
722static void
723null_func(int i)
724{
725 /* null */
726}
727
728static rb_thread_t *
729ruby_thread_from_native(void)
730{
731#ifdef RB_THREAD_LOCAL_SPECIFIER
732 return ruby_native_thread;
733#else
734 return pthread_getspecific(ruby_native_thread_key);
735#endif
736}
737
738int
739ruby_thread_set_native(rb_thread_t *th)
740{
741 if (th) {
742#ifdef USE_UBF_LIST
743 ccan_list_node_init(&th->sched.node.ubf);
744#endif
745 }
746
747 // setup TLS
748
749 if (th && th->ec) {
750 rb_ractor_set_current_ec(th->ractor, th->ec);
751 }
752#ifdef RB_THREAD_LOCAL_SPECIFIER
753 ruby_native_thread = th;
754 return 1;
755#else
756 return pthread_setspecific(ruby_native_thread_key, th) == 0;
757#endif
758}
759
760#ifdef RB_THREAD_T_HAS_NATIVE_ID
761static int
762get_native_thread_id(void)
763{
764#ifdef __linux__
765 return (int)syscall(SYS_gettid);
766#elif defined(__FreeBSD__)
767 return pthread_getthreadid_np();
768#endif
769}
770#endif
771
772static void
773native_thread_init(struct rb_native_thread *nt)
774{
775#ifdef RB_THREAD_T_HAS_NATIVE_ID
776 nt->tid = get_native_thread_id();
777#endif
778 rb_native_cond_initialize(&nt->cond.readyq);
779 if (&nt->cond.readyq != &nt->cond.intr)
780 rb_native_cond_initialize(&nt->cond.intr);
781}
782
783void
784Init_native_thread(rb_thread_t *main_th)
785{
786#if defined(HAVE_PTHREAD_CONDATTR_SETCLOCK)
787 if (condattr_monotonic) {
788 int r = pthread_condattr_init(condattr_monotonic);
789 if (r == 0) {
790 r = pthread_condattr_setclock(condattr_monotonic, CLOCK_MONOTONIC);
791 }
792 if (r) condattr_monotonic = NULL;
793 }
794#endif
795
796#ifndef RB_THREAD_LOCAL_SPECIFIER
797 if (pthread_key_create(&ruby_native_thread_key, 0) == EAGAIN) {
798 rb_bug("pthread_key_create failed (ruby_native_thread_key)");
799 }
800 if (pthread_key_create(&ruby_current_ec_key, 0) == EAGAIN) {
801 rb_bug("pthread_key_create failed (ruby_current_ec_key)");
802 }
803#endif
804 posix_signal(SIGVTALRM, null_func);
805
806 // setup main thread
807 main_th->nt->thread_id = pthread_self();
808 ruby_thread_set_native(main_th);
809 native_thread_init(main_th->nt);
810}
811
812#ifndef USE_THREAD_CACHE
813#define USE_THREAD_CACHE 1
814#endif
815
816static void
817native_thread_destroy(rb_thread_t *th)
818{
819 struct rb_native_thread *nt = th->nt;
820
821 rb_native_cond_destroy(&nt->cond.readyq);
822
823 if (&nt->cond.readyq != &nt->cond.intr)
824 rb_native_cond_destroy(&nt->cond.intr);
825
826 /*
827 * prevent false positive from ruby_thread_has_gvl_p if that
828 * gets called from an interposing function wrapper
829 */
830 if (USE_THREAD_CACHE)
831 ruby_thread_set_native(0);
832}
833
834#if USE_THREAD_CACHE
835static rb_thread_t *register_cached_thread_and_wait(void *);
836#endif
837
838#if defined HAVE_PTHREAD_GETATTR_NP || defined HAVE_PTHREAD_ATTR_GET_NP
839#define STACKADDR_AVAILABLE 1
840#elif defined HAVE_PTHREAD_GET_STACKADDR_NP && defined HAVE_PTHREAD_GET_STACKSIZE_NP
841#define STACKADDR_AVAILABLE 1
842#undef MAINSTACKADDR_AVAILABLE
843#define MAINSTACKADDR_AVAILABLE 1
844void *pthread_get_stackaddr_np(pthread_t);
845size_t pthread_get_stacksize_np(pthread_t);
846#elif defined HAVE_THR_STKSEGMENT || defined HAVE_PTHREAD_STACKSEG_NP
847#define STACKADDR_AVAILABLE 1
848#elif defined HAVE_PTHREAD_GETTHRDS_NP
849#define STACKADDR_AVAILABLE 1
850#elif defined __HAIKU__
851#define STACKADDR_AVAILABLE 1
852#endif
853
854#ifndef MAINSTACKADDR_AVAILABLE
855# ifdef STACKADDR_AVAILABLE
856# define MAINSTACKADDR_AVAILABLE 1
857# else
858# define MAINSTACKADDR_AVAILABLE 0
859# endif
860#endif
861#if MAINSTACKADDR_AVAILABLE && !defined(get_main_stack)
862# define get_main_stack(addr, size) get_stack(addr, size)
863#endif
864
865#ifdef STACKADDR_AVAILABLE
866/*
867 * Get the initial address and size of current thread's stack
868 */
869static int
870get_stack(void **addr, size_t *size)
871{
872#define CHECK_ERR(expr) \
873 {int err = (expr); if (err) return err;}
874#ifdef HAVE_PTHREAD_GETATTR_NP /* Linux */
875 pthread_attr_t attr;
876 size_t guard = 0;
877 STACK_GROW_DIR_DETECTION;
878 CHECK_ERR(pthread_getattr_np(pthread_self(), &attr));
879# ifdef HAVE_PTHREAD_ATTR_GETSTACK
880 CHECK_ERR(pthread_attr_getstack(&attr, addr, size));
881 STACK_DIR_UPPER((void)0, (void)(*addr = (char *)*addr + *size));
882# else
883 CHECK_ERR(pthread_attr_getstackaddr(&attr, addr));
884 CHECK_ERR(pthread_attr_getstacksize(&attr, size));
885# endif
886# ifdef HAVE_PTHREAD_ATTR_GETGUARDSIZE
887 CHECK_ERR(pthread_attr_getguardsize(&attr, &guard));
888# else
889 guard = getpagesize();
890# endif
891 *size -= guard;
892 pthread_attr_destroy(&attr);
893#elif defined HAVE_PTHREAD_ATTR_GET_NP /* FreeBSD, DragonFly BSD, NetBSD */
894 pthread_attr_t attr;
895 CHECK_ERR(pthread_attr_init(&attr));
896 CHECK_ERR(pthread_attr_get_np(pthread_self(), &attr));
897# ifdef HAVE_PTHREAD_ATTR_GETSTACK
898 CHECK_ERR(pthread_attr_getstack(&attr, addr, size));
899# else
900 CHECK_ERR(pthread_attr_getstackaddr(&attr, addr));
901 CHECK_ERR(pthread_attr_getstacksize(&attr, size));
902# endif
903 STACK_DIR_UPPER((void)0, (void)(*addr = (char *)*addr + *size));
904 pthread_attr_destroy(&attr);
905#elif (defined HAVE_PTHREAD_GET_STACKADDR_NP && defined HAVE_PTHREAD_GET_STACKSIZE_NP) /* MacOS X */
906 pthread_t th = pthread_self();
907 *addr = pthread_get_stackaddr_np(th);
908 *size = pthread_get_stacksize_np(th);
909#elif defined HAVE_THR_STKSEGMENT || defined HAVE_PTHREAD_STACKSEG_NP
910 stack_t stk;
911# if defined HAVE_THR_STKSEGMENT /* Solaris */
912 CHECK_ERR(thr_stksegment(&stk));
913# else /* OpenBSD */
914 CHECK_ERR(pthread_stackseg_np(pthread_self(), &stk));
915# endif
916 *addr = stk.ss_sp;
917 *size = stk.ss_size;
918#elif defined HAVE_PTHREAD_GETTHRDS_NP /* AIX */
919 pthread_t th = pthread_self();
920 struct __pthrdsinfo thinfo;
921 char reg[256];
922 int regsiz=sizeof(reg);
923 CHECK_ERR(pthread_getthrds_np(&th, PTHRDSINFO_QUERY_ALL,
924 &thinfo, sizeof(thinfo),
925 &reg, &regsiz));
926 *addr = thinfo.__pi_stackaddr;
927 /* Must not use thinfo.__pi_stacksize for size.
928 It is around 3KB smaller than the correct size
929 calculated by thinfo.__pi_stackend - thinfo.__pi_stackaddr. */
930 *size = thinfo.__pi_stackend - thinfo.__pi_stackaddr;
931 STACK_DIR_UPPER((void)0, (void)(*addr = (char *)*addr + *size));
932#elif defined __HAIKU__
933 thread_info info;
934 STACK_GROW_DIR_DETECTION;
935 CHECK_ERR(get_thread_info(find_thread(NULL), &info));
936 *addr = info.stack_base;
937 *size = (uintptr_t)info.stack_end - (uintptr_t)info.stack_base;
938 STACK_DIR_UPPER((void)0, (void)(*addr = (char *)*addr + *size));
939#else
940#error STACKADDR_AVAILABLE is defined but not implemented.
941#endif
942 return 0;
943#undef CHECK_ERR
944}
945#endif
946
947static struct {
948    rb_nativethread_id_t id;
949    size_t stack_maxsize;
950 VALUE *stack_start;
951} native_main_thread;
952
953#ifdef STACK_END_ADDRESS
954extern void *STACK_END_ADDRESS;
955#endif
956
957enum {
958 RUBY_STACK_SPACE_LIMIT = 1024 * 1024, /* 1024KB */
959 RUBY_STACK_SPACE_RATIO = 5
960};
961
962static size_t
963space_size(size_t stack_size)
964{
965 size_t space_size = stack_size / RUBY_STACK_SPACE_RATIO;
966 if (space_size > RUBY_STACK_SPACE_LIMIT) {
967 return RUBY_STACK_SPACE_LIMIT;
968 }
969 else {
970 return space_size;
971 }
972}
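/*
 * Editorial example: with the common 8 MiB stack rlimit, space_size() yields
 * min(8 MiB / 5, RUBY_STACK_SPACE_LIMIT) = 1 MiB; with a 2 MiB stack it
 * yields 2 MiB / 5 (about 410 KiB).  native_thread_create() below subtracts
 * this margin from the new thread's usable machine stack
 * (stack_maxsize = stack_size - space).
 */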
973
974#ifdef __linux__
975static __attribute__((noinline)) void
976reserve_stack(volatile char *limit, size_t size)
977{
978# ifdef C_ALLOCA
979# error needs alloca()
980# endif
981 struct rlimit rl;
982 volatile char buf[0x100];
983 enum {stack_check_margin = 0x1000}; /* for -fstack-check */
984
985 STACK_GROW_DIR_DETECTION;
986
987 if (!getrlimit(RLIMIT_STACK, &rl) && rl.rlim_cur == RLIM_INFINITY)
988 return;
989
990 if (size < stack_check_margin) return;
991 size -= stack_check_margin;
992
993 size -= sizeof(buf); /* margin */
994 if (IS_STACK_DIR_UPPER()) {
995 const volatile char *end = buf + sizeof(buf);
996 limit += size;
997 if (limit > end) {
998 /* |<-bottom (=limit(a)) top->|
999 * | .. |<-buf 256B |<-end | stack check |
1000 * | 256B | =size= | margin (4KB)|
1001 * | =size= limit(b)->| 256B | |
1002 * | | alloca(sz) | | |
1003 * | .. |<-buf |<-limit(c) [sz-1]->0> | |
1004 */
1005 size_t sz = limit - end;
1006 limit = alloca(sz);
1007 limit[sz-1] = 0;
1008 }
1009 }
1010 else {
1011 limit -= size;
1012 if (buf > limit) {
1013 /* |<-top (=limit(a)) bottom->|
1014 * | .. | 256B buf->| | stack check |
1015 * | 256B | =size= | margin (4KB)|
1016 * | =size= limit(b)->| 256B | |
1017 * | | alloca(sz) | | |
1018 * | .. | buf->| limit(c)-><0> | |
1019 */
1020 size_t sz = buf - limit;
1021 limit = alloca(sz);
1022 limit[0] = 0;
1023 }
1024 }
1025}
1026#else
1027# define reserve_stack(limit, size) ((void)(limit), (void)(size))
1028#endif
1029
1030#undef ruby_init_stack
1031void
1032ruby_init_stack(volatile VALUE *addr)
1033{
1034 native_main_thread.id = pthread_self();
1035
1036#if MAINSTACKADDR_AVAILABLE
1037 if (native_main_thread.stack_maxsize) return;
1038 {
1039 void* stackaddr;
1040 size_t size;
1041 if (get_main_stack(&stackaddr, &size) == 0) {
1042 native_main_thread.stack_maxsize = size;
1043 native_main_thread.stack_start = stackaddr;
1044 reserve_stack(stackaddr, size);
1045 goto bound_check;
1046 }
1047 }
1048#endif
1049#ifdef STACK_END_ADDRESS
1050 native_main_thread.stack_start = STACK_END_ADDRESS;
1051#else
1052 if (!native_main_thread.stack_start ||
1053 STACK_UPPER((VALUE *)(void *)&addr,
1054 native_main_thread.stack_start > addr,
1055 native_main_thread.stack_start < addr)) {
1056 native_main_thread.stack_start = (VALUE *)addr;
1057 }
1058#endif
1059 {
1060#if defined(HAVE_GETRLIMIT)
1061#if defined(PTHREAD_STACK_DEFAULT)
1062# if PTHREAD_STACK_DEFAULT < RUBY_STACK_SPACE*5
1063# error "PTHREAD_STACK_DEFAULT is too small"
1064# endif
1065 size_t size = PTHREAD_STACK_DEFAULT;
1066#else
1067 size_t size = RUBY_VM_THREAD_VM_STACK_SIZE;
1068#endif
1069 size_t space;
1070 int pagesize = getpagesize();
1071 struct rlimit rlim;
1072 STACK_GROW_DIR_DETECTION;
1073 if (getrlimit(RLIMIT_STACK, &rlim) == 0) {
1074 size = (size_t)rlim.rlim_cur;
1075 }
1076 addr = native_main_thread.stack_start;
1077 if (IS_STACK_DIR_UPPER()) {
1078 space = ((size_t)((char *)addr + size) / pagesize) * pagesize - (size_t)addr;
1079 }
1080 else {
1081 space = (size_t)addr - ((size_t)((char *)addr - size) / pagesize + 1) * pagesize;
1082 }
1083 native_main_thread.stack_maxsize = space;
1084#endif
1085 }
1086
1087#if MAINSTACKADDR_AVAILABLE
1088 bound_check:
1089#endif
1090 /* If addr is out of range of main-thread stack range estimation, */
1091 /* it should be on co-routine (alternative stack). [Feature #2294] */
1092 {
1093 void *start, *end;
1094 STACK_GROW_DIR_DETECTION;
1095
1096 if (IS_STACK_DIR_UPPER()) {
1097 start = native_main_thread.stack_start;
1098 end = (char *)native_main_thread.stack_start + native_main_thread.stack_maxsize;
1099 }
1100 else {
1101 start = (char *)native_main_thread.stack_start - native_main_thread.stack_maxsize;
1102 end = native_main_thread.stack_start;
1103 }
1104
1105 if ((void *)addr < start || (void *)addr > end) {
1106 /* out of range */
1107 native_main_thread.stack_start = (VALUE *)addr;
1108 native_main_thread.stack_maxsize = 0; /* unknown */
1109 }
1110 }
1111}
1112
1113#define CHECK_ERR(expr) \
1114 {int err = (expr); if (err) {rb_bug_errno(#expr, err);}}
1115
1116static int
1117native_thread_init_stack(rb_thread_t *th)
1118{
1119 rb_nativethread_id_t curr = pthread_self();
1120
1121 if (pthread_equal(curr, native_main_thread.id)) {
1122 th->ec->machine.stack_start = native_main_thread.stack_start;
1123 th->ec->machine.stack_maxsize = native_main_thread.stack_maxsize;
1124 }
1125 else {
1126#ifdef STACKADDR_AVAILABLE
1127 void *start;
1128 size_t size;
1129
1130 if (get_stack(&start, &size) == 0) {
1131 uintptr_t diff = (uintptr_t)start - (uintptr_t)&curr;
1132 th->ec->machine.stack_start = (VALUE *)&curr;
1133 th->ec->machine.stack_maxsize = size - diff;
1134 }
1135#else
1136 rb_raise(rb_eNotImpError, "ruby engine can initialize only in the main thread");
1137#endif
1138 }
1139
1140 return 0;
1141}
1142
1143#ifndef __CYGWIN__
1144#define USE_NATIVE_THREAD_INIT 1
1145#endif
1146
1147static void *
1148thread_start_func_1(void *th_ptr)
1149{
1150 rb_thread_t *th = th_ptr;
1151 RB_ALTSTACK_INIT(void *altstack, th->nt->altstack);
1152#if USE_THREAD_CACHE
1153 thread_start:
1154#endif
1155 {
1156#if !defined USE_NATIVE_THREAD_INIT
1157 VALUE stack_start;
1158#endif
1159
1160#if defined USE_NATIVE_THREAD_INIT
1161 native_thread_init_stack(th);
1162#endif
1163
1164 native_thread_init(th->nt);
1165
1166 RB_INTERNAL_THREAD_HOOK(RUBY_INTERNAL_THREAD_EVENT_STARTED);
1167
1168 /* run */
1169#if defined USE_NATIVE_THREAD_INIT
1170 thread_start_func_2(th, th->ec->machine.stack_start);
1171#else
1172 thread_start_func_2(th, &stack_start);
1173#endif
1174
1175 RB_INTERNAL_THREAD_HOOK(RUBY_INTERNAL_THREAD_EVENT_EXITED);
1176 }
1177#if USE_THREAD_CACHE
1178 /* cache thread */
1179 if ((th = register_cached_thread_and_wait(RB_ALTSTACK(altstack))) != 0) {
1180 goto thread_start;
1181 }
1182#else
1183 RB_ALTSTACK_FREE(altstack);
1184#endif
1185 return 0;
1186}
1187
1188struct cached_thread_entry {
1189    rb_nativethread_cond_t cond;
1190    rb_nativethread_id_t thread_id;
1191 rb_thread_t *th;
1192 void *altstack;
1193 struct ccan_list_node node;
1194};
1195
1196#if USE_THREAD_CACHE
1197static rb_nativethread_lock_t thread_cache_lock = RB_NATIVETHREAD_LOCK_INIT;
1198static CCAN_LIST_HEAD(cached_thread_head);
1199
1200# if defined(HAVE_WORKING_FORK)
1201static void
1202thread_cache_reset(void)
1203{
1204 rb_native_mutex_initialize(&thread_cache_lock);
1205 ccan_list_head_init(&cached_thread_head);
1206}
1207# endif
1208
1209/*
1210 * number of seconds to cache an exiting thread for; 1-5s should be enough to
1211 * obviate the need for a thread pool in many network programs (taking into
1212 * account worst-case network latency across the globe) without wasting memory
1213 */
1214#ifndef THREAD_CACHE_TIME
1215# define THREAD_CACHE_TIME ((rb_hrtime_t)3 * RB_HRTIME_PER_SEC)
1216#endif
1217
1218static rb_thread_t *
1219register_cached_thread_and_wait(void *altstack)
1220{
1221 rb_hrtime_t end = THREAD_CACHE_TIME;
1222 struct cached_thread_entry entry;
1223
1224 rb_native_cond_initialize(&entry.cond);
1225 entry.altstack = altstack;
1226 entry.th = NULL;
1227 entry.thread_id = pthread_self();
1228 end = native_cond_timeout(&entry.cond, end);
1229
1230 rb_native_mutex_lock(&thread_cache_lock);
1231 {
1232 ccan_list_add(&cached_thread_head, &entry.node);
1233
1234 native_cond_timedwait(&entry.cond, &thread_cache_lock, &end);
1235
1236 if (entry.th == NULL) { /* unused */
1237 ccan_list_del(&entry.node);
1238 }
1239 }
1240 rb_native_mutex_unlock(&thread_cache_lock);
1241
1242 rb_native_cond_destroy(&entry.cond);
1243 if (!entry.th) {
1244 RB_ALTSTACK_FREE(entry.altstack);
1245 }
1246
1247 return entry.th;
1248}
1249#else
1250# if defined(HAVE_WORKING_FORK)
1251static void thread_cache_reset(void) { }
1252# endif
1253#endif
1254
1255static int
1256use_cached_thread(rb_thread_t *th)
1257{
1258#if USE_THREAD_CACHE
1259 struct cached_thread_entry *entry;
1260
1261 rb_native_mutex_lock(&thread_cache_lock);
1262 entry = ccan_list_pop(&cached_thread_head, struct cached_thread_entry, node);
1263 if (entry) {
1264 entry->th = th;
1265 /* th->nt->thread_id must be set before signal for Thread#name= */
1266 th->nt->thread_id = entry->thread_id;
1267 rb_native_cond_signal(&entry->cond);
1268 }
1269 rb_native_mutex_unlock(&thread_cache_lock);
1270 return !!entry;
1271#endif
1272 return 0;
1273}
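/*
 * Editorial note on the cache handoff: a pthread whose Ruby thread has
 * finished parks in register_cached_thread_and_wait() for up to
 * THREAD_CACHE_TIME.  native_thread_create() calls use_cached_thread(),
 * which pops a parked entry, stores the new rb_thread_t in entry->th and
 * signals entry->cond; the parked pthread then jumps back to thread_start:
 * in thread_start_func_1() and reuses the same native thread and altstack.
 * If nobody claims the entry before the timeout, the pthread frees its
 * altstack and exits.
 */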
1274
1275#if 0
1276// TODO
1277static void
1278clear_thread_cache_altstack(void)
1279{
1280#if USE_THREAD_CACHE
1281 struct cached_thread_entry *entry;
1282
1283 rb_native_mutex_lock(&thread_cache_lock);
1284 ccan_list_for_each(&cached_thread_head, entry, node) {
1285 void MAYBE_UNUSED(*altstack) = entry->altstack;
1286 entry->altstack = 0;
1287 RB_ALTSTACK_FREE(altstack);
1288 }
1289 rb_native_mutex_unlock(&thread_cache_lock);
1290#endif
1291}
1292#endif
1293
1294static int
1295native_thread_create(rb_thread_t *th)
1296{
1297 int err = 0;
1298
1299 VM_ASSERT(th->nt == 0);
1300 th->nt = ZALLOC(struct rb_native_thread);
1301
1302 if (use_cached_thread(th)) {
1303 RUBY_DEBUG_LOG("use cached nt. th:%u", rb_th_serial(th));
1304 }
1305 else {
1306 pthread_attr_t attr;
1307 const size_t stack_size = th->vm->default_params.thread_machine_stack_size + th->vm->default_params.thread_vm_stack_size;
1308 const size_t space = space_size(stack_size);
1309
1310#ifdef USE_SIGALTSTACK
1311 th->nt->altstack = rb_allocate_sigaltstack();
1312#endif
1313 th->ec->machine.stack_maxsize = stack_size - space;
1314
1315 CHECK_ERR(pthread_attr_init(&attr));
1316
1317# ifdef PTHREAD_STACK_MIN
1318 RUBY_DEBUG_LOG("stack size: %lu", (unsigned long)stack_size);
1319 CHECK_ERR(pthread_attr_setstacksize(&attr, stack_size));
1320# endif
1321
1322# ifdef HAVE_PTHREAD_ATTR_SETINHERITSCHED
1323 CHECK_ERR(pthread_attr_setinheritsched(&attr, PTHREAD_INHERIT_SCHED));
1324# endif
1325 CHECK_ERR(pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED));
1326
1327 err = pthread_create(&th->nt->thread_id, &attr, thread_start_func_1, th);
1328
1329 RUBY_DEBUG_LOG("th:%u err:%d", rb_th_serial(th), err);
1330
1331 /* should be done in the created thread */
1332 CHECK_ERR(pthread_attr_destroy(&attr));
1333 }
1334 return err;
1335}
1336
1337#if USE_NATIVE_THREAD_PRIORITY
1338
1339static void
1340native_thread_apply_priority(rb_thread_t *th)
1341{
1342#if defined(_POSIX_PRIORITY_SCHEDULING) && (_POSIX_PRIORITY_SCHEDULING > 0)
1343 struct sched_param sp;
1344 int policy;
1345 int priority = 0 - th->priority;
1346 int max, min;
1347 pthread_getschedparam(th->nt->thread_id, &policy, &sp);
1348 max = sched_get_priority_max(policy);
1349 min = sched_get_priority_min(policy);
1350
1351 if (min > priority) {
1352 priority = min;
1353 }
1354 else if (max < priority) {
1355 priority = max;
1356 }
1357
1358 sp.sched_priority = priority;
1359 pthread_setschedparam(th->nt->thread_id, policy, &sp);
1360#else
1361 /* not touched */
1362#endif
1363}
1364
1365#endif /* USE_NATIVE_THREAD_PRIORITY */
1366
1367static int
1368native_fd_select(int n, rb_fdset_t *readfds, rb_fdset_t *writefds, rb_fdset_t *exceptfds, struct timeval *timeout, rb_thread_t *th)
1369{
1370 return rb_fd_select(n, readfds, writefds, exceptfds, timeout);
1371}
1372
1373static void
1374ubf_pthread_cond_signal(void *ptr)
1375{
1376 rb_thread_t *th = (rb_thread_t *)ptr;
1377 RUBY_DEBUG_LOG("th:%u", rb_th_serial(th));
1378 rb_native_cond_signal(&th->nt->cond.intr);
1379}
1380
1381static void
1382native_cond_sleep(rb_thread_t *th, rb_hrtime_t *rel)
1383{
1384 rb_nativethread_lock_t *lock = &th->interrupt_lock;
1385 rb_nativethread_cond_t *cond = &th->nt->cond.intr;
1386
1387    /* Solaris cond_timedwait() returns EINVAL if the timeout is greater than
1388     * current_time + 100,000,000 seconds, so cap it at 100,000,000 seconds.
1389     * The resulting early return is treated as a kind of spurious wakeup; the
1390     * caller of native_sleep should handle spurious wakeups anyway.
1391 *
1392 * See also [Bug #1341] [ruby-core:29702]
1393 * http://download.oracle.com/docs/cd/E19683-01/816-0216/6m6ngupgv/index.html
1394 */
1395 const rb_hrtime_t max = (rb_hrtime_t)100000000 * RB_HRTIME_PER_SEC;
1396
1397 THREAD_BLOCKING_BEGIN(th);
1398    {
1399        rb_native_mutex_lock(lock);
1400        th->unblock.func = ubf_pthread_cond_signal;
1401 th->unblock.arg = th;
1402
1403 if (RUBY_VM_INTERRUPTED(th->ec)) {
1404 /* interrupted. return immediate */
1405 RUBY_DEBUG_LOG("interrupted before sleep th:%u", rb_th_serial(th));
1406 }
1407 else {
1408 if (!rel) {
1409 rb_native_cond_wait(cond, lock);
1410 }
1411 else {
1412 rb_hrtime_t end;
1413
1414 if (*rel > max) {
1415 *rel = max;
1416 }
1417
1418 end = native_cond_timeout(cond, *rel);
1419 native_cond_timedwait(cond, lock, &end);
1420 }
1421 }
1422 th->unblock.func = 0;
1423        th->unblock.func = 0;
1424        rb_native_mutex_unlock(lock);
1425    }
1426 THREAD_BLOCKING_END(th);
1427
1428 RUBY_DEBUG_LOG("done th:%u", rb_th_serial(th));
1429}
1430
1431#ifdef USE_UBF_LIST
1432static CCAN_LIST_HEAD(ubf_list_head);
1433static rb_nativethread_lock_t ubf_list_lock = RB_NATIVETHREAD_LOCK_INIT;
1434
1435static void
1436ubf_list_atfork(void)
1437{
1438 ccan_list_head_init(&ubf_list_head);
1439 rb_native_mutex_initialize(&ubf_list_lock);
1440}
1441
1442/* The thread 'th' is registered as one that is trying to unblock. */
1443static void
1444register_ubf_list(rb_thread_t *th)
1445{
1446 struct ccan_list_node *node = &th->sched.node.ubf;
1447
1448 if (ccan_list_empty((struct ccan_list_head*)node)) {
1449 rb_native_mutex_lock(&ubf_list_lock);
1450 ccan_list_add(&ubf_list_head, node);
1451 rb_native_mutex_unlock(&ubf_list_lock);
1452 }
1453}
1454
1455/* The thread 'th' is unblocked. It no longer needs to be registered. */
1456static void
1457unregister_ubf_list(rb_thread_t *th)
1458{
1459 struct ccan_list_node *node = &th->sched.node.ubf;
1460
1461 /* we can't allow re-entry into ubf_list_head */
1462 VM_ASSERT(th->unblock.func == 0);
1463
1464 if (!ccan_list_empty((struct ccan_list_head*)node)) {
1465 rb_native_mutex_lock(&ubf_list_lock);
1466 ccan_list_del_init(node);
1467 if (ccan_list_empty(&ubf_list_head) && !rb_signal_buff_size()) {
1468 ubf_timer_disarm();
1469 }
1470 rb_native_mutex_unlock(&ubf_list_lock);
1471 }
1472}
1473
1474/*
1475 * Send a signal so that the target thread returns from a blocking syscall.
1476 * Probably any signal would do, but we chose SIGVTALRM.
1477 */
1478static void
1479ubf_wakeup_thread(rb_thread_t *th)
1480{
1481 RUBY_DEBUG_LOG("th:%u", rb_th_serial(th));
1482 pthread_kill(th->nt->thread_id, SIGVTALRM);
1483}
1484
1485static void
1486ubf_select(void *ptr)
1487{
1488 rb_thread_t *th = (rb_thread_t *)ptr;
1489 struct rb_thread_sched *sched = TH_SCHED(th);
1490 const rb_thread_t *cur = ruby_thread_from_native(); /* may be 0 */
1491
1492 register_ubf_list(th);
1493
1494 /*
1495     * ubf_wakeup_thread() is not guaranteed to wake up the target thread.
1496     * Therefore, we repeatedly call ubf_wakeup_thread() until the target
1497     * thread exits from the ubf function.  We need a timer to perform this.
1498 * We use double-checked locking here because this function may be called
1499 * while vm->gvl.lock is held in do_gvl_timer.
1500 * There is also no need to start a timer if we're the designated
1501 * sigwait_th thread, otherwise we can deadlock with a thread
1502 * in unblock_function_clear.
1503 */
1504 if (cur != sched->timer && cur != sigwait_th) {
1505 /*
1506 * Double-checked locking above was to prevent nested locking
1507 * by the SAME thread. We use trylock here to prevent deadlocks
1508 * between DIFFERENT threads
1509 */
1510 if (rb_native_mutex_trylock(&sched->lock) == 0) {
1511 if (!sched->timer) {
1512 rb_thread_wakeup_timer_thread(-1);
1513 }
1514 rb_native_mutex_unlock(&sched->lock);
1515 }
1516 }
1517
1518 ubf_wakeup_thread(th);
1519}
1520
1521static int
1522ubf_threads_empty(void)
1523{
1524 return ccan_list_empty(&ubf_list_head);
1525}
1526
1527static void
1528ubf_wakeup_all_threads(void)
1529{
1530 if (!ubf_threads_empty()) {
1531 rb_native_mutex_lock(&ubf_list_lock);
1532 rb_thread_t *th;
1533
1534 ccan_list_for_each(&ubf_list_head, th, sched.node.ubf) {
1535 ubf_wakeup_thread(th);
1536 }
1537 rb_native_mutex_unlock(&ubf_list_lock);
1538 }
1539}
1540
1541#else /* USE_UBF_LIST */
1542#define register_ubf_list(th) (void)(th)
1543#define unregister_ubf_list(th) (void)(th)
1544#define ubf_select 0
1545static void ubf_wakeup_all_threads(void) { return; }
1546static int ubf_threads_empty(void) { return 1; }
1547#define ubf_list_atfork() do {} while (0)
1548#endif /* USE_UBF_LIST */
1549
1550#define TT_DEBUG 0
1551#define WRITE_CONST(fd, str) (void)(write((fd),(str),sizeof(str)-1)<0)
1552
1553static struct {
1554 /* pipes are closed in forked children when owner_process does not match */
1555 int normal[2]; /* [0] == sigwait_fd */
1556 int ub_main[2]; /* unblock main thread from native_ppoll_sleep */
1557
1558 /* volatile for signal handler use: */
1559 volatile rb_pid_t owner_process;
1560} signal_self_pipe = {
1561 {-1, -1},
1562 {-1, -1},
1563};
1564
1565/* only use signal-safe system calls here */
1566static void
1567rb_thread_wakeup_timer_thread_fd(int fd)
1568{
1569#if USE_EVENTFD
1570 const uint64_t buff = 1;
1571#else
1572 const char buff = '!';
1573#endif
1574 ssize_t result;
1575
1576 /* already opened */
1577 if (fd >= 0) {
1578 retry:
1579 if ((result = write(fd, &buff, sizeof(buff))) <= 0) {
1580 int e = errno;
1581 switch (e) {
1582 case EINTR: goto retry;
1583 case EAGAIN:
1584#if defined(EWOULDBLOCK) && EWOULDBLOCK != EAGAIN
1585 case EWOULDBLOCK:
1586#endif
1587 break;
1588 default:
1589 async_bug_fd("rb_thread_wakeup_timer_thread: write", e, fd);
1590 }
1591 }
1592 if (TT_DEBUG) WRITE_CONST(2, "rb_thread_wakeup_timer_thread: write\n");
1593 }
1594 else {
1595 /* ignore wakeup */
1596 }
1597}
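/*
 * Editorial note: the wakeup token differs by backend because eventfd(2)
 * only accepts 8-byte integer writes, while the pipe fallback merely needs
 * any byte to make the read end readable.  EAGAIN/EWOULDBLOCK is ignored on
 * purpose: the fd is non-blocking, and a wakeup that is already pending is
 * good enough.
 */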
1598
1599/*
1600 * This ensures we get a SIGVTALRM in TIME_QUANTUM_MSEC if our
1601 * process could not react to the original signal in time.
1602 */
1603static void
1604ubf_timer_arm(rb_pid_t current) /* async signal safe */
1605{
1606#if UBF_TIMER == UBF_TIMER_POSIX
1607 if ((!current || timer_posix.owner == current) &&
1608 timer_state_cas(RTIMER_DISARM, RTIMER_ARMING) == RTIMER_DISARM) {
1609 struct itimerspec it;
1610
1611 it.it_interval.tv_sec = it.it_value.tv_sec = 0;
1612 it.it_interval.tv_nsec = it.it_value.tv_nsec = TIME_QUANTUM_NSEC;
1613
1614 if (timer_settime(timer_posix.timerid, 0, &it, 0))
1615 rb_async_bug_errno("timer_settime (arm)", errno);
1616
1617 switch (timer_state_cas(RTIMER_ARMING, RTIMER_ARMED)) {
1618 case RTIMER_DISARM:
1619 /* somebody requested a disarm while we were arming */
1620 /* may race harmlessly with ubf_timer_destroy */
1621 (void)timer_settime(timer_posix.timerid, 0, &zero, 0);
1622
1623 case RTIMER_ARMING: return; /* success */
1624 case RTIMER_ARMED:
1625 /*
1626 * it is possible to have another thread disarm, and
1627 * a third thread arm finish re-arming before we get
1628 * here, so we wasted a syscall with timer_settime but
1629 * probably unavoidable in a signal handler.
1630 */
1631 return;
1632 case RTIMER_DEAD:
1633 /* may race harmlessly with ubf_timer_destroy */
1634 (void)timer_settime(timer_posix.timerid, 0, &zero, 0);
1635 return;
1636 default:
1637 rb_async_bug_errno("UBF_TIMER_POSIX unknown state", ERANGE);
1638 }
1639 }
1640#elif UBF_TIMER == UBF_TIMER_PTHREAD
1641 if (!current || current == timer_pthread.owner) {
1642 if (ATOMIC_EXCHANGE(timer_pthread.armed, 1) == 0)
1643 rb_thread_wakeup_timer_thread_fd(timer_pthread.low[1]);
1644 }
1645#endif
1646}
1647
1648void
1649rb_thread_wakeup_timer_thread(int sig)
1650{
1651 rb_pid_t current;
1652
1653 /* non-sighandler path */
1654 if (sig <= 0) {
1655 rb_thread_wakeup_timer_thread_fd(signal_self_pipe.normal[1]);
1656 if (sig < 0) {
1657 ubf_timer_arm(0);
1658 }
1659 return;
1660 }
1661
1662 /* must be safe inside sighandler, so no mutex */
1663 current = getpid();
1664 if (signal_self_pipe.owner_process == current) {
1665 rb_thread_wakeup_timer_thread_fd(signal_self_pipe.normal[1]);
1666
1667 /*
1668 * system_working check is required because vm and main_thread are
1669 * freed during shutdown
1670 */
1671 if (system_working > 0) {
1672 volatile rb_execution_context_t *ec;
1673 rb_vm_t *vm = GET_VM();
1674 rb_thread_t *mth;
1675
1676 /*
1677 * FIXME: root VM and main_thread should be static and not
1678 * on heap for maximum safety (and startup/shutdown speed)
1679 */
1680 if (!vm) return;
1681 mth = vm->ractor.main_thread;
1682 if (!mth || system_working <= 0) return;
1683
1684 /* this relies on GC for grace period before cont_free */
1685 ec = ACCESS_ONCE(rb_execution_context_t *, mth->ec);
1686
1687 if (ec) {
1688 RUBY_VM_SET_TRAP_INTERRUPT(ec);
1689 ubf_timer_arm(current);
1690
1691 /* some ubfs can interrupt single-threaded process directly */
1692 if (vm->ubf_async_safe && mth->unblock.func) {
1693 (mth->unblock.func)(mth->unblock.arg);
1694 }
1695 }
1696 }
1697 }
1698}
1699
1700#define CLOSE_INVALIDATE_PAIR(expr) \
1701 close_invalidate_pair(expr,"close_invalidate: "#expr)
1702static void
1703close_invalidate(int *fdp, const char *msg)
1704{
1705 int fd = *fdp;
1706
1707 *fdp = -1;
1708 if (close(fd) < 0) {
1709 async_bug_fd(msg, errno, fd);
1710 }
1711}
1712
1713static void
1714close_invalidate_pair(int fds[2], const char *msg)
1715{
1716 if (USE_EVENTFD && fds[0] == fds[1]) {
1717 close_invalidate(&fds[0], msg);
1718 fds[1] = -1;
1719 }
1720 else {
1721 close_invalidate(&fds[0], msg);
1722 close_invalidate(&fds[1], msg);
1723 }
1724}
1725
1726static void
1727set_nonblock(int fd)
1728{
1729 int oflags;
1730 int err;
1731
1732 oflags = fcntl(fd, F_GETFL);
1733 if (oflags == -1)
1734 rb_sys_fail(0);
1735 oflags |= O_NONBLOCK;
1736 err = fcntl(fd, F_SETFL, oflags);
1737 if (err == -1)
1738 rb_sys_fail(0);
1739}
1740
1741/* communication pipe with timer thread and signal handler */
1742static int
1743setup_communication_pipe_internal(int pipes[2])
1744{
1745 int err;
1746
1747 if (pipes[0] >= 0 || pipes[1] >= 0) {
1748 VM_ASSERT(pipes[0] >= 0);
1749 VM_ASSERT(pipes[1] >= 0);
1750 return 0;
1751 }
1752
1753 /*
1754     * Don't bother with eventfd on ancient Linux 2.6.22..2.6.26, which were
1755     * missing the EFD_* flags; those kernels fall back to a pipe.
1756 */
1757#if USE_EVENTFD && defined(EFD_NONBLOCK) && defined(EFD_CLOEXEC)
1758 pipes[0] = pipes[1] = eventfd(0, EFD_NONBLOCK|EFD_CLOEXEC);
1759 if (pipes[0] >= 0) {
1760 rb_update_max_fd(pipes[0]);
1761 return 0;
1762 }
1763#endif
1764
1765 err = rb_cloexec_pipe(pipes);
1766 if (err != 0) {
1767 rb_warn("pipe creation failed for timer: %s, scheduling broken",
1768 strerror(errno));
1769 return -1;
1770 }
1771 rb_update_max_fd(pipes[0]);
1772 rb_update_max_fd(pipes[1]);
1773 set_nonblock(pipes[0]);
1774 set_nonblock(pipes[1]);
1775 return 0;
1776}
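/*
 * Editorial note: with eventfd both "ends" are the same descriptor
 * (pipes[0] == pipes[1]), which is why close_invalidate_pair() above only
 * closes it once, and why no O_NONBLOCK fixup is needed on this path
 * (EFD_NONBLOCK|EFD_CLOEXEC are requested at creation).
 */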
1777
1778#if !defined(SET_CURRENT_THREAD_NAME) && defined(__linux__) && defined(PR_SET_NAME)
1779# define SET_CURRENT_THREAD_NAME(name) prctl(PR_SET_NAME, name)
1780#endif
1781
1782enum {
1783 THREAD_NAME_MAX =
1784#if defined(__linux__)
1785 16
1786#elif defined(__APPLE__)
1787/* Undocumented, and main thread seems unlimited */
1788 64
1789#else
1790 16
1791#endif
1792};
1793
1794static VALUE threadptr_invoke_proc_location(rb_thread_t *th);
1795
1796static void
1797native_set_thread_name(rb_thread_t *th)
1798{
1799#ifdef SET_CURRENT_THREAD_NAME
1800 VALUE loc;
1801 if (!NIL_P(loc = th->name)) {
1802 SET_CURRENT_THREAD_NAME(RSTRING_PTR(loc));
1803 }
1804 else if ((loc = threadptr_invoke_proc_location(th)) != Qnil) {
1805 char *name, *p;
1806 char buf[THREAD_NAME_MAX];
1807 size_t len;
1808 int n;
1809
1810 name = RSTRING_PTR(RARRAY_AREF(loc, 0));
1811 p = strrchr(name, '/'); /* show only the basename of the path. */
1812 if (p && p[1])
1813 name = p + 1;
1814
1815 n = snprintf(buf, sizeof(buf), "%s:%d", name, NUM2INT(RARRAY_AREF(loc, 1)));
1816 RB_GC_GUARD(loc);
1817
1818 len = (size_t)n;
1819 if (len >= sizeof(buf)) {
1820 buf[sizeof(buf)-2] = '*';
1821 buf[sizeof(buf)-1] = '\0';
1822 }
1823 SET_CURRENT_THREAD_NAME(buf);
1824 }
1825#endif
1826}
1827
1828static void
1829native_set_another_thread_name(rb_nativethread_id_t thread_id, VALUE name)
1830{
1831#if defined SET_ANOTHER_THREAD_NAME || defined SET_CURRENT_THREAD_NAME
1832 char buf[THREAD_NAME_MAX];
1833 const char *s = "";
1834# if !defined SET_ANOTHER_THREAD_NAME
1835 if (!pthread_equal(pthread_self(), thread_id)) return;
1836# endif
1837 if (!NIL_P(name)) {
1838 long n;
1839 RSTRING_GETMEM(name, s, n);
1840 if (n >= (int)sizeof(buf)) {
1841 memcpy(buf, s, sizeof(buf)-1);
1842 buf[sizeof(buf)-1] = '\0';
1843 s = buf;
1844 }
1845 }
1846# if defined SET_ANOTHER_THREAD_NAME
1847 SET_ANOTHER_THREAD_NAME(thread_id, s);
1848# elif defined SET_CURRENT_THREAD_NAME
1849 SET_CURRENT_THREAD_NAME(s);
1850# endif
1851#endif
1852}
1853
1854#if defined(RB_THREAD_T_HAS_NATIVE_ID) || defined(__APPLE__)
1855static VALUE
1856native_thread_native_thread_id(rb_thread_t *target_th)
1857{
1858#ifdef RB_THREAD_T_HAS_NATIVE_ID
1859 int tid = target_th->nt->tid;
1860 if (tid == 0) return Qnil;
1861 return INT2FIX(tid);
1862#elif defined(__APPLE__)
1863 uint64_t tid;
1864# if (!defined(MAC_OS_X_VERSION_10_6) || \
1865 (MAC_OS_X_VERSION_MAX_ALLOWED < MAC_OS_X_VERSION_10_6) || \
1866 defined(__POWERPC__) /* never defined for PowerPC platforms */)
1867 const bool no_pthread_threadid_np = true;
1868# define NO_PTHREAD_MACH_THREAD_NP 1
1869# elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_6
1870 const bool no_pthread_threadid_np = false;
1871# else
1872# if !(defined(__has_attribute) && __has_attribute(availability))
1873 /* __API_AVAILABLE macro does nothing on gcc */
1874 __attribute__((weak)) int pthread_threadid_np(pthread_t, uint64_t*);
1875# endif
1876 /* Check weakly linked symbol */
1877 const bool no_pthread_threadid_np = !&pthread_threadid_np;
1878# endif
1879 if (no_pthread_threadid_np) {
1880 return ULL2NUM(pthread_mach_thread_np(pthread_self()));
1881 }
1882# ifndef NO_PTHREAD_MACH_THREAD_NP
1883 int e = pthread_threadid_np(target_th->nt->thread_id, &tid);
1884 if (e != 0) rb_syserr_fail(e, "pthread_threadid_np");
1885 return ULL2NUM((unsigned long long)tid);
1886# endif
1887#endif
1888}
1889# define USE_NATIVE_THREAD_NATIVE_THREAD_ID 1
1890#else
1891# define USE_NATIVE_THREAD_NATIVE_THREAD_ID 0
1892#endif
1893
1894static void
1895ubf_timer_invalidate(void)
1896{
1897#if UBF_TIMER == UBF_TIMER_PTHREAD
1898 CLOSE_INVALIDATE_PAIR(timer_pthread.low);
1899#endif
1900}
1901
1902static void
1903ubf_timer_pthread_create(rb_pid_t current)
1904{
1905#if UBF_TIMER == UBF_TIMER_PTHREAD
1906 int err;
1907 if (timer_pthread.owner == current)
1908 return;
1909
1910 if (setup_communication_pipe_internal(timer_pthread.low) < 0)
1911 return;
1912
1913 err = pthread_create(&timer_pthread.thid, 0, timer_pthread_fn, GET_VM());
1914 if (!err)
1915 timer_pthread.owner = current;
1916 else
1917 rb_warn("pthread_create failed for timer: %s, signals racy",
1918 strerror(err));
1919#endif
1920}
1921
1922static void
1923ubf_timer_create(rb_pid_t current)
1924{
1925#if UBF_TIMER == UBF_TIMER_POSIX
1926# if defined(__sun)
1927# define UBF_TIMER_CLOCK CLOCK_REALTIME
1928# else /* Tested Linux and FreeBSD: */
1929# define UBF_TIMER_CLOCK CLOCK_MONOTONIC
1930# endif
1931
1932 struct sigevent sev;
1933
1934 sev.sigev_notify = SIGEV_SIGNAL;
1935 sev.sigev_signo = SIGVTALRM;
1936 sev.sigev_value.sival_ptr = &timer_posix;
1937
1938 if (!timer_create(UBF_TIMER_CLOCK, &sev, &timer_posix.timerid)) {
1939 rb_atomic_t prev = timer_state_exchange(RTIMER_DISARM);
1940
1941 if (prev != RTIMER_DEAD) {
1942 rb_bug("timer_posix was not dead: %u\n", (unsigned)prev);
1943 }
1944 timer_posix.owner = current;
1945 }
1946 else {
1947 rb_warn("timer_create failed: %s, signals racy", strerror(errno));
1948 }
1949#endif
1950 if (UBF_TIMER == UBF_TIMER_PTHREAD)
1951 ubf_timer_pthread_create(current);
1952}
1953
1954static void
1955rb_thread_create_timer_thread(void)
1956{
1957 /* we only create the pipe, and lazy-spawn */
1958 rb_pid_t current = getpid();
1959 rb_pid_t owner = signal_self_pipe.owner_process;
1960
1961 if (owner && owner != current) {
1962 CLOSE_INVALIDATE_PAIR(signal_self_pipe.normal);
1963 CLOSE_INVALIDATE_PAIR(signal_self_pipe.ub_main);
1964 ubf_timer_invalidate();
1965 }
1966
1967 if (setup_communication_pipe_internal(signal_self_pipe.normal) < 0) return;
1968 if (setup_communication_pipe_internal(signal_self_pipe.ub_main) < 0) return;
1969
1970 ubf_timer_create(current);
1971 if (owner != current) {
1972 /* validate pipe on this process */
1973 sigwait_th = THREAD_INVALID;
1974 signal_self_pipe.owner_process = current;
1975 }
1976}
1977
1978static void
1979ubf_timer_disarm(void)
1980{
1981#if UBF_TIMER == UBF_TIMER_POSIX
1982 rb_atomic_t prev;
1983
1984 if (timer_posix.owner && timer_posix.owner != getpid()) return;
1985 prev = timer_state_cas(RTIMER_ARMED, RTIMER_DISARM);
1986 switch (prev) {
1987 case RTIMER_DISARM: return; /* likely */
1988 case RTIMER_ARMING: return; /* ubf_timer_arm will disarm itself */
1989 case RTIMER_ARMED:
1990 if (timer_settime(timer_posix.timerid, 0, &zero, 0)) {
1991 int err = errno;
1992
1993 if (err == EINVAL) {
1994 prev = timer_state_cas(RTIMER_DISARM, RTIMER_DISARM);
1995
1996 /* main thread may have killed the timer */
1997 if (prev == RTIMER_DEAD) return;
1998
1999 rb_bug_errno("timer_settime (disarm)", err);
2000 }
2001 }
2002 return;
2003 case RTIMER_DEAD: return; /* stay dead */
2004 default:
2005 rb_bug("UBF_TIMER_POSIX bad state: %u\n", (unsigned)prev);
2006 }
2007
2008#elif UBF_TIMER == UBF_TIMER_PTHREAD
2009 ATOMIC_SET(timer_pthread.armed, 0);
2010#endif
2011}
2012
2013static void
2014ubf_timer_destroy(void)
2015{
2016#if UBF_TIMER == UBF_TIMER_POSIX
2017 if (timer_posix.owner == getpid()) {
2018 rb_atomic_t expect = RTIMER_DISARM;
2019 size_t i, max = 10000000;
2020
2021 /* prevent signal handler from arming: */
2022 for (i = 0; i < max; i++) {
2023 switch (timer_state_cas(expect, RTIMER_DEAD)) {
2024 case RTIMER_DISARM:
2025 if (expect == RTIMER_DISARM) goto done;
2026 expect = RTIMER_DISARM;
2027 break;
2028 case RTIMER_ARMING:
2029 native_thread_yield(); /* let another thread finish arming */
2030 expect = RTIMER_ARMED;
2031 break;
2032 case RTIMER_ARMED:
2033 if (expect == RTIMER_ARMED) {
2034 if (timer_settime(timer_posix.timerid, 0, &zero, 0))
2035 rb_bug_errno("timer_settime (destroy)", errno);
2036 goto done;
2037 }
2038 expect = RTIMER_ARMED;
2039 break;
2040 case RTIMER_DEAD:
2041 rb_bug("RTIMER_DEAD unexpected");
2042 }
2043 }
2044 rb_bug("timed out waiting for timer to arm");
2045done:
2046 if (timer_delete(timer_posix.timerid) < 0)
2047 rb_sys_fail("timer_delete");
2048
2049 VM_ASSERT(timer_state_exchange(RTIMER_DEAD) == RTIMER_DEAD);
2050 }
2051#elif UBF_TIMER == UBF_TIMER_PTHREAD
2052 int err;
2053
2054 timer_pthread.owner = 0;
2055 ubf_timer_disarm();
2056 rb_thread_wakeup_timer_thread_fd(timer_pthread.low[1]);
2057 err = pthread_join(timer_pthread.thid, 0);
2058 if (err) {
2059 rb_raise(rb_eThreadError, "native_thread_join() failed (%d)", err);
2060 }
2061#endif
2062}
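/*
 * A C11-atomics sketch (not part of this file) of the compare-and-swap
 * state machine ubf_timer_destroy() spins on: retire the timer only from
 * a quiescent state, yielding while a concurrent arm is in flight.  The
 * demo_* names and plain-int states are illustrative, not the actual
 * timer_state_cas()/RTIMER_* implementation (which also bounds the spin
 * and disarms the kernel timer when it catches the ARMED state).
 */
#if 0 /* illustrative sketch, not compiled */
#include <sched.h>
#include <stdatomic.h>

enum { DEMO_DISARM, DEMO_ARMING, DEMO_ARMED, DEMO_DEAD };
static _Atomic int demo_state = DEMO_DISARM;

static int
demo_kill(void)
{
    int expect = DEMO_DISARM;

    for (;;) {
        /* on failure, `expect` is reloaded with the state we observed */
        if (atomic_compare_exchange_strong(&demo_state, &expect, DEMO_DEAD))
            return 0;                  /* transitioned to DEAD */
        switch (expect) {
          case DEMO_ARMING:
            sched_yield();             /* let the arming thread finish */
            expect = DEMO_ARMED;       /* it should settle on ARMED */
            break;
          case DEMO_DEAD:
            return -1;                 /* already killed elsewhere */
          default:
            break;                     /* DISARM or ARMED: just retry */
        }
    }
}
#endif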
2063
2064static int
2065native_stop_timer_thread(void)
2066{
2067 int stopped;
2068 stopped = --system_working <= 0;
2069 if (stopped)
2070 ubf_timer_destroy();
2071
2072 if (TT_DEBUG) fprintf(stderr, "stop timer thread\n");
2073 return stopped;
2074}
2075
2076static void
2077native_reset_timer_thread(void)
2078{
2079 if (TT_DEBUG) fprintf(stderr, "reset timer thread\n");
2080}
2081
2082#ifdef HAVE_SIGALTSTACK
2083int
2084ruby_stack_overflowed_p(const rb_thread_t *th, const void *addr)
2085{
2086 void *base;
2087 size_t size;
2088 const size_t water_mark = 1024 * 1024;
2089 STACK_GROW_DIR_DETECTION;
2090
2091#ifdef STACKADDR_AVAILABLE
2092 if (get_stack(&base, &size) == 0) {
2093# ifdef __APPLE__
2094 if (pthread_equal(th->nt->thread_id, native_main_thread.id)) {
2095 struct rlimit rlim;
2096 if (getrlimit(RLIMIT_STACK, &rlim) == 0 && rlim.rlim_cur > size) {
2097 size = (size_t)rlim.rlim_cur;
2098 }
2099 }
2100# endif
2101 base = (char *)base + STACK_DIR_UPPER(+size, -size);
2102 }
2103 else
2104#endif
2105 if (th) {
2106 size = th->ec->machine.stack_maxsize;
2107 base = (char *)th->ec->machine.stack_start - STACK_DIR_UPPER(0, size);
2108 }
2109 else {
2110 return 0;
2111 }
2112 size /= RUBY_STACK_SPACE_RATIO;
2113 if (size > water_mark) size = water_mark;
2114 if (IS_STACK_DIR_UPPER()) {
2115 if (size > ~(size_t)base+1) size = ~(size_t)base+1;
2116 if (addr > base && addr <= (void *)((char *)base + size)) return 1;
2117 }
2118 else {
2119 if (size > (size_t)base) size = (size_t)base;
2120 if (addr > (void *)((char *)base - size) && addr <= base) return 1;
2121 }
2122 return 0;
2123}
2124#endif
2125
2126int
2127rb_reserved_fd_p(int fd)
2128{
2129 /* no false-positive if out-of-FD at startup */
2130 if (fd < 0)
2131 return 0;
2132
2133#if UBF_TIMER == UBF_TIMER_PTHREAD
2134 if (fd == timer_pthread.low[0] || fd == timer_pthread.low[1])
2135 goto check_pid;
2136#endif
2137 if (fd == signal_self_pipe.normal[0] || fd == signal_self_pipe.normal[1])
2138 goto check_pid;
2139 if (fd == signal_self_pipe.ub_main[0] || fd == signal_self_pipe.ub_main[1])
2140 goto check_pid;
2141 return 0;
2142check_pid:
2143 if (signal_self_pipe.owner_process == getpid()) /* async-signal-safe */
2144 return 1;
2145 return 0;
2146}
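/*
 * rb_reserved_fd_p() is the public API C extensions can call before
 * closing descriptors they did not open, so the self-pipe and timer FDs
 * guarded above survive.  A hedged usage sketch (the fd-sweeping loop
 * and its bounds are illustrative, not something Ruby requires):
 */
#if 0 /* illustrative sketch, not compiled */
#include <unistd.h>
#include <ruby.h>

static void
demo_close_stray_fds(int lowfd, int maxfd)
{
    int fd;

    for (fd = lowfd; fd <= maxfd; fd++) {
        if (rb_reserved_fd_p(fd)) continue; /* owned by the VM, keep it */
        (void)close(fd);
    }
}
#endif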
2147
2148rb_nativethread_id_t
2149rb_nativethread_self(void)
2150{
2151 return pthread_self();
2152}
2153
2154int
2155rb_sigwait_fd_get(const rb_thread_t *th)
2156{
2157 if (signal_self_pipe.normal[0] >= 0) {
2158 VM_ASSERT(signal_self_pipe.owner_process == getpid());
2159 /*
2160 * no need to keep firing the timer if any thread is sleeping
2161 * on the signal self-pipe
2162 */
2163 ubf_timer_disarm();
2164
2165 if (ATOMIC_PTR_CAS(sigwait_th, THREAD_INVALID, th) == THREAD_INVALID) {
2166 return signal_self_pipe.normal[0];
2167 }
2168 }
2169 return -1; /* avoid thundering herd and work stealing/starvation */
2170}
2171
2172void
2173rb_sigwait_fd_put(const rb_thread_t *th, int fd)
2174{
2175 const rb_thread_t *old;
2176
2177 VM_ASSERT(signal_self_pipe.normal[0] == fd);
2178 old = ATOMIC_PTR_EXCHANGE(sigwait_th, THREAD_INVALID);
2179 if (old != th) assert(old == th);
2180}
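/*
 * rb_sigwait_fd_get()/rb_sigwait_fd_put() hand the self-pipe to at most
 * one thread at a time by treating sigwait_th as a CAS-claimed token.
 * A C11 sketch of that claim/release shape (demo_* names and the NULL
 * sentinel are illustrative; the real code uses THREAD_INVALID):
 */
#if 0 /* illustrative sketch, not compiled */
#include <assert.h>
#include <stdatomic.h>
#include <stddef.h>

static void *_Atomic demo_owner = NULL;

static int
demo_claim(void *self)        /* succeeds for exactly one caller */
{
    void *expect = NULL;

    return atomic_compare_exchange_strong(&demo_owner, &expect, self);
}

static void
demo_release(void *self)      /* only the current owner may release */
{
    void *old = atomic_exchange(&demo_owner, NULL);

    assert(old == self);
    (void)old;
}
#endif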
2181
2182#ifndef HAVE_PPOLL
2183/* TODO: don't ignore sigmask */
2184static int
2185ruby_ppoll(struct pollfd *fds, nfds_t nfds,
2186 const struct timespec *ts, const sigset_t *sigmask)
2187{
2188 int timeout_ms;
2189
2190 if (ts) {
2191 int tmp, tmp2;
2192
2193 if (ts->tv_sec > INT_MAX/1000)
2194 timeout_ms = INT_MAX;
2195 else {
2196 tmp = (int)(ts->tv_sec * 1000);
2197 /* round up 1ns to 1ms to avoid excessive wakeups for <1ms sleep */
2198 tmp2 = (int)((ts->tv_nsec + 999999L) / (1000L * 1000L));
2199 if (INT_MAX - tmp < tmp2)
2200 timeout_ms = INT_MAX;
2201 else
2202 timeout_ms = (int)(tmp + tmp2);
2203 }
2204 }
2205 else
2206 timeout_ms = -1;
2207
2208 return poll(fds, nfds, timeout_ms);
2209}
2210# define ppoll(fds,nfds,ts,sigmask) ruby_ppoll((fds),(nfds),(ts),(sigmask))
2211#endif
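/*
 * The fallback above folds a struct timespec into poll(2)'s millisecond
 * timeout: NULL means block forever (-1), sub-millisecond remainders
 * round up, and the sum saturates at INT_MAX.  A standalone sketch of
 * the same arithmetic (demo_ts2msec is a hypothetical name):
 */
#if 0 /* illustrative sketch, not compiled */
#include <limits.h>
#include <time.h>

static int
demo_ts2msec(const struct timespec *ts)
{
    int sec_ms, nsec_ms;

    if (!ts) return -1;
    if (ts->tv_sec > INT_MAX / 1000) return INT_MAX;
    sec_ms = (int)(ts->tv_sec * 1000);
    nsec_ms = (int)((ts->tv_nsec + 999999L) / 1000000L); /* 1ns -> 1ms */
    return (INT_MAX - sec_ms < nsec_ms) ? INT_MAX : sec_ms + nsec_ms;
}
/* e.g. {0, 1}         -> 1    (1ns rounds up to a full millisecond)
 *      {1, 500000000} -> 1500 */
#endif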
2212
2213void
2214rb_sigwait_sleep(rb_thread_t *th, int sigwait_fd, const rb_hrtime_t *rel)
2215{
2216 struct pollfd pfd;
2217 struct timespec ts;
2218
2219 pfd.fd = sigwait_fd;
2220 pfd.events = POLLIN;
2221
2222 if (!BUSY_WAIT_SIGNALS && ubf_threads_empty()) {
2223 (void)ppoll(&pfd, 1, rb_hrtime2timespec(&ts, rel), 0);
2224 check_signals_nogvl(th, sigwait_fd);
2225 }
2226 else {
2227 rb_hrtime_t to = RB_HRTIME_MAX, end = 0;
2228 int n = 0;
2229
2230 if (rel) {
2231 to = *rel;
2232 end = rb_hrtime_add(rb_hrtime_now(), to);
2233 }
2234 /*
2235 * tricky: this needs to return on spurious wakeup (no auto-retry).
2236         * But we also need to distinguish periodic quantum wakeups from
2237         * real events, so we care about the result of consume_communication_pipe
2238 *
2239 * We want to avoid spurious wakeup for Mutex#sleep compatibility
2240 * [ruby-core:88102]
2241 */
2242 for (;;) {
2243 const rb_hrtime_t *sto = sigwait_timeout(th, sigwait_fd, &to, &n);
2244
2245 if (n) return;
2246 n = ppoll(&pfd, 1, rb_hrtime2timespec(&ts, sto), 0);
2247 if (check_signals_nogvl(th, sigwait_fd))
2248 return;
2249 if (n || (th && RUBY_VM_INTERRUPTED(th->ec)))
2250 return;
2251 if (rel && hrtime_update_expire(&to, end))
2252 return;
2253 }
2254 }
2255}
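/*
 * The loop above retries against an absolute deadline so that spurious
 * and quantum wakeups shrink the remaining timeout instead of restarting
 * it.  A simplified sketch of that pattern with CLOCK_MONOTONIC (the
 * demo_* names are illustrative and the rb_hrtime_t bookkeeping is
 * reduced to plain nanoseconds; INT_MAX clamping is omitted):
 */
#if 0 /* illustrative sketch, not compiled */
#include <errno.h>
#include <poll.h>
#include <stdint.h>
#include <time.h>

static uint64_t
demo_now_ns(void)
{
    struct timespec ts;

    clock_gettime(CLOCK_MONOTONIC, &ts);
    return (uint64_t)ts.tv_sec * 1000000000ULL + (uint64_t)ts.tv_nsec;
}

/* wait up to rel_ns for fd to become readable; wakeups that do not make
 * the fd readable never extend the total wait */
static int
demo_wait_readable(int fd, uint64_t rel_ns)
{
    struct pollfd pfd = { .fd = fd, .events = POLLIN };
    uint64_t end = demo_now_ns() + rel_ns;

    for (;;) {
        uint64_t now = demo_now_ns();
        int n;

        if (now >= end) return 0;                 /* deadline expired */
        n = poll(&pfd, 1, (int)((end - now + 999999ULL) / 1000000ULL));
        if (n > 0) return 1;                      /* readable */
        if (n < 0 && errno != EINTR) return -1;   /* real error */
        /* timed out or interrupted: recompute the remainder and retry */
    }
}
#endif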
2256
2257/*
2258 * we need to guarantee wakeups from native_ppoll_sleep because
2259 * ubf_select may not be going through ubf_list if other threads
2260 * are all sleeping.
2261 */
2262static void
2263ubf_ppoll_sleep(void *ignore)
2264{
2265 rb_thread_wakeup_timer_thread_fd(signal_self_pipe.ub_main[1]);
2266}
2267
2268/*
2269 * Single CPU setups benefit from explicit sched_yield() before ppoll(),
2270 * since threads may be too starved to enter the GVL waitqueue for
2271 * us to detect contention. Instead, we want to kick other threads
2272 * so they can run and possibly prevent us from entering slow paths
2273 * in ppoll() or similar syscalls.
2274 *
2275 * Confirmed on FreeBSD 11.2 and Linux 4.19.
2276 * [ruby-core:90417] [Bug #15398]
2277 */
2278#define THREAD_BLOCKING_YIELD(th) do { \
2279 const rb_thread_t *next; \
2280 struct rb_thread_sched *sched = TH_SCHED(th); \
2281 RB_GC_SAVE_MACHINE_CONTEXT(th); \
2282 rb_native_mutex_lock(&sched->lock); \
2283 next = thread_sched_to_waiting_common(sched); \
2284 rb_native_mutex_unlock(&sched->lock); \
2285 if (!next && rb_ractor_living_thread_num(th->ractor) > 1) { \
2286 native_thread_yield(); \
2287 }
2288
2289/*
2290 * This function does not exclusively acquire sigwait_fd, so it
2291 * cannot safely read from it. However, it can be woken up in
2292 * 4 ways:
2293 *
2294 * 1) ubf_ppoll_sleep (from another thread)
2295 * 2) rb_thread_wakeup_timer_thread (from signal handler)
2296 * 3) any unmasked signal hitting the process
2297 * 4) periodic ubf timer wakeups (after 3)
2298 */
2299static void
2300native_ppoll_sleep(rb_thread_t *th, rb_hrtime_t *rel)
2301{
2302 rb_native_mutex_lock(&th->interrupt_lock);
2303 th->unblock.func = ubf_ppoll_sleep;
2304 rb_native_mutex_unlock(&th->interrupt_lock);
2305
2306 THREAD_BLOCKING_YIELD(th);
2307 {
2308 if (!RUBY_VM_INTERRUPTED(th->ec)) {
2309 struct pollfd pfd[2];
2310 struct timespec ts;
2311
2312 pfd[0].fd = signal_self_pipe.normal[0]; /* sigwait_fd */
2313 pfd[1].fd = signal_self_pipe.ub_main[0];
2314 pfd[0].events = pfd[1].events = POLLIN;
2315 if (ppoll(pfd, 2, rb_hrtime2timespec(&ts, rel), 0) > 0) {
2316 if (pfd[1].revents & POLLIN) {
2317 (void)consume_communication_pipe(pfd[1].fd);
2318 }
2319 }
2320            /*
2321             * do not read the sigwait_fd here; leave that to uplevel
2322             * callers or other threads, otherwise we may steal wakeups
2323             * and starve other threads
2324             */
2325 }
2326 unblock_function_clear(th);
2327 }
2328 THREAD_BLOCKING_END(th);
2329}
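/*
 * native_ppoll_sleep() multiplexes two descriptors at once and only
 * drains the one it exclusively owns.  A compact sketch of waiting on
 * two fds with ppoll(2) and inspecting revents per entry (demo_* names
 * are illustrative; ppoll may need _GNU_SOURCE, and platforms without
 * it fall back to the ruby_ppoll() shim above):
 */
#if 0 /* illustrative sketch, not compiled */
#include <poll.h>
#include <time.h>

/* returns a bitmask: 1 if fd_a is readable, 2 if fd_b is readable */
static int
demo_wait_two(int fd_a, int fd_b, const struct timespec *ts)
{
    struct pollfd pfd[2];
    int ret = 0;

    pfd[0].fd = fd_a; pfd[0].events = POLLIN; pfd[0].revents = 0;
    pfd[1].fd = fd_b; pfd[1].events = POLLIN; pfd[1].revents = 0;

    if (ppoll(pfd, 2, ts, NULL) > 0) {  /* ts == NULL blocks forever */
        if (pfd[0].revents & POLLIN) ret |= 1;
        if (pfd[1].revents & POLLIN) ret |= 2;
    }
    return ret;
}
#endif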
2330
2331static void
2332native_sleep(rb_thread_t *th, rb_hrtime_t *rel)
2333{
2334 int sigwait_fd = rb_sigwait_fd_get(th);
2335 rb_ractor_blocking_threads_inc(th->ractor, __FILE__, __LINE__);
2336
2337 RB_INTERNAL_THREAD_HOOK(RUBY_INTERNAL_THREAD_EVENT_SUSPENDED);
2338
2339 if (sigwait_fd >= 0) {
2340 rb_native_mutex_lock(&th->interrupt_lock);
2341 th->unblock.func = ubf_sigwait;
2342 rb_native_mutex_unlock(&th->interrupt_lock);
2343
2344 THREAD_BLOCKING_YIELD(th);
2345 {
2346 if (!RUBY_VM_INTERRUPTED(th->ec)) {
2347 rb_sigwait_sleep(th, sigwait_fd, rel);
2348 }
2349 else {
2350 check_signals_nogvl(th, sigwait_fd);
2351 }
2352 unblock_function_clear(th);
2353 }
2354 THREAD_BLOCKING_END(th);
2355
2356 rb_sigwait_fd_put(th, sigwait_fd);
2357 rb_sigwait_fd_migrate(th->vm);
2358 }
2359 else if (th == th->vm->ractor.main_thread) { /* always able to handle signals */
2360 native_ppoll_sleep(th, rel);
2361 }
2362 else {
2363 native_cond_sleep(th, rel);
2364 }
2365
2366 rb_ractor_blocking_threads_dec(th->ractor, __FILE__, __LINE__);
2367}
2368
2369#if UBF_TIMER == UBF_TIMER_PTHREAD
2370static void *
2371timer_pthread_fn(void *p)
2372{
2373 rb_vm_t *vm = p;
2374 pthread_t main_thread_id = vm->ractor.main_thread->nt->thread_id;
2375 struct pollfd pfd;
2376 int timeout = -1;
2377 int ccp;
2378
2379 pfd.fd = timer_pthread.low[0];
2380 pfd.events = POLLIN;
2381
2382 while (system_working > 0) {
2383 (void)poll(&pfd, 1, timeout);
2384 ccp = consume_communication_pipe(pfd.fd);
2385
2386 if (system_working > 0) {
2387 if (ATOMIC_CAS(timer_pthread.armed, 1, 1)) {
2388 pthread_kill(main_thread_id, SIGVTALRM);
2389
2390 if (rb_signal_buff_size() || !ubf_threads_empty()) {
2391 timeout = TIME_QUANTUM_MSEC;
2392 }
2393 else {
2394 ATOMIC_SET(timer_pthread.armed, 0);
2395 timeout = -1;
2396 }
2397 }
2398 else if (ccp) {
2399 pthread_kill(main_thread_id, SIGVTALRM);
2400 ATOMIC_SET(timer_pthread.armed, 0);
2401 timeout = -1;
2402 }
2403 }
2404 }
2405
2406 return 0;
2407}
2408#endif /* UBF_TIMER_PTHREAD */
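/*
 * The pthread-based timer above is woken through a pipe it polls.  A
 * sketch of that self-pipe wakeup shape (demo_* names are illustrative):
 * write(2) is async-signal-safe, so even a signal handler may poke the
 * worker, which then drains the pipe before going back to sleep.
 */
#if 0 /* illustrative sketch, not compiled */
#include <errno.h>
#include <unistd.h>

static void
demo_wake(int write_fd)
{
    static const char buf = '!';
    ssize_t r;

    do {
        r = write(write_fd, &buf, 1);
    } while (r < 0 && errno == EINTR);
}

static void
demo_drain(int read_fd)       /* assumes O_NONBLOCK on read_fd */
{
    char buf[64];

    while (read(read_fd, buf, sizeof(buf)) > 0)
        ;
}
#endif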
2409
2410static VALUE
2411ubf_caller(void *ignore)
2412{
2413    rb_thread_sleep_forever();
2414
2415 return Qfalse;
2416}
2417
2418/*
2419 * Called if and only if one thread is running, and
2420 * the unblock function is NOT async-signal-safe
2421 * This assumes USE_THREAD_CACHE is true for performance reasons
2422 */
2423static VALUE
2424rb_thread_start_unblock_thread(void)
2425{
2426 return rb_thread_create(ubf_caller, 0);
2427}
2428#endif /* THREAD_SYSTEM_DEPENDENT_IMPLEMENTATION */