1 | /* $NetBSD: linux_machdep.c,v 1.48 2014/02/19 20:50:56 dsl Exp $ */ |
2 | |
3 | /*- |
4 | * Copyright (c) 2005 Emmanuel Dreyfus, all rights reserved. |
5 | * |
6 | * Redistribution and use in source and binary forms, with or without |
7 | * modification, are permitted provided that the following conditions |
8 | * are met: |
9 | * 1. Redistributions of source code must retain the above copyright |
10 | * notice, this list of conditions and the following disclaimer. |
11 | * 2. Redistributions in binary form must reproduce the above copyright |
12 | * notice, this list of conditions and the following disclaimer in the |
13 | * documentation and/or other materials provided with the distribution. |
14 | * 3. All advertising materials mentioning features or use of this software |
15 | * must display the following acknowledgement: |
16 | * This product includes software developed by Emmanuel Dreyfus |
17 | * 4. The name of the author may not be used to endorse or promote |
18 | * products derived from this software without specific prior written |
19 | * permission. |
20 | * |
21 | * THIS SOFTWARE IS PROVIDED BY THE THE AUTHOR AND CONTRIBUTORS ``AS IS'' |
22 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, |
23 | * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
24 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS |
25 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
26 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
27 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
28 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
29 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
30 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
31 | * POSSIBILITY OF SUCH DAMAGE. |
32 | */ |
33 | |
34 | #include <sys/cdefs.h> |
35 | |
36 | __KERNEL_RCSID(0, "$NetBSD: linux_machdep.c,v 1.48 2014/02/19 20:50:56 dsl Exp $" ); |
37 | |
38 | #include <sys/param.h> |
39 | #include <sys/types.h> |
40 | #include <sys/systm.h> |
41 | #include <sys/signal.h> |
42 | #include <sys/exec.h> |
43 | #include <sys/proc.h> |
44 | #include <sys/ptrace.h> /* for process_read_fpregs() */ |
45 | #include <sys/ucontext.h> |
46 | #include <sys/conf.h> |
47 | |
48 | #include <machine/reg.h> |
49 | #include <machine/pcb.h> |
50 | #include <machine/mcontext.h> |
51 | #include <machine/specialreg.h> |
52 | #include <machine/vmparam.h> |
53 | #include <machine/cpufunc.h> |
54 | #include <x86/include/sysarch.h> |
55 | |
56 | /* |
57 | * To see whether wscons is configured (for virtual console ioctl calls). |
58 | */ |
59 | #if defined(_KERNEL_OPT) |
60 | #include "wsdisplay.h" |
61 | #endif |
62 | #if (NWSDISPLAY > 0) |
63 | #include <dev/wscons/wsconsio.h> |
64 | #include <dev/wscons/wsdisplay_usl_io.h> |
65 | #endif |
66 | |
67 | |
68 | #include <compat/linux/common/linux_signal.h> |
69 | #include <compat/linux/common/linux_errno.h> |
70 | #include <compat/linux/common/linux_exec.h> |
71 | #include <compat/linux/common/linux_ioctl.h> |
72 | #include <compat/linux/common/linux_prctl.h> |
73 | #include <compat/linux/common/linux_machdep.h> |
74 | #include <compat/linux/common/linux_ipc.h> |
75 | #include <compat/linux/common/linux_sem.h> |
76 | #include <compat/linux/linux_syscall.h> |
77 | #include <compat/linux/linux_syscallargs.h> |
78 | |
/*
 * Redirect an lwp's trapframe to a signal handler: `catcher' becomes the
 * instruction pointer and `f' the stack pointer.
 */
static void linux_buildcontext(struct lwp *l, void *catcher, void *f);
80 | |
81 | void |
82 | linux_setregs(struct lwp *l, struct exec_package *epp, vaddr_t stack) |
83 | { |
84 | struct pcb *pcb = lwp_getpcb(l); |
85 | struct trapframe *tf; |
86 | |
87 | fpu_save_area_clear(l, __NetBSD_NPXCW__); |
88 | pcb->pcb_flags = 0; |
89 | |
90 | l->l_proc->p_flag &= ~PK_32; |
91 | |
92 | tf = l->l_md.md_regs; |
93 | tf->tf_rax = 0; |
94 | tf->tf_rbx = 0; |
95 | tf->tf_rcx = epp->ep_entry; |
96 | tf->tf_rdx = 0; |
97 | tf->tf_rsi = 0; |
98 | tf->tf_rdi = 0; |
99 | tf->tf_rbp = 0; |
100 | tf->tf_rsp = stack; |
101 | tf->tf_r8 = 0; |
102 | tf->tf_r9 = 0; |
103 | tf->tf_r10 = 0; |
104 | tf->tf_r11 = 0; |
105 | tf->tf_r12 = 0; |
106 | tf->tf_r13 = 0; |
107 | tf->tf_r14 = 0; |
108 | tf->tf_r15 = 0; |
109 | tf->tf_rip = epp->ep_entry; |
110 | tf->tf_rflags = PSL_USERSET; |
111 | tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL); |
112 | tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL); |
113 | tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL); |
114 | tf->tf_es = 0; |
115 | cpu_fsgs_zero(l); |
116 | |
117 | return; |
118 | } |
119 | |
120 | void |
121 | linux_sendsig(const ksiginfo_t *ksi, const sigset_t *mask) |
122 | { |
123 | struct lwp *l = curlwp; |
124 | struct proc *p = l->l_proc; |
125 | struct pcb *pcb = lwp_getpcb(l); |
126 | struct sigacts *ps = p->p_sigacts; |
127 | int onstack, error; |
128 | int sig = ksi->ksi_signo; |
129 | struct linux_rt_sigframe *sfp, sigframe; |
130 | struct linux__fpstate *fpsp; |
131 | struct fpreg fpregs; |
132 | struct trapframe *tf = l->l_md.md_regs; |
133 | sig_t catcher = SIGACTION(p, sig).sa_handler; |
134 | linux_sigset_t lmask; |
135 | char *sp; |
136 | |
137 | /* Do we need to jump onto the signal stack? */ |
138 | onstack = |
139 | (l->l_sigstk.ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 && |
140 | (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0; |
141 | |
142 | /* Allocate space for the signal handler context. */ |
143 | if (onstack) |
144 | sp = ((char *)l->l_sigstk.ss_sp + |
145 | l->l_sigstk.ss_size); |
146 | else |
147 | sp = (char *)tf->tf_rsp - 128; |
148 | |
149 | /* Save FPU state */ |
150 | sp = (char *) (((long)sp - sizeof (*fpsp)) & ~0xfUL); |
151 | fpsp = (struct linux__fpstate *)sp; |
152 | |
153 | /* |
154 | * Populate the rt_sigframe |
155 | */ |
156 | sp = (char *) |
157 | ((((long)sp - sizeof(struct linux_rt_sigframe)) & ~0xfUL) - 8); |
158 | sfp = (struct linux_rt_sigframe *)sp; |
159 | |
160 | memset(&sigframe, 0, sizeof(sigframe)); |
161 | if (ps->sa_sigdesc[sig].sd_vers != 0) |
162 | sigframe.pretcode = |
163 | (char *)(u_long)ps->sa_sigdesc[sig].sd_tramp; |
164 | else |
165 | sigframe.pretcode = NULL; |
166 | |
167 | /* |
168 | * The user context |
169 | */ |
170 | sigframe.uc.luc_flags = 0; |
171 | sigframe.uc.luc_link = NULL; |
172 | |
173 | /* This is used regardless of SA_ONSTACK in Linux */ |
174 | sigframe.uc.luc_stack.ss_sp = l->l_sigstk.ss_sp; |
175 | sigframe.uc.luc_stack.ss_size = l->l_sigstk.ss_size; |
176 | sigframe.uc.luc_stack.ss_flags = 0; |
177 | if (l->l_sigstk.ss_flags & SS_ONSTACK) |
178 | sigframe.uc.luc_stack.ss_flags |= LINUX_SS_ONSTACK; |
179 | if (l->l_sigstk.ss_flags & SS_DISABLE) |
180 | sigframe.uc.luc_stack.ss_flags |= LINUX_SS_DISABLE; |
181 | |
182 | sigframe.uc.luc_mcontext.r8 = tf->tf_r8; |
183 | sigframe.uc.luc_mcontext.r9 = tf->tf_r9; |
184 | sigframe.uc.luc_mcontext.r10 = tf->tf_r10; |
185 | sigframe.uc.luc_mcontext.r11 = tf->tf_r11; |
186 | sigframe.uc.luc_mcontext.r12 = tf->tf_r12; |
187 | sigframe.uc.luc_mcontext.r13 = tf->tf_r13; |
188 | sigframe.uc.luc_mcontext.r14 = tf->tf_r14; |
189 | sigframe.uc.luc_mcontext.r15 = tf->tf_r15; |
190 | sigframe.uc.luc_mcontext.rdi = tf->tf_rdi; |
191 | sigframe.uc.luc_mcontext.rsi = tf->tf_rsi; |
192 | sigframe.uc.luc_mcontext.rbp = tf->tf_rbp; |
193 | sigframe.uc.luc_mcontext.rbx = tf->tf_rbx; |
194 | sigframe.uc.luc_mcontext.rdx = tf->tf_rdx; |
195 | sigframe.uc.luc_mcontext.rax = tf->tf_rax; |
196 | sigframe.uc.luc_mcontext.rcx = tf->tf_rcx; |
197 | sigframe.uc.luc_mcontext.rsp = tf->tf_rsp; |
198 | sigframe.uc.luc_mcontext.rip = tf->tf_rip; |
199 | sigframe.uc.luc_mcontext.eflags = tf->tf_rflags; |
200 | sigframe.uc.luc_mcontext.cs = tf->tf_cs; |
201 | sigframe.uc.luc_mcontext.gs = tf->tf_gs; |
202 | sigframe.uc.luc_mcontext.fs = tf->tf_fs; |
203 | sigframe.uc.luc_mcontext.err = tf->tf_err; |
204 | sigframe.uc.luc_mcontext.trapno = tf->tf_trapno; |
205 | native_to_linux_sigset(&lmask, mask); |
206 | sigframe.uc.luc_mcontext.oldmask = lmask.sig[0]; |
207 | sigframe.uc.luc_mcontext.cr2 = (long)pcb->pcb_onfault; |
208 | sigframe.uc.luc_mcontext.fpstate = fpsp; |
209 | native_to_linux_sigset(&sigframe.uc.luc_sigmask, mask); |
210 | native_to_linux_siginfo(&sigframe.info, &ksi->ksi_info); |
211 | sendsig_reset(l, sig); |
212 | mutex_exit(p->p_lock); |
213 | error = 0; |
214 | |
215 | /* |
216 | * Save FPU state, if any |
217 | */ |
218 | if (fpsp != NULL) { |
219 | size_t fp_size = sizeof fpregs; |
220 | /* The netbsd and linux structures both match the fxsave data */ |
221 | (void)process_read_fpregs(l, &fpregs, &fp_size); |
222 | error = copyout(&fpregs, fpsp, sizeof(*fpsp)); |
223 | } |
224 | |
225 | if (error == 0) |
226 | error = copyout(&sigframe, sp, sizeof(sigframe)); |
227 | |
228 | mutex_enter(p->p_lock); |
229 | |
230 | if (error != 0) { |
231 | sigexit(l, SIGILL); |
232 | return; |
233 | } |
234 | |
235 | linux_buildcontext(l, catcher, sp); |
236 | tf->tf_rdi = sigframe.info.lsi_signo; |
237 | tf->tf_rax = 0; |
238 | tf->tf_rsi = (long)&sfp->info; |
239 | tf->tf_rdx = (long)&sfp->uc; |
240 | |
241 | /* |
242 | * Remember we use signal stack |
243 | */ |
244 | if (onstack) |
245 | l->l_sigstk.ss_flags |= SS_ONSTACK; |
246 | return; |
247 | } |
248 | |
249 | int |
250 | linux_sys_modify_ldt(struct lwp *l, const struct linux_sys_modify_ldt_args *v, register_t *retval) |
251 | { |
252 | printf("linux_sys_modify_ldt\n" ); |
253 | return 0; |
254 | } |
255 | |
256 | int |
257 | linux_sys_iopl(struct lwp *l, const struct linux_sys_iopl_args *v, register_t *retval) |
258 | { |
259 | return 0; |
260 | } |
261 | |
262 | int |
263 | linux_sys_ioperm(struct lwp *l, const struct linux_sys_ioperm_args *v, register_t *retval) |
264 | { |
265 | return 0; |
266 | } |
267 | |
268 | dev_t |
269 | linux_fakedev(dev_t dev, int raw) |
270 | { |
271 | |
272 | extern const struct cdevsw ptc_cdevsw, pts_cdevsw; |
273 | const struct cdevsw *cd = cdevsw_lookup(dev); |
274 | |
275 | if (raw) { |
276 | #if (NWSDISPLAY > 0) |
277 | extern const struct cdevsw wsdisplay_cdevsw; |
278 | if (cd == &wsdisplay_cdevsw) |
279 | return makedev(LINUX_CONS_MAJOR, (minor(dev) + 1)); |
280 | #endif |
281 | } |
282 | |
283 | if (cd == &ptc_cdevsw) |
284 | return makedev(LINUX_PTC_MAJOR, minor(dev)); |
285 | if (cd == &pts_cdevsw) |
286 | return makedev(LINUX_PTS_MAJOR, minor(dev)); |
287 | |
288 | return ((minor(dev) & 0xff) | ((major(dev) & 0xfff) << 8) |
289 | | (((unsigned long long int) (minor(dev) & ~0xff)) << 12) |
290 | | (((unsigned long long int) (major(dev) & ~0xfff)) << 32)); |
291 | } |
292 | |
293 | int |
294 | linux_machdepioctl(struct lwp *l, const struct linux_sys_ioctl_args *v, register_t *retval) |
295 | { |
296 | return 0; |
297 | } |
298 | |
299 | int |
300 | linux_sys_rt_sigreturn(struct lwp *l, const void *v, register_t *retval) |
301 | { |
302 | struct linux_ucontext *luctx; |
303 | struct trapframe *tf = l->l_md.md_regs; |
304 | struct linux_sigcontext *lsigctx; |
305 | struct linux_rt_sigframe frame, *fp; |
306 | ucontext_t uctx; |
307 | mcontext_t *mctx; |
308 | struct fxsave *fxarea; |
309 | int error; |
310 | |
311 | fp = (struct linux_rt_sigframe *)(tf->tf_rsp - 8); |
312 | if ((error = copyin(fp, &frame, sizeof(frame))) != 0) { |
313 | mutex_enter(l->l_proc->p_lock); |
314 | sigexit(l, SIGILL); |
315 | return error; |
316 | } |
317 | luctx = &frame.uc; |
318 | lsigctx = &luctx->luc_mcontext; |
319 | |
320 | memset(&uctx, 0, sizeof(uctx)); |
321 | mctx = (mcontext_t *)&uctx.uc_mcontext; |
322 | fxarea = (struct fxsave *)&mctx->__fpregs; |
323 | |
324 | /* |
325 | * Set the flags. Linux always have CPU, stack and signal state, |
326 | * FPU is optional. uc_flags is not used to tell what we have. |
327 | */ |
328 | uctx.uc_flags = (_UC_SIGMASK|_UC_CPU|_UC_STACK|_UC_CLRSTACK); |
329 | if (lsigctx->fpstate != NULL) |
330 | uctx.uc_flags |= _UC_FPU; |
331 | uctx.uc_link = NULL; |
332 | |
333 | /* |
334 | * Signal set |
335 | */ |
336 | linux_to_native_sigset(&uctx.uc_sigmask, &luctx->luc_sigmask); |
337 | |
338 | /* |
339 | * CPU state |
340 | */ |
341 | mctx->__gregs[_REG_R8] = lsigctx->r8; |
342 | mctx->__gregs[_REG_R9] = lsigctx->r9; |
343 | mctx->__gregs[_REG_R10] = lsigctx->r10; |
344 | mctx->__gregs[_REG_R11] = lsigctx->r11; |
345 | mctx->__gregs[_REG_R12] = lsigctx->r12; |
346 | mctx->__gregs[_REG_R13] = lsigctx->r13; |
347 | mctx->__gregs[_REG_R14] = lsigctx->r14; |
348 | mctx->__gregs[_REG_R15] = lsigctx->r15; |
349 | mctx->__gregs[_REG_RDI] = lsigctx->rdi; |
350 | mctx->__gregs[_REG_RSI] = lsigctx->rsi; |
351 | mctx->__gregs[_REG_RBP] = lsigctx->rbp; |
352 | mctx->__gregs[_REG_RBX] = lsigctx->rbx; |
353 | mctx->__gregs[_REG_RAX] = lsigctx->rax; |
354 | mctx->__gregs[_REG_RDX] = lsigctx->rdx; |
355 | mctx->__gregs[_REG_RCX] = lsigctx->rcx; |
356 | mctx->__gregs[_REG_RIP] = lsigctx->rip; |
357 | mctx->__gregs[_REG_RFLAGS] = lsigctx->eflags; |
358 | mctx->__gregs[_REG_CS] = lsigctx->cs; |
359 | mctx->__gregs[_REG_GS] = lsigctx->gs; |
360 | mctx->__gregs[_REG_FS] = lsigctx->fs; |
361 | mctx->__gregs[_REG_ERR] = lsigctx->err; |
362 | mctx->__gregs[_REG_TRAPNO] = lsigctx->trapno; |
363 | mctx->__gregs[_REG_ES] = tf->tf_es; |
364 | mctx->__gregs[_REG_DS] = tf->tf_ds; |
365 | mctx->__gregs[_REG_RSP] = lsigctx->rsp; /* XXX */ |
366 | mctx->__gregs[_REG_SS] = tf->tf_ss; |
367 | |
368 | /* |
369 | * FPU state |
370 | */ |
371 | if (lsigctx->fpstate != NULL) { |
372 | /* Both structures match the fxstate data */ |
373 | error = copyin(lsigctx->fpstate, fxarea, sizeof(*fxarea)); |
374 | if (error != 0) { |
375 | mutex_enter(l->l_proc->p_lock); |
376 | sigexit(l, SIGILL); |
377 | return error; |
378 | } |
379 | } |
380 | |
381 | /* |
382 | * And the stack |
383 | */ |
384 | uctx.uc_stack.ss_flags = 0; |
385 | if (luctx->luc_stack.ss_flags & LINUX_SS_ONSTACK) |
386 | uctx.uc_stack.ss_flags |= SS_ONSTACK; |
387 | |
388 | if (luctx->luc_stack.ss_flags & LINUX_SS_DISABLE) |
389 | uctx.uc_stack.ss_flags |= SS_DISABLE; |
390 | |
391 | uctx.uc_stack.ss_sp = luctx->luc_stack.ss_sp; |
392 | uctx.uc_stack.ss_size = luctx->luc_stack.ss_size; |
393 | |
394 | /* |
395 | * And let setucontext deal with that. |
396 | */ |
397 | mutex_enter(l->l_proc->p_lock); |
398 | error = setucontext(l, &uctx); |
399 | mutex_exit(l->l_proc->p_lock); |
400 | if (error) |
401 | return error; |
402 | |
403 | return EJUSTRETURN; |
404 | } |
405 | |
406 | int |
407 | linux_sys_arch_prctl(struct lwp *l, |
408 | const struct linux_sys_arch_prctl_args *uap, register_t *retval) |
409 | { |
410 | /* { |
411 | syscallarg(int) code; |
412 | syscallarg(unsigned long) addr; |
413 | } */ |
414 | void *addr = (void *)SCARG(uap, addr); |
415 | |
416 | switch(SCARG(uap, code)) { |
417 | case LINUX_ARCH_SET_GS: |
418 | return x86_set_sdbase(addr, 'g', l, true); |
419 | |
420 | case LINUX_ARCH_GET_GS: |
421 | return x86_get_sdbase(addr, 'g'); |
422 | |
423 | case LINUX_ARCH_SET_FS: |
424 | return x86_set_sdbase(addr, 'f', l, true); |
425 | |
426 | case LINUX_ARCH_GET_FS: |
427 | return x86_get_sdbase(addr, 'f'); |
428 | |
429 | default: |
430 | #ifdef DEBUG_LINUX |
431 | printf("linux_sys_arch_prctl: unexpected code %d\n" , |
432 | SCARG(uap, code)); |
433 | #endif |
434 | return EINVAL; |
435 | } |
436 | /* NOTREACHED */ |
437 | } |
438 | |
439 | const int linux_vsyscall_to_syscall[] = { |
440 | LINUX_SYS_gettimeofday, |
441 | LINUX_SYS_time, |
442 | LINUX_SYS_nosys, /* nosys */ |
443 | LINUX_SYS_nosys, /* nosys */ |
444 | }; |
445 | |
446 | int |
447 | linux_usertrap(struct lwp *l, vaddr_t trapaddr, void *arg) |
448 | { |
449 | struct trapframe *tf = arg; |
450 | uint64_t retaddr; |
451 | int vsyscallnr; |
452 | |
453 | /* |
454 | * Check for a vsyscall. %rip must be the fault address, |
455 | * and the address must be in the Linux vsyscall area. |
456 | * Also, vsyscalls are only done at 1024-byte boundaries. |
457 | */ |
458 | |
459 | if (__predict_true(trapaddr < LINUX_VSYSCALL_START)) |
460 | return 0; |
461 | |
462 | if (trapaddr != tf->tf_rip) |
463 | return 0; |
464 | |
465 | if ((tf->tf_rip & (LINUX_VSYSCALL_SIZE - 1)) != 0) |
466 | return 0; |
467 | |
468 | vsyscallnr = (tf->tf_rip - LINUX_VSYSCALL_START) / LINUX_VSYSCALL_SIZE; |
469 | |
470 | if (vsyscallnr > LINUX_VSYSCALL_MAXNR) |
471 | return 0; |
472 | |
473 | /* |
474 | * Get the return address from the top of the stack, |
475 | * and fix up the return address. |
476 | * This assumes the faulting instruction was callq *reg, |
477 | * which is the only way that vsyscalls are ever entered. |
478 | */ |
479 | if (copyin((void *)tf->tf_rsp, &retaddr, sizeof retaddr) != 0) |
480 | return 0; |
481 | tf->tf_rip = retaddr; |
482 | tf->tf_rax = linux_vsyscall_to_syscall[vsyscallnr]; |
483 | tf->tf_rsp += 8; /* "pop" the return address */ |
484 | |
485 | #if 0 |
486 | printf("usertrap: rip %p rsp %p retaddr %p vsys %d sys %d\n" , |
487 | (void *)tf->tf_rip, (void *)tf->tf_rsp, (void *)retaddr, |
488 | vsyscallnr, (int)tf->tf_rax); |
489 | #endif |
490 | |
491 | (*l->l_proc->p_md.md_syscall)(tf); |
492 | |
493 | return 1; |
494 | } |
495 | |
496 | static void |
497 | linux_buildcontext(struct lwp *l, void *catcher, void *f) |
498 | { |
499 | struct trapframe *tf = l->l_md.md_regs; |
500 | |
501 | tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL); |
502 | tf->tf_rip = (u_int64_t)catcher; |
503 | tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL); |
504 | tf->tf_rflags &= ~PSL_CLEARSIG; |
505 | tf->tf_rsp = (u_int64_t)f; |
506 | tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL); |
507 | } |
508 | |