aboutsummaryrefslogtreecommitdiff
path: root/en_US.ISO8859-1/books/arch-handbook/jail/chapter.xml
blob: b89c74b7607617ee98141b8d692102cbc488775c (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
<?xml version="1.0" encoding="iso-8859-1"?>
<!--
     The FreeBSD Documentation Project

     $FreeBSD$
-->

<chapter id="jail">
  <chapterinfo>
    <author>
      <firstname>Evan</firstname>
      <surname>Sarmiento</surname>
      <affiliation>
	<address><email>evms@cs.bu.edu</email></address>
      </affiliation>
    </author>
    <copyright>
      <year>2001</year>
      <holder role="mailto:evms@cs.bu.edu">Evan Sarmiento</holder>
    </copyright>
  </chapterinfo>
  <title>The Jail Subsystem</title>

  <indexterm><primary>security</primary></indexterm>
  <indexterm><primary>Jail</primary></indexterm>
  <indexterm><primary>root</primary></indexterm>

  <para>On most &unix; systems, <literal>root</literal> has omnipotent power.
    This promotes insecurity. If an attacker gained <literal>root</literal>
    on a system, he would have every function at his fingertips. In FreeBSD
    there are sysctls which dilute the power of <literal>root</literal>, in
    order to minimize the damage caused by an attacker. Specifically, one of
    these functions is called <literal>secure levels</literal>. Similarly,
    another function, which is present from FreeBSD 4.0 onward, is a utility
    called &man.jail.8;. <application>Jail</application> chroots an environment
    and sets certain restrictions on processes which are forked within
    the <application>jail</application>. For example, a jailed process
    cannot affect processes outside the <application>jail</application>,
    utilize certain system calls, or inflict any damage on the host
    environment.</para>

  <para><application>Jail</application> is becoming the new security
    model. People are running potentially vulnerable servers such as
    <application>Apache</application>, <application>BIND</application>, and
    <application>sendmail</application> within jails, so that if an attacker
    gains <literal>root</literal> within the <application>jail</application>,
    it is only an annoyance, and not a devastation. This article mainly
    focuses on the internals (source code) of <application>jail</application>.
    If you are looking for a how-to on setting up a
    <application>jail</application>, I suggest you look at my other article
    in Sys Admin Magazine, May 2001, entitled "Securing FreeBSD using
    <application>Jail</application>."</para>

  <sect1 id="jail-arch">
    <title>Architecture</title>

    <para>
      <application>Jail</application> consists of two realms: the
      userland program, &man.jail.8;, and the code implemented within
      the kernel: the &man.jail.2; system call and associated
      restrictions. I will be discussing the userland program and
      then how <application>jail</application> is implemented within
      the kernel.</para>

    <sect2>
      <title>Userland Code</title>

      <indexterm><primary>Jail</primary>
	<secondary>Userland Program</secondary></indexterm>

      <para>The source for the userland <application>jail</application>
        is located in <filename>/usr/src/usr.sbin/jail</filename>,
        consisting of one file, <filename>jail.c</filename>. The program
        takes these arguments: the path of the <application>jail</application>,
        hostname, IP address, and the command to be executed.</para>

      <sect3>
        <title>Data Structures</title>

        <para>In <filename>jail.c</filename>, the first thing I would
          note is the declaration of an important structure
          <literal>struct jail j;</literal> which was included from
          <filename>/usr/include/sys/jail.h</filename>.</para>

        <para>The definition of the <literal>jail</literal> structure is:</para>

<programlisting><filename>/usr/include/sys/jail.h</filename>:

struct jail {
        u_int32_t       version;
        char            *path;
        char            *hostname;
        u_int32_t       ip_number;
};</programlisting>

        <para>As you can see, there is an entry for each of the
          arguments passed to the &man.jail.8; program, and indeed,
          they are set during its execution.</para>

        <programlisting><filename>/usr/src/usr.sbin/jail/jail.c</filename>
char path[PATH_MAX];
...
if (realpath(argv[0], path) == NULL)
    err(1, "realpath: %s", argv[0]);
if (chdir(path) != 0)
    err(1, "chdir: %s", path);
memset(&amp;j, 0, sizeof(j));
j.version = 0;
j.path = path;
j.hostname = argv[1];</programlisting>

      </sect3>

      <sect3>
        <title>Networking</title>

        <para>One of the arguments passed to the &man.jail.8; program is
          an IP address with which the <application>jail</application>
          can be accessed over the network. &man.jail.8; translates the
          IP address given into host byte order and then stores it in
          <literal>j</literal> (the <literal>jail</literal> structure).</para>

        <programlisting><filename>/usr/src/usr.sbin/jail/jail.c</filename>:
struct in_addr in;
...
if (inet_aton(argv[2], &amp;in) == 0)
    errx(1, "Could not make sense of ip-number: %s", argv[2]);
j.ip_number = ntohl(in.s_addr);</programlisting>

        <para>The &man.inet.aton.3; function "interprets the specified
          character string as an Internet address, placing the address
          into the structure provided." The <literal>ip_number</literal>
          member in the <literal>jail</literal> structure is set only
          after the IP address placed into the <literal>in</literal>
          structure by &man.inet.aton.3; has been translated into host
          byte order by &man.ntohl.3;.</para>

      </sect3>

      <sect3>
        <title>Jailing The Process</title>

        <para>Finally, the userland program jails the process.
          <application>Jail</application> now becomes an imprisoned
          process itself and then executes the command given using
          &man.execv.3;.</para>
        <programlisting><filename>/usr/src/usr.sbin/jail/jail.c</filename>
i = jail(&amp;j);
...
if (execv(argv[3], argv + 3) != 0)
    err(1, "execv: %s", argv[3]);</programlisting>

        <para>As you can see, the <literal>jail()</literal> function is
          called, and its argument is the <literal>jail</literal> structure
          which has been filled with the arguments given to the program.
          Finally, the program you specify is executed. I will now discuss
          how <application>jail</application> is implemented within the
          kernel.</para>
      </sect3>
    </sect2>

    <sect2>
      <title>Kernel Space</title>

      <indexterm><primary>Jail</primary>
	<secondary>Kernel Architecture</secondary></indexterm>

      <para>We will now be looking at the file
        <filename>/usr/src/sys/kern/kern_jail.c</filename>.  This is
        the file where the &man.jail.2; system call, appropriate sysctls,
        and networking functions are defined.</para>

      <sect3>
        <title>sysctls</title>

      <indexterm><primary>sysctl</primary></indexterm>

        <para>In <filename>kern_jail.c</filename>, the following
          sysctls are defined:</para>

        <programlisting><filename>/usr/src/sys/kern/kern_jail.c:</filename>

int     jail_set_hostname_allowed = 1;
SYSCTL_INT(_security_jail, OID_AUTO, set_hostname_allowed, CTLFLAG_RW,
    &amp;jail_set_hostname_allowed, 0,
    "Processes in jail can set their hostnames");

int     jail_socket_unixiproute_only = 1;
SYSCTL_INT(_security_jail, OID_AUTO, socket_unixiproute_only, CTLFLAG_RW,
    &amp;jail_socket_unixiproute_only, 0,
    "Processes in jail are limited to creating UNIX/IPv4/route sockets only");

int     jail_sysvipc_allowed = 0;
SYSCTL_INT(_security_jail, OID_AUTO, sysvipc_allowed, CTLFLAG_RW,
    &amp;jail_sysvipc_allowed, 0,
    "Processes in jail can use System V IPC primitives");

static int jail_enforce_statfs = 2;
SYSCTL_INT(_security_jail, OID_AUTO, enforce_statfs, CTLFLAG_RW,
    &amp;jail_enforce_statfs, 0,
    "Processes in jail cannot see all mounted file systems");

int    jail_allow_raw_sockets = 0;
SYSCTL_INT(_security_jail, OID_AUTO, allow_raw_sockets, CTLFLAG_RW,
    &amp;jail_allow_raw_sockets, 0,
    "Prison root can create raw sockets");

int    jail_chflags_allowed = 0;
SYSCTL_INT(_security_jail, OID_AUTO, chflags_allowed, CTLFLAG_RW,
    &amp;jail_chflags_allowed, 0,
    "Processes in jail can alter system file flags");

int     jail_mount_allowed = 0;
SYSCTL_INT(_security_jail, OID_AUTO, mount_allowed, CTLFLAG_RW,
    &amp;jail_mount_allowed, 0,
    "Processes in jail can mount/unmount jail-friendly file systems");</programlisting>

        <para>Each of these sysctls can be accessed by the user
          through the &man.sysctl.8; program. Throughout the kernel, these
          specific sysctls are recognized by their name. For example,
          the name of the first sysctl is
          <literal>security.jail.set_hostname_allowed</literal>.</para>
      </sect3>

      <sect3>
        <title>&man.jail.2; system call</title>

        <para>Like all system calls, the &man.jail.2; system call takes
          two arguments, <literal>struct thread *td</literal> and
          <literal>struct jail_args *uap</literal>.
          <literal>td</literal> is a pointer to the <literal>thread</literal>
          structure which describes the calling thread. In this
          context, <literal>uap</literal> is a pointer to the structure
          in which a pointer to the <literal>jail</literal> structure
          passed by the userland <filename>jail.c</filename> is contained.
          When I described the userland program before, you saw that the
          &man.jail.2; system call was given a <literal>jail</literal>
          structure as its own argument.</para>

        <programlisting><filename>/usr/src/sys/kern/kern_jail.c:</filename>
/*
 * struct jail_args {
 *  struct jail *jail;
 * };
 */
int
jail(struct thread *td, struct jail_args *uap)</programlisting>

        <para>Therefore, <literal>uap-&gt;jail</literal> can be used to
          access the <literal>jail</literal> structure which was passed
          to the system call. Next, the system call copies the
          <literal>jail</literal> structure into kernel space using
          the &man.copyin.9; function. &man.copyin.9; takes three arguments:
          the address of the data which is to be copied into kernel space
          (<literal>uap-&gt;jail</literal>), where to store it
          (<literal>j</literal>), and the size of the storage. The
          <literal>jail</literal> structure pointed to by
          <literal>uap-&gt;jail</literal> is copied into kernel space and
          is stored in another <literal>jail</literal> structure,
          <literal>j</literal>.</para>

        <programlisting><filename>/usr/src/sys/kern/kern_jail.c: </filename>
error = copyin(uap-&gt;jail, &amp;j, sizeof(j));</programlisting>

        <para>There is another important structure defined in
          <filename>jail.h</filename>. It is the <literal>prison</literal>
          structure. The <literal>prison</literal> structure is used
          exclusively within kernel space. Here is the definition of the
          <literal>prison</literal> structure.</para>

        <programlisting><filename>/usr/include/sys/jail.h</filename>:
struct prison {
        LIST_ENTRY(prison) pr_list;                     /* (a) all prisons */
        int              pr_id;                         /* (c) prison id */
        int              pr_ref;                        /* (p) refcount */
        char             pr_path[MAXPATHLEN];           /* (c) chroot path */
        struct vnode    *pr_root;                       /* (c) vnode to rdir */
        char             pr_host[MAXHOSTNAMELEN];       /* (p) jail hostname */
        u_int32_t        pr_ip;                         /* (c) ip addr host */
        void            *pr_linux;                      /* (p) linux abi */
        int              pr_securelevel;                /* (p) securelevel */
        struct task      pr_task;                       /* (d) destroy task */
        struct mtx       pr_mtx;
      void            **pr_slots;                     /* (p) additional data */
};</programlisting>

        <para>The &man.jail.2; system call then allocates memory for
        a <literal>prison</literal> structure and copies data between
        the <literal>jail</literal> and <literal>prison</literal>
        structure.</para>

        <programlisting><filename>/usr/src/sys/kern/kern_jail.c</filename>:
MALLOC(pr, struct prison *, sizeof(*pr), M_PRISON, M_WAITOK | M_ZERO);
...
error = copyinstr(j.path, &amp;pr-&gt;pr_path, sizeof(pr-&gt;pr_path), 0);
if (error)
    goto e_killmtx;
...
error = copyinstr(j.hostname, &amp;pr-&gt;pr_host, sizeof(pr-&gt;pr_host), 0);
if (error)
     goto e_dropvnref;
pr-&gt;pr_ip = j.ip_number;</programlisting>
        <para>Next, we will discuss another important system call
          &man.jail.attach.2;, which implements the function to put
          a process into the <application>jail</application>.</para>
        <programlisting><filename>/usr/src/sys/kern/kern_jail.c</filename>:
/*
 * struct jail_attach_args {
 *      int jid;
 * };
 */
int
jail_attach(struct thread *td, struct jail_attach_args *uap)</programlisting>
        <para>This system call makes the changes that distinguish
          a jailed process from unjailed ones.
          To understand what &man.jail.attach.2; does for us, certain
          background information is needed.</para>
        <para>
          On FreeBSD, each kernel visible thread is identified by its
          <literal>thread</literal> structure, while the processes are
          described by their <literal>proc</literal> structures. You can
          find the definitions of the <literal>thread</literal> and
          <literal>proc</literal> structure in
          <filename>/usr/include/sys/proc.h</filename>.
          For example, the <literal>td</literal> argument in any system
          call is actually a pointer to the calling thread's
          <literal>thread</literal> structure, as stated before.
          The <literal>td_proc</literal> member in the
          <literal>thread</literal> structure pointed by <literal>td</literal>
          is a pointer to the <literal>proc</literal> structure which
          represents the process that contains the thread represented by
          <literal>td</literal>. The <literal>proc</literal> structure
          contains members which can describe the owner's
          identity (<literal>p_ucred</literal>), the process resource
          limits (<literal>p_limit</literal>), and so on. In the
          <literal>ucred</literal> structure pointed to by the
          <literal>p_ucred</literal> member in the <literal>proc</literal>
          structure, there is a pointer to the <literal>prison</literal>
          structure (<literal>cr_prison</literal>).</para>

        <programlisting><filename>/usr/include/sys/proc.h: </filename>
struct thread {
    ...
    struct proc *td_proc;
    ...
};
struct proc {
    ...
    struct ucred *p_ucred;
    ...
};
<filename>/usr/include/sys/ucred.h</filename>
struct ucred {
    ...
    struct prison *cr_prison;
    ...
};</programlisting>

        <para>In <filename>kern_jail.c</filename>, the function
          <literal>jail()</literal> then calls function
          <literal>jail_attach()</literal> with a given <literal>jid</literal>.
          And <literal>jail_attach()</literal> calls function
          <literal>change_root()</literal> to change the root directory of the
          calling process. The <literal>jail_attach()</literal> then creates
          a new <literal>ucred</literal> structure, and attaches the newly
          created <literal>ucred</literal> structure to the calling process
          after it has successfully attached the <literal>prison</literal>
          structure to the <literal>ucred</literal> structure. From then on,
          the calling process is recognized as jailed. When the kernel routine
          <literal>jailed()</literal> is called in the kernel with the newly
          created <literal>ucred</literal> structure as its argument, it
          returns 1 to tell that the credential is connected
          with a <application>jail</application>. The common ancestor process
          of all the processes forked within the <application>jail</application>
          is the process which runs &man.jail.8;, as it calls the
          &man.jail.2; system call. When a program is executed through
          &man.execve.2;, it inherits the jailed property of its parent's
          <literal>ucred</literal> structure, therefore it has a jailed
          <literal>ucred</literal> structure.</para>

        <programlisting><filename>/usr/src/sys/kern/kern_jail.c</filename>
int
jail(struct thread *td, struct jail_args *uap)
{
...
    struct jail_attach_args jaa;
...
    error = jail_attach(td, &amp;jaa);
    if (error)
        goto e_dropprref;
...
}

int
jail_attach(struct thread *td, struct jail_attach_args *uap)
{
    struct proc *p;
    struct ucred *newcred, *oldcred;
    struct prison *pr;
...
    p = td-&gt;td_proc;
...
    pr = prison_find(uap-&gt;jid);
...
    change_root(pr-&gt;pr_root, td);
...
    newcred-&gt;cr_prison = pr;
    p-&gt;p_ucred = newcred;
...
}</programlisting>
        <para>When a process is forked from its parent process, the
          &man.fork.2; system call uses <literal>crhold()</literal> to
          maintain the credential for the newly forked process. This inherently
          keeps the newly forked child's credential consistent with its parent's,
          so the child process is also jailed.</para>

        <programlisting><filename>/usr/src/sys/kern/kern_fork.c</filename>:
p2-&gt;p_ucred = crhold(td-&gt;td_ucred);
...
td2-&gt;td_ucred = crhold(p2-&gt;p_ucred);</programlisting>

      </sect3>
    </sect2>
  </sect1>

  <sect1 id="jail-restrictions">
    <title>Restrictions</title>

    <para>Throughout the kernel there are access restrictions relating
      to jailed processes. Usually, these restrictions only check whether
      the process is jailed and, if so, return an error. For
      example:</para>

    <programlisting>
if (jailed(td-&gt;td_ucred))
    return (EPERM);</programlisting>

    <sect2>
      <title>SysV IPC</title>

      <indexterm><primary>System V IPC</primary></indexterm>

      <para>System V IPC is based on messages. Processes can send each
        other these messages which tell them how to act. The functions
        which deal with messages are:
        &man.msgctl.3;, &man.msgget.3;, &man.msgsnd.3; and &man.msgrcv.3;.
        Earlier, I mentioned that there were certain sysctls you could
        turn on or off in order to affect the behavior of
        <application>jail</application>. One of these sysctls was
        <literal>security.jail.sysvipc_allowed</literal>.  By default,
        this sysctl is set to 0. If it were set to 1, it would defeat the
        whole purpose of having a <application>jail</application>; privileged
        users from the <application>jail</application> would be able to
        affect processes outside the jailed environment. The difference
        between a message and a signal is that a signal consists only
        of the signal number, whereas a message can carry data.</para>

      <para><filename>/usr/src/sys/kern/sysv_msg.c</filename>:</para>

      <itemizedlist>
        <listitem> <para><literal>msgget(key, msgflg)</literal>:
        <literal>msgget</literal> returns (and possibly creates) a message
        descriptor that designates a message queue for use in other
        functions.</para></listitem>

        <listitem> <para><literal>msgctl(msgid, cmd, buf)</literal>:
        Using this function, a process can query the status of a message
        descriptor.</para></listitem>

        <listitem> <para><literal>msgsnd(msgid, msgp, msgsz, msgflg)</literal>:
        <literal>msgsnd</literal> sends a message to a
        process.</para></listitem>

        <listitem> <para><literal>msgrcv(msgid, msgp, msgsz, msgtyp,
        msgflg)</literal>: a process receives messages using
        this function.</para></listitem>

      </itemizedlist>

      <para>In each of the system calls corresponding to these functions,
        there is this conditional:</para>

      <programlisting><filename>/usr/src/sys/kern/sysv_msg.c</filename>:
if (!jail_sysvipc_allowed &amp;&amp; jailed(td-&gt;td_ucred))
    return (ENOSYS);</programlisting>

      <indexterm><primary>semaphores</primary></indexterm>
      <para>Semaphore system calls allow processes to synchronize
        execution by doing a set of operations atomically on a set of
        semaphores. Basically, semaphores provide another way for
        processes to lock resources. However, a process waiting on a
        semaphore that is being used will sleep until the resources
        are relinquished. The following semaphore system calls are
        blocked inside a <application>jail</application>: &man.semget.2;,
        &man.semctl.2; and &man.semop.2;.</para>

      <para><filename>/usr/src/sys/kern/sysv_sem.c</filename>:</para>

      <itemizedlist>
        <listitem>
          <para><literal>semctl(semid, semnum, cmd, ...)</literal>:
            <literal>semctl</literal> does the specified <literal>cmd</literal>
            on the semaphore queue indicated by
            <literal>semid</literal>.</para></listitem>

        <listitem>
           <para><literal>semget(key, nsems, flag)</literal>:
            <literal>semget</literal> creates an array of semaphores,
            corresponding to <literal>key</literal>.</para>

          <para><literal>key</literal> and <literal>flag</literal> take on
          the same meaning as they do in
          <literal>msgget</literal>.</para></listitem>

        <listitem><para><literal>semop(semid, array, nops)</literal>:
          <literal>semop</literal> performs a group of operations indicated
          by <literal>array</literal>, to the set of semaphores identified by
          <literal>semid</literal>.</para></listitem>
      </itemizedlist>

      <indexterm><primary>shared memory</primary></indexterm>
      <para>System V IPC allows for processes to share
        memory. Processes can communicate directly with each other by
        sharing parts of their virtual address space and then reading
        and writing data stored in the shared memory. These system
        calls are blocked within a jailed environment: &man.shmdt.2;,
        &man.shmat.2;, &man.shmctl.2; and &man.shmget.2;.</para>

      <para><filename>/usr/src/sys/kern/sysv_shm.c</filename>:</para>

      <itemizedlist>
        <listitem><para><literal>shmctl(shmid, cmd, buf)</literal>:
        <literal>shmctl</literal> does various control operations on the
        shared memory region identified by
        <literal>shmid</literal>.</para></listitem>

        <listitem><para><literal>shmget(key, size, flag)</literal>:
        <literal>shmget</literal> accesses or creates a shared memory
        region of <literal>size</literal> bytes.</para></listitem>

        <listitem><para><literal>shmat(shmid, addr, flag)</literal>:
        <literal>shmat</literal> attaches a shared memory region identified
        by <literal>shmid</literal> to the address space of a
        process.</para></listitem>

        <listitem><para><literal>shmdt(addr)</literal>:
        <literal>shmdt</literal> detaches the shared memory region
        previously attached at <literal>addr</literal>.</para></listitem>

      </itemizedlist>
    </sect2>

    <sect2>
      <title>Sockets</title>

      <indexterm><primary>sockets</primary></indexterm>
      <para><application>Jail</application> treats the &man.socket.2; system
        call and related lower-level socket functions in a special manner.
        In order to determine whether a certain socket is allowed to be
        created, it first checks to see if the sysctl
        <literal>security.jail.socket_unixiproute_only</literal> is set. If
        set, sockets are only allowed to be created if the family
        specified is either <literal>PF_LOCAL</literal>,
        <literal>PF_INET</literal> or
        <literal>PF_ROUTE</literal>. Otherwise, it returns an
        error.</para>

      <programlisting><filename>/usr/src/sys/kern/uipc_socket.c</filename>:
int
socreate(int dom, struct socket **aso, int type, int proto,
    struct ucred *cred, struct thread *td)
{
    struct protosw *prp;
...
    if (jailed(cred) &amp;&amp; jail_socket_unixiproute_only &amp;&amp;
        prp-&gt;pr_domain-&gt;dom_family != PF_LOCAL &amp;&amp;
        prp-&gt;pr_domain-&gt;dom_family != PF_INET &amp;&amp;
        prp-&gt;pr_domain-&gt;dom_family != PF_ROUTE) {
        return (EPROTONOSUPPORT);
    }
...
}</programlisting>

    </sect2>

    <sect2>
      <title>Berkeley Packet Filter</title>

      <indexterm><primary>Berkeley Packet Filter</primary></indexterm>
      <indexterm><primary>data link layer</primary></indexterm>

      <para>The <application>Berkeley Packet Filter</application> provides
        a raw interface to data link layers in a protocol independent
        fashion. Whether <application>BPF</application> can be used in a
        jailed environment is now controlled by &man.devfs.8;.</para>

    </sect2>

    <sect2>
      <title>Protocols</title>

      <indexterm><primary>protocols</primary></indexterm>

      <para>There are certain protocols which are very common, such as
        TCP, UDP, IP and ICMP. IP and ICMP are on the same level: the
        network layer 2. There are certain precautions which are
        taken in order to prevent a jailed process from binding a
        protocol to a certain address only if the <literal>nam</literal>
        parameter is set. <literal>nam</literal> is a pointer to a
        <literal>sockaddr</literal> structure,
        which describes the address on which to bind the service. A
        more exact definition is that <literal>sockaddr</literal> "may be
        used as a template for referring to the identifying tag and length of
        each address". In the function
        <literal>in_pcbbind_setup()</literal>, <literal>sin</literal> is a
        pointer to a <literal>sockaddr_in</literal> structure, which
        contains the port, address, length and domain family of the socket
        which is to be bound. Basically, this prevents any process in a
        <application>jail</application> from specifying an address
        that does not belong to the <application>jail</application> in which
        the calling process exists.</para>

      <programlisting><filename>/usr/src/sys/netinet/in_pcb.c</filename>:
int
in_pcbbind_setup(struct inpcb *inp, struct sockaddr *nam, in_addr_t *laddrp,
    u_short *lportp, struct ucred *cred)
{
    ...
    struct sockaddr_in *sin;
    ...
    if (nam) {
        sin = (struct sockaddr_in *)nam;
        ...
        if (sin-&gt;sin_addr.s_addr != INADDR_ANY)
            if (prison_ip(cred, 0, &amp;sin-&gt;sin_addr.s_addr))
                return(EINVAL);
        ...
        if (lport) {
            ...
            if (prison &amp;&amp; prison_ip(cred, 0, &amp;sin-&gt;sin_addr.s_addr))
                return (EADDRNOTAVAIL);
            ...
        }
    }
    if (lport == 0) {
        ...
        if (laddr.s_addr != INADDR_ANY)
            if (prison_ip(cred, 0, &amp;laddr.s_addr))
                return (EINVAL);
        ...
    }
...
    if (prison_ip(cred, 0, &amp;laddr.s_addr))
        return (EINVAL);
...
}</programlisting>

      <para>You might be wondering what function
        <literal>prison_ip()</literal> does. <literal>prison_ip()</literal>
        is given three arguments: a pointer to the credential (represented by
        <literal>cred</literal>), any flags, and an IP address. It
        returns 1 if the IP address does NOT belong to the
        <application>jail</application> or 0 otherwise.  As you can see
        from the code, if it is indeed an IP address not belonging to the
        <application>jail</application>, the protocol is not allowed to bind
        to that address.</para>

      <programlisting><filename>/usr/src/sys/kern/kern_jail.c:</filename>
int
prison_ip(struct ucred *cred, int flag, u_int32_t *ip)
{
    u_int32_t tmp;

    if (!jailed(cred))
        return (0);
    if (flag)
        tmp = *ip;
    else
        tmp = ntohl(*ip);
    if (tmp == INADDR_ANY) {
        if (flag)
            *ip = cred-&gt;cr_prison-&gt;pr_ip;
        else
            *ip = htonl(cred-&gt;cr_prison-&gt;pr_ip);
        return (0);
    }
    if (tmp == INADDR_LOOPBACK) {
        if (flag)
            *ip = cred-&gt;cr_prison-&gt;pr_ip;
        else
            *ip = htonl(cred-&gt;cr_prison-&gt;pr_ip);
        return (0);
    }
    if (cred-&gt;cr_prison-&gt;pr_ip != tmp)
        return (1);
    return (0);
}</programlisting>
    </sect2>

    <sect2>
      <title>Filesystem</title>

      <indexterm><primary>filesystem</primary></indexterm>
      <para>Even <literal>root</literal> users within the
        <application>jail</application> are not allowed to unset or modify
        any file flags, such as immutable, append-only, and undeletable
        flags, if the securelevel is greater than 0.</para>

      <programlisting><filename>/usr/src/sys/ufs/ufs/ufs_vnops.c:</filename>
static int
ufs_setattr(ap)
    ...
{
    ...
        if (!priv_check_cred(cred, PRIV_VFS_SYSFLAGS, 0)) {
            if (ip-&gt;i_flags
                &amp; (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND)) {
                    error = securelevel_gt(cred, 0);
                    if (error)
                        return (error);
            }
            ...
        }
}
<filename>/usr/src/sys/kern/kern_priv.c</filename>
int
priv_check_cred(struct ucred *cred, int priv, int flags)
{
    ...
    error = prison_priv_check(cred, priv);
    if (error)
        return (error);
    ...
}
<filename>/usr/src/sys/kern/kern_jail.c</filename>
int
prison_priv_check(struct ucred *cred, int priv)
{
    ...
    switch (priv) {
    ...
    case PRIV_VFS_SYSFLAGS:
        if (jail_chflags_allowed)
            return (0);
        else
            return (EPERM);
    ...
    }
    ...
}</programlisting>
    </sect2>

  </sect1>

</chapter>