aboutsummaryrefslogtreecommitdiff
path: root/en_US.ISO8859-1/books/fdp-primer/sgml-primer/chapter.xml
blob: 7a9929df529a025105a2137e8967d41d96ec162e (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
<?xml version="1.0" encoding="iso-8859-1" standalone="no"?>
<!-- Copyright (c) 1998, 1999 Nik Clayton, All rights reserved.

     Redistribution and use in source (SGML DocBook) and 'compiled' forms
     (SGML, HTML, PDF, PostScript, RTF and so forth) with or without
     modification, are permitted provided that the following conditions
     are met:

      1. Redistributions of source code (SGML DocBook) must retain the above
         copyright notice, this list of conditions and the following
         disclaimer as the first lines of this file unmodified.

      2. Redistributions in compiled form (transformed to other DTDs,
         converted to PDF, PostScript, RTF and other formats) must reproduce
         the above copyright notice, this list of conditions and the
         following disclaimer in the documentation and/or other materials
         provided with the distribution.

     THIS DOCUMENTATION IS PROVIDED BY NIK CLAYTON "AS IS" AND ANY EXPRESS OR
     IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     DISCLAIMED. IN NO EVENT SHALL NIK CLAYTON BE LIABLE FOR ANY DIRECT,
     INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
     (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
     SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
     STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
     ANY WAY OUT OF THE USE OF THIS DOCUMENTATION, EVEN IF ADVISED OF THE
     POSSIBILITY OF SUCH DAMAGE.

     $FreeBSD$
-->

<chapter id="xml-primer">
  <title>XML Primer</title>

  <para>The majority of FDP documentation is written in applications
    of XML.  This chapter explains exactly what that means, how to
    read and understand the source to the documentation, and the sort
    of XML tricks you will see used in the documentation.</para>

  <para>Portions of this section were inspired by Mark Galassi's
    <ulink
      url="http://www.galassi.org/mark/mydocs/docbook-intro/docbook-intro.html">Get
      Going With DocBook</ulink>.</para>

  <sect1 id="xml-primer-overview">
    <title>Overview</title>

    <para>Way back when, electronic text was simple to deal with.
      Admittedly, you had to know which character set your document
      was written in (ASCII, EBCDIC, or one of a number of others) but
      that was about it.  Text was text, and what you saw really was
      what you got.  No frills, no formatting, no intelligence.</para>

    <para>Inevitably, this was not enough.  Once you have text in a
      machine-usable format, you expect machines to be able to use it
      and manipulate it intelligently.  You would like to indicate
      that certain phrases should be emphasized, or added to a
      glossary, or be hyperlinks.  You might want filenames to be
      shown in a <quote>typewriter</quote> style font for viewing on
      screen, but as <quote>italics</quote> when printed, or any of a
      myriad of other options for presentation.</para>

    <para>It was once hoped that Artificial Intelligence (AI) would
      make this easy.  Your computer would read in the document and
      automatically identify key phrases, filenames, text that the
      reader should type in, examples, and more.  Unfortunately, real
      life has not happened quite like that, and our computers require
      some assistance before they can meaningfully process our
      text.</para>

    <para>More precisely, they need help identifying what is what.
      Let's look at this text:</para>

    <blockquote>
      <para>To remove <filename>/tmp/foo</filename> use
	&man.rm.1;.</para>

      <screen>&prompt.user; <userinput>rm /tmp/foo</userinput></screen>
    </blockquote>

    <para>It is easy to see which parts are filenames, which are
      commands to be typed in, which parts are references to manual
      pages, and so on.  But the computer processing the document
      cannot.  For this we need markup.</para>

    <para><quote>Markup</quote> is commonly used to describe
      <quote>adding value</quote> or <quote>increasing cost</quote>.
      The term takes on both these meanings when applied to text.
      Markup is additional text included in the document,
      distinguished from the document's content in some way, so that
      programs that process the document can read the markup and use
      it when making decisions about the document.  Editors can hide
      the markup from the user, so the user is not distracted by
      it.</para>

    <para>The extra information stored in the markup <emphasis>adds
	value</emphasis> to the document.  Adding the markup to the
      document must typically be done by a person&mdash;after all, if
      computers could recognize the text sufficiently well to add the
      markup then there would be no need to add it in the first place.
      This <emphasis>increases the cost</emphasis> (i.e., the effort
      required) to create the document.</para>

    <para>The previous example is actually represented in this
      document like this:</para>

    <programlisting><![CDATA[
<para>To remove <filename>/tmp/foo</filename> use &man.rm.1;.</para>

<screen>&prompt.user; <userinput>rm /tmp/foo</userinput></screen>]]></programlisting>

    <para>As you can see, the markup is clearly separate from the
      content.</para>

    <para>Obviously, if you are going to use markup you need to define
      what your markup means, and how it should be interpreted.  You
      will need a markup language that you can follow when marking up
      your documents.</para>

    <para>Of course, one markup language might not be enough.  A
      markup language for technical documentation has very different
      requirements than a markup language that was to be used for
      cookery recipes.  This, in turn, would be very different from a
      markup language used to describe poetry.  What you really need
      is a first language that you use to write these other markup
      languages.  A <emphasis>meta markup language</emphasis>.</para>

    <para>This is exactly what the eXtensible Markup
      Language (XML) is.  Many markup languages have been written in
      XML, including the two most used by the FDP, XHTML and
      DocBook.</para>

    <para>Each language definition is more properly called a grammar,
      vocabulary, schema or Document Type Definition (DTD).  There
      are various languages to specify an XML grammar, for example,
      DTD (yes, it also means the specification language itself),
      XML Schema (XSD) or RELAX NG.  The schema specifies the name
      of the elements that can be used, what order they appear in (and
      whether some markup can be used inside other markup) and related
      information.</para>

    <para id="xml-primer-validating">A schema is a
      <emphasis>complete</emphasis> specification of all the elements
      that are allowed to appear, the order in which they should
      appear, which elements are mandatory, which are optional, and so
      forth.  This makes it possible to write an XML
      <emphasis>parser</emphasis> which reads in both the schema and a
      document which claims to conform to the schema.  The parser can
      then confirm whether or not all the elements required by the vocabulary
      are in the document in the right order, and whether there are
      any errors in the markup.  This is normally referred to as
      <quote>validating the document</quote>.</para>

    <note>
      <para>This processing simply confirms that the choice of
	elements, their ordering, and so on, conforms to that listed
	in the grammar.  It does <emphasis>not</emphasis> check that you
	have used <emphasis>appropriate</emphasis> markup for the
	content.  If you tried to mark up all the filenames in your
	document as function names, the parser would not flag this as
	an error (assuming, of course, that your schema defines elements
	for filenames and functions, and that they are allowed to
	appear in the same place).</para>
    </note>

    <para>It is likely that most of your contributions to the
      Documentation Project will consist of content marked up in
      either XHTML or DocBook, rather than alterations to the schemas.
      For this reason this book will not touch on how to write a
      vocabulary.</para>
  </sect1>

  <sect1 id="xml-primer-elements">
    <title>Elements, Tags, and Attributes</title>

    <para>All the vocabularies written in XML share certain characteristics.
      This is hardly surprising, as the philosophy behind XML will
      inevitably show through.  One of the most obvious manifestations
      of this philosophy is that of <emphasis>content</emphasis> and
      <emphasis>elements</emphasis>.</para>

    <para>Your documentation (whether it is a single web page, or a
      lengthy book) is considered to consist of content.  This content
      is then divided (and further subdivided) into elements.  The
      purpose of adding markup is to name and identify the boundaries
      of these elements for further processing.</para>

    <para>For example, consider a typical book.  At the very top
      level, the book is itself an element.  This <quote>book</quote>
      element obviously contains chapters, which can be considered to
      be elements in their own right.  Each chapter will contain more
      elements, such as paragraphs, quotations, and footnotes.  Each
      paragraph might contain further elements, identifying content
      that was direct speech, or the name of a character in the
      story.</para>

    <para>You might like to think of this as <quote>chunking</quote>
      content.  At the very top level you have one chunk, the book.
      Look a little deeper, and you have more chunks, the individual
      chapters.  These are chunked further into paragraphs, footnotes,
      character names, and so on.</para>

    <para>Notice how you can make this differentiation between
      different elements of the content without resorting to any XML
      terms.  It really is surprisingly straightforward.  You could do
      this with a highlighter pen and a printout of the book, using
      different colors to indicate different chunks of content.</para>

    <para>Of course, we do not have an electronic highlighter pen, so
      we need some other way of indicating which element each piece of
      content belongs to.  In languages written in XML (XHTML,
      DocBook, et al.) this is done by means of
      <emphasis>tags</emphasis>.</para>

    <para>A tag is used to identify where a particular element starts,
      and where the element ends.  <emphasis>The tag is not part of
      the element itself</emphasis>. Because each grammar was normally
      written to mark up specific types of information, each one will
      recognize different elements, and will therefore have different
      names for the tags.</para>

    <para>For an element called
      <replaceable>element-name</replaceable> the start tag will
      normally look like
      <sgmltag><replaceable>element-name</replaceable></sgmltag>.  The
      corresponding closing tag for this element is
      <sgmltag>/<replaceable>element-name</replaceable></sgmltag>.</para>

    <example>
      <title>Using an Element (Start and End Tags)</title>

      <para>XHTML has an element for indicating that the content
	enclosed by the element is a paragraph, called
	<sgmltag>p</sgmltag>.</para>

      <programlisting><![CDATA[<p>This is a paragraph.  It starts with the start tag for
  the 'p' element, and it will end with the end tag for the 'p'
  element.</p>

<p>This is another paragraph.  But this one is much shorter.</p>]]></programlisting>
    </example>

    <para>Some elements have no
      content.  For example, in XHTML you can indicate that you want a
      horizontal line to appear in the document.</para>

    <para>For such elements, which have no content at all, XML
      introduced a shorthand form that is completely equivalent to
      writing the start tag immediately followed by the end tag:</para>

    <programlisting><![CDATA[<hr/>]]></programlisting>

    <example>
      <title>Using an Element (Without Content)</title>

      <para>XHTML has an element for indicating a horizontal rule,
	called <sgmltag>hr</sgmltag>.  This element does not wrap
	content, so it looks like this.</para>

      <programlisting><![CDATA[<p>One paragraph.</p>
<hr></hr>

<p>This is another paragraph.  A horizontal rule separates this
  from the previous paragraph.</p>]]></programlisting>

      <para>For elements such as this, which have no content at all, XML
	introduced a shorthand form that is completely equivalent to the
	above form:</para>

      <programlisting><![CDATA[<p>One paragraph.</p>
<hr/>

<p>This is another paragraph.  A horizontal rule separates this
  from the previous paragraph.</p>]]></programlisting>
    </example>

    <para>If it is not obvious by now, elements can contain other
      elements.  In the book example earlier, the book element
      contained all the chapter elements, which in turn contained all
      the paragraph elements, and so on.</para>

    <example>
      <title>Elements within Elements; <sgmltag>em</sgmltag></title>

      <programlisting><![CDATA[<p>This is a simple <em>paragraph</em> where some
  of the <em>words</em> have been <em>emphasized</em>.</p>]]></programlisting>
    </example>

    <para>The grammar will specify the rules detailing which elements can
      contain other elements, and exactly what they can
      contain.</para>

    <important>
      <para>People often confuse the terms tags and elements, and use
	the terms as if they were interchangeable.  They are
	not.</para>

      <para>An element is a conceptual part of your document.  An
	element has a defined start and end.  The tags mark where the
	element starts and ends.</para>

      <para>When this document (or anyone else knowledgeable about
	XML) refers to <quote>the <sgmltag>p</sgmltag> tag</quote>
	they mean the literal text consisting of the three characters
	<literal>&lt;</literal>, <literal>p</literal>, and
	<literal>&gt;</literal>.  But the phrase <quote>the
	  <sgmltag>p</sgmltag> element</quote> refers to the whole
	element.</para>

      <para>This distinction <emphasis>is</emphasis> very subtle.  But
	keep it in mind.</para>
    </important>

    <para>Elements can have attributes.  An attribute has a name and a
      value, and is used for adding extra information to the element.
      This might be information that indicates how the content should
      be rendered, or might be something that uniquely identifies that
      occurrence of the element, or it might be something else.</para>

    <para>An element's attributes are written
      <emphasis>inside</emphasis> the start tag for that element, and
      take the form
      <literal><replaceable>attribute-name</replaceable>="<replaceable>attribute-value</replaceable>"</literal>.</para>

    <para>In XHTML, the
      <sgmltag>p</sgmltag> element has an attribute called
      <sgmltag class="attribute">align</sgmltag>, which suggests an alignment
      (justification) for the paragraph to the program displaying the
      XHTML.</para>

    <para>The <literal>align</literal> attribute can take one of four
      defined values, <literal>left</literal>,
      <literal>center</literal>, <literal>right</literal> and
      <literal>justify</literal>.  If the attribute is not specified
      then the default is <literal>left</literal>.</para>

    <example>
      <title>Using an Element with an Attribute</title>

      <programlisting><![CDATA[<p align="left">The inclusion of the align attribute
  on this paragraph was superfluous, since the default is left.</p>

<p align="center">This may appear in the center.</p>]]></programlisting>
    </example>

    <para>Some attributes will only take specific values, such as
      <literal>left</literal> or <literal>justify</literal>.  Others
      will allow you to enter anything you want.</para>

    <example>
      <title>Single Quotes Around Attributes</title>

      <programlisting><![CDATA[<p align='right'>I am on the right!</p>]]></programlisting>
    </example>

    <para>XML requires you to quote each attribute value with either
      single or double quotes.  Double quotes are more common, but
      single quotes work just as well.  Single quotes are practical
      if you want to include double quotes in the attribute
      value.</para>

    <para>The information on attributes, elements, and tags is stored
      in XML catalogs.  The various Documentation Project tools use
      these catalog files to validate your work.  The tools in
      <filename role="package">textproc/docproj</filename> include a
      variety of XML catalog files.  The FreeBSD Documentation
      Project includes its own set of catalog files.  Your tools need
      to know about both sorts of catalog files.</para>

    <sect2>
      <title>For You to Do&hellip;</title>

      <para>In order to run the examples in this document you will
	need to install some software on your system and ensure that
	an environment variable is set correctly.</para>

      <procedure>
	<step>
	  <para>Download and install
	    <filename role="package">textproc/docproj</filename> from
	    the FreeBSD ports system.  This is a
	    <emphasis>meta-port</emphasis> that should download and
	    install all of the programs and supporting files that are
	    used by the Documentation Project.</para>
	</step>

	<step>
	  <para>Add lines to your shell startup files to set
	    <envar>SGML_CATALOG_FILES</envar>. (If you are not working
	    on the English version of the documentation, you will want
	    to substitute the correct directory for your
	    language.)</para>

	  <example id="xml-primer-envars">
	    <title><filename>.profile</filename>, for &man.sh.1; and
	      &man.bash.1; Users</title>

	    <programlisting>SGML_ROOT=/usr/local/share/xml
SGML_CATALOG_FILES=${SGML_ROOT}/jade/catalog
SGML_CATALOG_FILES=${SGML_ROOT}/docbook/4.1/catalog:$SGML_CATALOG_FILES
SGML_CATALOG_FILES=${SGML_ROOT}/html/catalog:$SGML_CATALOG_FILES
SGML_CATALOG_FILES=${SGML_ROOT}/iso8879/catalog:$SGML_CATALOG_FILES
SGML_CATALOG_FILES=/usr/doc/share/xml/catalog:$SGML_CATALOG_FILES
SGML_CATALOG_FILES=/usr/doc/en_US.ISO8859-1/share/xml/catalog:$SGML_CATALOG_FILES
export SGML_CATALOG_FILES</programlisting>
	  </example>

	  <example>
	    <title><filename>.cshrc</filename>, for &man.csh.1; and
	      &man.tcsh.1; Users</title>

	    <programlisting>setenv SGML_ROOT /usr/local/share/xml
setenv SGML_CATALOG_FILES ${SGML_ROOT}/jade/catalog
setenv SGML_CATALOG_FILES ${SGML_ROOT}/docbook/4.1/catalog:$SGML_CATALOG_FILES
setenv SGML_CATALOG_FILES ${SGML_ROOT}/html/catalog:$SGML_CATALOG_FILES
setenv SGML_CATALOG_FILES ${SGML_ROOT}/iso8879/catalog:$SGML_CATALOG_FILES
setenv SGML_CATALOG_FILES /usr/doc/share/xml/catalog:$SGML_CATALOG_FILES
setenv SGML_CATALOG_FILES /usr/doc/en_US.ISO8859-1/share/xml/catalog:$SGML_CATALOG_FILES</programlisting>
	  </example>

	  <para>Then either log out, and log back in again, or run
	    those commands from the command line to set the variable
	    values.</para>
	</step>
      </procedure>

      <procedure>
	<step>
	  <para>Create <filename>example.xml</filename>, and enter
	    the following text:</para>

	  <programlisting><![CDATA[<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">

<html xmlns="http://www.w3.org/1999/xhtml">
  <head>
    <title>An Example XHTML File</title>
  </head>

  <body>
    <p>This is a paragraph containing some text.</p>

    <p>This paragraph contains some more text.</p>

    <p align="right">This paragraph might be right-justified.</p>
  </body>
</html>]]></programlisting>
	</step>

	<step>
	  <para>Try to validate this file using an XML parser.</para>

	  <para>Part of
	    <filename role="package">textproc/docproj</filename> is
	    the <command>xmllint</command>
	    <link linkend="xml-primer-validating">validating
	      parser</link>.</para>

	  <para>Use <command>xmllint</command> in the following way to
	    check that your document is valid:</para>

	  <screen>&prompt.user; <userinput>xmllint --valid --noout example.xml</userinput></screen>

	  <para>As you will see, <command>xmllint</command> returns
	    without displaying any output.  This means that your
	    document validated successfully.</para>
	</step>

	<step>
	  <para>See what happens when required elements are omitted.
	    Try removing the <sgmltag>title</sgmltag> and
	    <sgmltag>/title</sgmltag> tags, and re-run the
	    validation.</para>

	  <screen>&prompt.user; <userinput>xmllint --valid --noout example.xml</userinput>
example.xml:5: element head: validity error : Element head content does not follow the DTD, expecting ((script | style | meta | link | object | isindex)* , ((title , (script | style | meta | link | object | isindex)* , (base , (script | style | meta | link | object | isindex)*)?) | (base , (script | style | meta | link | object | isindex)* , title , (script | style | meta | link | object | isindex)*))), got ()</screen>

	  <para>This line tells you that the validation error comes from
	    the <replaceable>fifth</replaceable> line of the
	    <replaceable>example.xml</replaceable> file and that the
	    content of the <sgmltag>head</sgmltag> element is the part that
	    does not follow the rules described by the XHTML grammar.</para>

	  <para>Below this line <command>xmllint</command> will show you
	    the line where the error has been found and will also mark the
	    exact character position with a ^ sign.</para>
	</step>

	<step>
	  <para>Put the <sgmltag>title</sgmltag> element back
	    in.</para>
	</step>
      </procedure>
    </sect2>
  </sect1>

  <sect1 id="xml-primer-doctype-declaration">
    <title>The DOCTYPE Declaration</title>

    <para>The beginning of each document that you write may specify
      the name of the DTD that the document conforms to, if you use
      the DTD specification language.  Other specification languages,
      like XML Schema and RELAX NG, are not referenced in the source
      document.  The DOCTYPE declaration lets XML parsers determine
      the DTD and ensure that the document does conform to it.</para>

    <para>A typical declaration for a document written to conform with
      version 1.0 of the XHTML DTD looks like this:</para>

    <programlisting><![CDATA[<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">]]></programlisting>

    <para>That line contains a number of different components.</para>

    <variablelist>
      <varlistentry>
	<term><literal>&lt;!</literal></term>

	<listitem>
	  <para>Is the <emphasis>indicator</emphasis> that indicates
	    that this is an XML declaration.  This line is declaring
	    the document type.</para>
	</listitem>
      </varlistentry>

      <varlistentry>
	<term><literal>DOCTYPE</literal></term>

	<listitem>
	  <para>Shows that this is an XML declaration for the
	    document type.</para>
	</listitem>
      </varlistentry>

      <varlistentry>
	<term><literal>html</literal></term>

	<listitem>
	  <para>Names the first
	    <link linkend="xml-primer-elements">element</link> that
	    will appear in the document.</para>
	</listitem>
      </varlistentry>

      <varlistentry>
	<term><literal>PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"</literal></term>

	<listitem>
	  <para>Lists the Formal Public Identifier (FPI)
	    <indexterm>
	      <primary>Formal Public Identifier</primary>
	    </indexterm>
	    for the DTD that this document conforms to.  Your XML
	    parser will use this to find the correct DTD when
	    processing this document.</para>

	  <para><literal>PUBLIC</literal> is not a part of the FPI,
	    but indicates to the XML processor how to find the DTD
	    referenced in the FPI.  Other ways of telling the XML
	    parser how to find the DTD are shown <link
	      linkend="xml-primer-fpi-alternatives">later</link>.</para>
	</listitem>
      </varlistentry>

      <varlistentry>
	<term><literal>"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"</literal></term>

	<listitem>
          <para>A local filename or a URL where the DTD can be found.</para>
	</listitem>
      </varlistentry>

      <varlistentry>
	<term><literal>&gt;</literal></term>

	<listitem>
	  <para>Returns to the document.</para>
	</listitem>
      </varlistentry>
    </variablelist>

    <sect2>
      <title>Formal Public Identifiers (FPIs)
	<indexterm significance="preferred">
	  <primary>Formal Public Identifier</primary>
	</indexterm></title>

      <note>
	<para>You do not need to know this, but it is useful
	  background, and might help you debug problems when your XML
	  processor cannot locate the DTD you are using.</para>
      </note>

      <para>FPIs must follow a specific syntax.  This syntax is as
	follows:</para>

      <programlisting>"<replaceable>Owner</replaceable>//<replaceable>Keyword</replaceable> <replaceable>Description</replaceable>//<replaceable>Language</replaceable>"</programlisting>

      <variablelist>
	<varlistentry>
	  <term><replaceable>Owner</replaceable></term>

	  <listitem>
	    <para>This indicates the owner of the FPI.</para>

	    <para>If this string starts with <quote>ISO</quote> then
	      this is an ISO owned FPI.  For example, the FPI
	      <literal>"ISO 8879:1986//ENTITIES Greek
		Symbols//EN"</literal> lists
	      <literal>ISO 8879:1986</literal> as being the owner for
	      the set of entities for Greek symbols.  ISO 8879:1986 is
	      the ISO number for the SGML standard, the predecessor
	      (and a superset) of XML.</para>

	    <para>Otherwise, this string will either look like
	      <literal>-//<replaceable>Owner</replaceable></literal>
	      or
	      <literal>+//<replaceable>Owner</replaceable></literal>
	      (notice the only difference is the leading
	      <literal>+</literal> or <literal>-</literal>).</para>

	    <para>If the string starts with <literal>-</literal> then
	      the owner information is unregistered; if it starts with
	      <literal>+</literal> then the owner information is
	      registered.</para>

	    <para>ISO 9070:1991 defines how registered names are
	      generated; they might be derived from the number of an ISO
	      publication, an ISBN code, or an organization code
	      assigned according to ISO 6523.  In addition, a
	      registration authority could be created in order to
	      assign registered names.  The ISO council delegated this
	      to the American National Standards Institute
	      (ANSI).</para>

	    <para>Because the FreeBSD Project has not been registered
	      the owner string is <literal>-//FreeBSD</literal>.  And
	      as you can see, the W3C are not a registered owner
	      either.</para>
	  </listitem>
	</varlistentry>

	<varlistentry>
	  <term><replaceable>Keyword</replaceable></term>

	  <listitem>
	    <para>There are several keywords that indicate the type of
	      information in the file.  Some of the most common
	      keywords are <literal>DTD</literal>,
	      <literal>ELEMENT</literal>, <literal>ENTITIES</literal>,
	      and <literal>TEXT</literal>. <literal>DTD</literal> is
	      used only for DTD files, <literal>ELEMENT</literal> is
	      usually used for DTD fragments that contain only entity
	      or element declarations.  <literal>TEXT</literal> is
	      used for XML content (text and tags).</para>
	  </listitem>
	</varlistentry>

	<varlistentry>
	  <term><replaceable>Description</replaceable></term>

	  <listitem>
	    <para>Any description you want to supply for the contents
	      of this file.  This may include version numbers or any
	      short text that is meaningful to you and unique for the
	      XML system.</para>
	  </listitem>
	</varlistentry>

	<varlistentry>
	  <term><replaceable>Language</replaceable></term>

	  <listitem>
	    <para>This is an ISO two-character code that identifies
	      the native language for the file.  <literal>EN</literal>
	      is used for English.</para>
	  </listitem>
	</varlistentry>
      </variablelist>

      <sect3>
	<title><filename>catalog</filename> Files</title>

	<para>If you use the syntax above and process this document
	  using an XML processor, the processor will need to have
	  some way of turning the FPI into the name of the file on
	  your computer that contains the DTD.</para>

	<para>In order to do this it can use a catalog file.  A
	  catalog file (typically called <filename>catalog</filename>)
	  contains lines that map FPIs to filenames.  For example, if
	  the catalog file contained the line:</para>

<!-- XXX: mention XML catalog or maybe replace this totally and only cover XML catalog -->

	<programlisting>PUBLIC  "-//W3C//DTD XHTML 1.0 Transitional//EN"             "1.0/transitional.dtd"</programlisting>

	<para>The XML processor would know to look up the DTD from
	  <filename>transitional.dtd</filename> in the
	  <filename>1.0</filename> subdirectory of whichever directory
	  held the <filename>catalog</filename> file that contained
	  that line.</para>

	<para>Look at the contents of
	  <filename>/usr/local/share/xml/dtd/xhtml/catalog.xml</filename>.
	  This is the catalog file for the XHTML DTDs that will have
	  been installed as part of the <filename
	    role="package">textproc/docproj</filename> port.</para>
      </sect3>

      <sect3>
	<title><envar>SGML_CATALOG_FILES</envar></title>

	<para>In order to locate a <filename>catalog</filename> file,
	  your XML processor will need to know where to look.  Many
	  of them feature command line parameters for specifying the
	  path to one or more catalogs.</para>

	<para>In addition, you can set
	  <envar>SGML_CATALOG_FILES</envar> to point to the files.
	  This environment variable should consist of a
	  colon-separated list of catalog files (including their full
	  path).</para>

	<para>Typically, you will want to include the following
	  files:</para>

	<itemizedlist>
	  <listitem>
	    <para><filename>/usr/local/share/xml/docbook/4.1/catalog</filename></para>
	  </listitem>

	  <listitem>
	    <para><filename>/usr/local/share/xml/html/catalog</filename></para>
	  </listitem>

	  <listitem>
	    <para><filename>/usr/local/share/xml/iso8879/catalog</filename></para>
	  </listitem>

	  <listitem>
	    <para><filename>/usr/local/share/xml/jade/catalog</filename></para>
	  </listitem>
	</itemizedlist>

	<para>You should <link linkend="xml-primer-envars">already
	    have done this</link>.</para>
      </sect3>
    </sect2>

    <sect2 id="xml-primer-fpi-alternatives">
      <title>Alternatives to FPIs</title>

      <para>Instead of using an FPI to indicate the DTD that the
	document conforms to (and therefore, which file on the system
	contains the DTD) you can explicitly specify the name of the
	file.</para>

      <para>The syntax for this is slightly different:</para>

      <programlisting><![CDATA[<!DOCTYPE html SYSTEM "/path/to/file.dtd">]]></programlisting>

      <para>The <literal>SYSTEM</literal> keyword indicates that the
	XML processor should locate the DTD in a system specific
	fashion.  This typically (but not always) means the DTD will
	be provided as a filename.</para>

      <para>Using FPIs is preferred for reasons of portability.  You
	do not want to have to ship a copy of the DTD around with your
	document, and if you used the <literal>SYSTEM</literal>
	identifier then everyone would need to keep their DTDs in the
	same place.</para>
    </sect2>
  </sect1>

  <sect1 id="xml-primer-xml-escape">
    <title>Escaping Back to XML</title>

    <para>As mentioned earlier, XML is only used when writing a DTD.
      This is not strictly true.  There is certain XML syntax that
      you will want to be able to use within your documents.  For
      example, comments can be included in your document, and will be
      ignored by the parser.  Comments are entered using XML syntax.
      Other uses for XML syntax in your document will be shown later
      too.</para>

    <para>Obviously, you need some way of indicating to the XML
      processor that the following content is not elements within the
      document, but is XML that the parser should act upon.</para>

    <para>These sections are marked by
      <literal>&lt;! ... &gt;</literal> in your document.  Everything
      between these delimiters is XML syntax as you might find within
      a DTD.</para>

    <para>As you may just have realized, the
      <link linkend="xml-primer-doctype-declaration">DOCTYPE
	declaration</link> is an example of XML syntax that you need
      to include in your document&hellip;</para>
  </sect1>

  <sect1 id="xml-primer-comments">
    <title>Comments</title>

    <para>Comments are an XML construction, and are normally only
      valid inside a DTD.  However, as
      <xref linkend="xml-primer-xml-escape"/> shows, it is possible
      to use XML syntax within your document.</para>

    <para>The delimiter for XML comments is the string
      <quote><literal>--</literal></quote>.  The first occurrence of
      this string opens a comment, and the second closes it.</para>

    <example>
      <title>XML Generic Comment</title>

      <programlisting>&lt;!-- test comment --></programlisting>

      <programlisting><![CDATA[
<!-- This is inside the comment -->

<!-- This is another comment    -->

<!-- This is one way
     of doing multiline comments -->

<!-- This is another way of   --
  -- doing multiline comments -->]]></programlisting>
    </example>

    <![%output.print;[
    <important>
      <title>Use 2 Dashes</title>

      <para>There is a problem with producing the Postscript and PDF
	versions of this document.  The above example probably shows
	just one hyphen symbol, <literal>-</literal> after the
	<literal>&lt;!</literal> and before the
	<literal>&gt;</literal>.</para>

      <para>You <emphasis>must</emphasis> use two
	<literal>-</literal>, <emphasis>not</emphasis> one.  The
	Postscript and PDF versions have translated the two
	<literal>-</literal> in the original to a longer, more
	professional <emphasis>em-dash</emphasis>, and broken this
	example in the process.</para>

      <para>The XHTML, plain text, and RTF versions of this document
	are not affected.</para>
    </important>
    ]]>

    <para>If you have used XHTML before you may have been shown
      different rules for comments.  In particular, you may think that
      the string <literal>&lt;!--</literal> opens a comment, and it is
      only closed by <literal>--&gt;</literal>.</para>

    <para>This is <emphasis>not</emphasis> the case.  A lot of web
      browsers have broken XHTML parsers, and will accept that as
      valid.  However, the XML parsers used by the Documentation
      Project are much stricter, and will reject documents that make
      that error.</para>

    <example>
      <title>Erroneous XML Comments</title>

      <programlisting><![CDATA[
<!-- This is in the comment --

     THIS IS OUTSIDE THE COMMENT!

  -- back inside the comment -->]]></programlisting>

      <para>The XML parser will treat this as though it were
	actually:</para>

      <programlisting>&lt;!THIS IS OUTSIDE THE COMMENT&gt;</programlisting>

      <para>This is not valid XML, and may give confusing error
	messages.</para>

      <programlisting><![CDATA[<!--------------- This is a very bad idea --------------->]]></programlisting>

      <para>As the example suggests, <emphasis>do not</emphasis> write
	comments like that.</para>

      <programlisting><![CDATA[<!--===================================================-->]]></programlisting>

      <para>That is a (slightly) better approach, but it is still
	potentially confusing to people new to XML.</para>
    </example>

    <sect2>
      <title>For You to Do&hellip;</title>

      <procedure>
	<step>
	  <para>Add some comments to
	    <filename>example.xml</filename>, and check that the file
	    still validates using <command>xmllint</command>.</para>
	</step>

	<step>
	  <para>Add some invalid comments to
	    <filename>example.xml</filename>, and see the error
	    messages that <command>xmllint</command> gives when it
	    encounters an invalid comment.</para>
	</step>
      </procedure>
    </sect2>
  </sect1>

  <sect1 id="xml-primer-entities">
    <title>Entities</title>

    <para>Entities are a mechanism for assigning names to chunks of
      content.  As an XML parser processes your document, any
      entities it finds are replaced by the content of the
      entity.</para>

    <para>This is a good way to have re-usable, easily changeable
      chunks of content in your XML documents.  It is also the only
      way to include one marked up file inside another using
      XML.</para>

    <para>There are two types of entities, which can be used in two
      different situations: <emphasis>general entities</emphasis> and
      <emphasis>parameter entities</emphasis>.</para>

    <sect2 id="xml-primer-general-entities">
      <title>General Entities</title>

      <para>You cannot use general entities in an XML context
	(although you define them in one).  They can only be used in
	your document.  Contrast this with <link
	  linkend="xml-primer-parameter-entities">parameter
	  entities</link>.</para>

      <para>Each general entity has a name.  When you want to
	reference a general entity (and therefore include whatever
	text it represents in your document), you write
	<literal>&amp;<replaceable>entity-name</replaceable>;</literal>.
	For example, suppose you had an entity called
	<literal>current.version</literal> which expanded to the
	current version number of your product.  You could
	write:</para>

      <programlisting><![CDATA[<para>The current version of our product is
  &current.version;.</para>]]></programlisting>

      <para>When the version number changes you can simply change the
	definition of the value of the general entity and reprocess
	your document.</para>

      <para>You can also use general entities to enter characters that
	you could not otherwise include in an XML document.  For
	example, <literal>&lt;</literal> and <literal>&amp;</literal>
	cannot normally appear in an XML document.  When the XML
	parser sees the <literal>&lt;</literal> symbol it assumes that
	a tag (either a start tag or an end tag) is about to appear,
	and when it sees the <literal>&amp;</literal> symbol it
	assumes the next text will be the name of an entity.</para>

      <para>Fortunately, you can use the two general entities
	<literal>&amp;lt;</literal> and <literal>&amp;amp;</literal>
	whenever you need to include one or other of these.</para>

      <para>A general entity can only be defined within an XML
	context.  Typically, this is done immediately after the
	DOCTYPE declaration.</para>

      <example>
	<title>Defining General Entities</title>

	<programlisting><![CDATA[<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd" [
<!ENTITY current.version    "3.0-RELEASE">
<!ENTITY last.version       "2.2.7-RELEASE">
]>]]></programlisting>

	<para>Notice how the DOCTYPE declaration has been extended by
	  adding a square bracket after the URL of the DTD.  The
	  two entities are then defined over the next two lines,
	  before the square bracket is closed, and then the DOCTYPE
	  declaration is closed.</para>

	<para>The square brackets are necessary to indicate that we
	  are extending the DTD indicated by the DOCTYPE
	  declaration.</para>
      </example>
    </sect2>

    <sect2 id="xml-primer-parameter-entities">
      <title>Parameter Entities</title>

      <para>Like <link linkend="xml-primer-general-entities">general
	  entities</link>, parameter entities are used to assign names
	to reusable chunks of text.  However, whereas general entities
	can only be used within your document, parameter entities can
	only be used within an <link
	  linkend="xml-primer-xml-escape">XML
	  context</link>.</para>

      <para>Parameter entities are defined in a similar way to general
	entities.  However, instead of using
	<literal>&amp;<replaceable>entity-name</replaceable>;</literal>
	to refer to them, use
	<literal>%<replaceable>entity-name</replaceable>;</literal>
	<footnote><para><emphasis>P</emphasis>arameter entities use
	    the <emphasis>P</emphasis>ercent
	    symbol.</para></footnote>.  The definition also includes
	the <literal>%</literal> between the <literal>ENTITY</literal>
	keyword and the name of the entity.</para>

      <example>
	<title>Defining Parameter Entities</title>

	<programlisting><![CDATA[<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd" [
<!ENTITY % param.some "some">
<!ENTITY % param.text "text">
<!ENTITY % param.new  "%param.some; more %param.text;">

<!-- %param.new now contains "some more text" -->
]>]]></programlisting>
      </example>

      <para>This may not seem particularly useful.  It will be.</para>
    </sect2>

    <sect2>
      <title>For You to Do&hellip;</title>

      <procedure>
	<step>
	  <para>Add a general entity to
	    <filename>example.xml</filename>.</para>

	  <programlisting><![CDATA[<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd" [
<!ENTITY version "1.1">
]>

<html xmlns="http://www.w3.org/1999/xhtml">
  <head>
    <title>An Example XHTML File</title>
  </head>

  <!-- You might well have some comments in here as well -->

  <body>
    <p>This is a paragraph containing some text.</p>

    <p>This paragraph contains some more text.</p>

    <p align="right">This paragraph might be right-justified.</p>

    <p>The current version of this document is: &version;</p>
  </body>
</html>]]></programlisting>
	</step>

	<step>
	  <para>Validate the document using
	    <command>xmllint</command>.</para>
	</step>

	<step>
	  <para>Load <filename>example.xml</filename> into your web
	    browser (you may need to copy it to
	    <filename>example.html</filename> before your browser
	    recognizes it as an XHTML document).</para>

	  <para>Unless your browser is very advanced, you will not see
	    the entity reference <literal>&amp;version;</literal>
	    replaced with the version number.  Most web browsers have
	    very simplistic parsers which do not handle XML DTD
	    constructs.  Furthermore, the closing <literal>]&gt;</literal>
	    of the XML context is not recognized properly by browsers and
	    will probably be rendered.</para>
	</step>

	<step>
	  <para>The solution is to <emphasis>normalize</emphasis> your
	    document using an XML normalizer.  The normalizer reads
	    in valid XML and outputs equally valid XML which has
	    been transformed in some way.  One of the ways in which
	    the normalizer transforms the XML is to expand all the
	    entity references in the document, replacing the entities
	    with the text that they represent.</para>

	  <para>You can use <command>xmllint</command> to do
	    this.  It also has an option to drop the initial
	    DTD section so that the closing <literal>]&gt;</literal>
	    does not confuse browsers:</para>

	  <screen>&prompt.user; <userinput>xmllint --noent --dropdtd example.xml &gt; example.html</userinput></screen>

	  <para>You should find a normalized (i.e., entity references
	    expanded) copy of your document in
	    <filename>example.html</filename>, ready to load into your
	    web browser.</para>
	</step>
      </procedure>
    </sect2>
  </sect1>

  <sect1 id="xml-primer-include">
    <title>Using Entities to Include Files</title>

    <para>Entities (both
      <link linkend="xml-primer-general-entities">general</link> and
      <link linkend="xml-primer-parameter-entities">parameter</link>)
      are particularly useful when used to include one file inside
      another.</para>

    <sect2 id="xml-primer-include-using-gen-entities">
      <title>Using General Entities to Include Files</title>

      <para>Suppose you have some content for an XML book organized
	into files, one file per chapter, called
	<filename>chapter1.xml</filename>,
	<filename>chapter2.xml</filename>, and so forth, with a
	<filename>book.xml</filename> file that will contain these
	chapters.</para>

      <para>In order to use the contents of these files as the values
	for your entities, you declare them with the
	<literal>SYSTEM</literal> keyword.  This directs the XML
	parser to use the contents of the named file as the value of
	the entity.</para>

      <example>
	<title>Using General Entities to Include Files</title>

	<programlisting><![CDATA[<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd" [
<!ENTITY chapter.1 SYSTEM "chapter1.xml">
<!ENTITY chapter.2 SYSTEM "chapter2.xml">
<!ENTITY chapter.3 SYSTEM "chapter3.xml">
<!-- And so forth -->
]>

<html xmlns="http://www.w3.org/1999/xhtml">
  <!-- Use the entities to load in the chapters -->

  &chapter.1;
  &chapter.2;
  &chapter.3;
</html>]]></programlisting>
      </example>

      <warning>
	<para>When using general entities to include other files
	  within a document, the files being included
	  (<filename>chapter1.xml</filename>,
	  <filename>chapter2.xml</filename>, and so on)
	  <emphasis>must not</emphasis> start with a DOCTYPE
	  declaration.  This is a syntax error because entities
	  are low-level constructs and they are resolved before
	  any parsing happens.</para>
      </warning>
    </sect2>

    <sect2>
      <title>Using Parameter Entities to Include Files</title>

      <para>Recall that parameter entities can only be used inside an
	XML context.  Why then would you want to include a file
	within an XML context?</para>

      <para>You can use this to ensure that you can reuse your general
	entities.</para>

      <para>Suppose that you had many chapters in your document, and
	you reused these chapters in two different books, each book
	organizing the chapters in a different fashion.</para>

      <para>You could list the entities at the top of each book, but
	this quickly becomes cumbersome to manage.</para>

      <para>Instead, place the general entity definitions inside one
	file, and use a parameter entity to include that file within
	your document.</para>

      <example>
	<title>Using Parameter Entities to Include Files</title>

	<para>First, place your entity definitions in a separate file,
	  called <filename>chapters.ent</filename>.  This file
	  contains the following:</para>

	<programlisting><![CDATA[<!ENTITY chapter.1 SYSTEM "chapter1.xml">
<!ENTITY chapter.2 SYSTEM "chapter2.xml">
<!ENTITY chapter.3 SYSTEM "chapter3.xml">]]></programlisting>

	<para>Now create a parameter entity to refer to the contents
	  of the file.  Then use the parameter entity to load the file
	  into the document, which will then make all the general
	  entities available for use.  Then use the general entities
	  as before:</para>

	<programlisting><![CDATA[<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd" [
<!-- Define a parameter entity to load in the chapter general entities -->
<!ENTITY % chapters SYSTEM "chapters.ent">

<!-- Now use the parameter entity to load in this file -->
%chapters;
]>

<html xmlns="http://www.w3.org/1999/xhtml">
  &chapter.1;
  &chapter.2;
  &chapter.3;
</html>]]></programlisting>
      </example>
    </sect2>

    <sect2>
      <title>For You to Do&hellip;</title>

      <sect3>
	<title>Use General Entities to Include Files</title>

	<procedure>
	  <step>
	    <para>Create three files, <filename>para1.xml</filename>,
	      <filename>para2.xml</filename>, and
	      <filename>para3.xml</filename>.</para>

	    <para>Put content similar to the following in each
	      file:</para>

	    <programlisting><![CDATA[<p>This is the first paragraph.</p>]]></programlisting>
	  </step>

	  <step>
	    <para>Edit <filename>example.xml</filename> so that it
	      looks like this:</para>

	    <programlisting><![CDATA[<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd" [
<!ENTITY version "1.1">
<!ENTITY para1 SYSTEM "para1.xml">
<!ENTITY para2 SYSTEM "para2.xml">
<!ENTITY para3 SYSTEM "para3.xml">
]>

<html xmlns="http://www.w3.org/1999/xhtml">
  <head>
    <title>An Example XHTML File</title>
  </head>

  <body>
    <p>The current version of this document is: &version;</p>

    &para1;
    &para2;
    &para3;
  </body>
</html>]]></programlisting>
	  </step>

	  <step>
	    <para>Produce <filename>example.html</filename> by
	      normalizing <filename>example.xml</filename>.</para>

	    <screen>&prompt.user; <userinput>xmllint --dropdtd --noent example.xml &gt; example.html</userinput></screen>
	  </step>

	  <step>
	    <para>Load <filename>example.html</filename> into your web
	      browser, and confirm that the
	      <filename>para<replaceable>n</replaceable>.xml</filename>
	      files have been included in
	      <filename>example.html</filename>.</para>
	  </step>
	</procedure>
      </sect3>

      <sect3>
	<title>Use Parameter Entities to Include Files</title>

	<note>
	  <para>You must have taken the previous steps first.</para>
	</note>

	<procedure>
	  <step>
	    <para>Edit <filename>example.xml</filename> so that it
	      looks like this:</para>

	    <programlisting><![CDATA[<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd" [
<!ENTITY % entities SYSTEM "entities.ent"> %entities;
]>

<html xmlns="http://www.w3.org/1999/xhtml">
  <head>
    <title>An Example XHTML File</title>
  </head>

  <body>
    <p>The current version of this document is: &version;</p>

    &para1;
    &para2;
    &para3;
  </body>
</html>]]></programlisting>
	  </step>

	  <step>
	    <para>Create a new file,
	      <filename>entities.ent</filename>, with this
	      content:</para>

	    <programlisting><![CDATA[<!ENTITY version "1.1">
<!ENTITY para1 SYSTEM "para1.xml">
<!ENTITY para2 SYSTEM "para2.xml">
<!ENTITY para3 SYSTEM "para3.xml">]]></programlisting>
	  </step>

	  <step>
	    <para>Produce <filename>example.html</filename> by
	      normalizing <filename>example.xml</filename>.</para>

	    <screen>&prompt.user; <userinput>xmllint --dropdtd --noent example.xml &gt; example.html</userinput></screen>
	  </step>

	  <step>
	    <para>Load <filename>example.html</filename> into your web
	      browser, and confirm that the
	      <filename>para<replaceable>n</replaceable>.xml</filename>
	      files have been included in
	      <filename>example.html</filename>.</para>
	  </step>
	</procedure>
      </sect3>
    </sect2>
  </sect1>

  <sect1 id="xml-primer-marked-sections">
    <title>Marked Sections</title>

    <para>XML provides a mechanism to indicate that particular pieces
      of the document should be processed in a special way.  These are
      termed <quote>marked sections</quote>.</para>

    <example>
      <title>Structure of a Marked Section</title>

      <programlisting>&lt;![<replaceable>KEYWORD</replaceable>[
  Contents of marked section
]]&gt;</programlisting>
    </example>

    <para>As you would expect, being an XML construct, a marked
      section starts with <literal>&lt;!</literal>.</para>

    <para>The first square bracket begins to delimit the marked
      section.</para>

    <para><replaceable>KEYWORD</replaceable> describes how this marked
      section should be processed by the parser.</para>

    <para>The second square bracket indicates that the content of the
      marked section starts here.</para>

    <para>The marked section is finished by closing the two square
      brackets, and then returning to the document context from the
      XML context with <literal>&gt;</literal>.</para>

    <sect2>
      <title>Marked Section Keywords</title>

      <sect3>
	<title><literal>CDATA</literal></title>

	<para>These keywords denote the marked section's
	  <emphasis>content model</emphasis>, and allow you to change
	  it from the default.</para>

	<para>When an XML parser is processing a document it keeps
	  track of what is called the <quote>content
	    model</quote>.</para>

	<para>Briefly, the content model describes what sort of
	  content the parser is expecting to see, and what it will do
	  with it when it finds it.</para>

	<para>The content model you will probably find most
	  useful is <literal>CDATA</literal>.</para>

	<para><literal>CDATA</literal> is for <quote>Character
	    Data</quote>. If the parser is in this content model then
	  it is expecting to see characters, and characters only.  In
	  this model the <literal>&lt;</literal> and
	  <literal>&amp;</literal> symbols lose their special status,
	  and will be treated as ordinary characters.</para>

	<note>
	  <para>When you use <literal>CDATA</literal>
	    in examples of text marked up in
	    XML, keep in mind that the content of
	    <literal>CDATA</literal> is not validated.  You have to
	    check the included XML text using other means.  You
	    could, for example, write the example in another document,
	    validate the example code, and then paste it to your
	    <literal>CDATA</literal> content.</para>
	</note>

	<!-- The nesting of CDATA within the next example is disgusting -->
	<example>
	  <title>Using a <literal>CDATA</literal> Marked
	    Section</title>

	  <programlisting>&lt;para>Here is an example of how you would include some text
  that contained many &lt;literal>&amp;lt;&lt;/literal>
  and &lt;literal>&amp;amp;&lt;/literal> symbols.  The sample
  text is a fragment of XHTML.  The surrounding text (&lt;para> and
  &lt;programlisting>) are from DocBook.&lt;/para>

&lt;programlisting>
  &lt;![CDATA[<![CDATA[
    <p>This is a sample that shows you some of the elements within
      XHTML.  Since the angle brackets are used so many times, it is
      simpler to say the whole example is a CDATA marked section
      than to use the entity names for the left and right angle
      brackets throughout.</p>

    <ul>
      <li>This is a listitem</li>
      <li>This is a second listitem</li>
      <li>This is a third listitem</li>
    </ul>

    <p>This is the end of the example.</p>]]>
  ]]&gt;
&lt;/programlisting></programlisting>

	  <para>If you look at the source for this document you will
	    see this technique used throughout.</para>
	</example>
      </sect3>

      <sect3>
	<title><literal>INCLUDE</literal> and
	  <literal>IGNORE</literal></title>

	<para>If the keyword is <literal>INCLUDE</literal> then the
	  contents of the marked section will be processed.  If the
	  keyword is <literal>IGNORE</literal> then the marked section
	  is ignored and will not be processed.  It will not appear in
	  the output.</para>

	<example>
	  <title>Using <literal>INCLUDE</literal> and
	    <literal>IGNORE</literal> in Marked Sections</title>

	  <programlisting>&lt;![INCLUDE[
  This text will be processed and included.
]]&gt;

&lt;![IGNORE[
  This text will not be processed or included.
]]&gt;</programlisting>
	</example>

	<para>By itself, this is not too useful.  If you wanted to
	  remove text from your document you could cut it out, or wrap
	  it in comments.</para>

	<para>It becomes more useful when you realize you can use
	  <link linkend="xml-primer-parameter-entities">parameter
	    entities</link> to control this, yet this usage is limited
	  to entity files.</para>

	<para>For example, suppose that you produced a hard-copy
	  version of some documentation and an electronic version.  In
	  the electronic version you wanted to include some extra
	  content that was not to appear in the hard-copy.</para>

	<para>Create an entity file that defines general entities
	  to include each chapter and guard these definitions with
	  a parameter entity that can be set to either
	  <literal>INCLUDE</literal> or <literal>IGNORE</literal>
	  to control whether the entity is defined.  After these
	  conditional general entity definitions, place one more
	  definition for each general entity to set them to an
	  empty value.  This technique makes use of the fact that
	  entity definitions cannot be overridden: the first
	  definition always takes effect.  So you can control the
	  inclusion of your chapter with the corresponding parameter
	  entity; if you set it to <literal>INCLUDE</literal>, the
	  first general entity definition will be read and the
	  second one will be ignored but if you set it to
	  <literal>IGNORE</literal>, the first definition will be
	  ignored and the second one will take effect.</para>

	<example>
	  <title>Using a Parameter Entity to Control a Marked
	    Section</title>

	  <programlisting>
&lt;!ENTITY % electronic.copy "INCLUDE"&gt;

&lt;![%electronic.copy;[
&lt;!ENTITY chap.preface	SYSTEM "preface.xml"&gt;
]]&gt;

&lt;!ENTITY chap.preface ""&gt;
</programlisting>

	  <para>When producing the hard-copy version, change the
	    parameter entity's definition to:</para>

	  <programlisting>&lt;!ENTITY % electronic.copy "IGNORE"&gt;</programlisting>
	</example>
      </sect3>
    </sect2>

    <sect2>
      <title>For You to Do&hellip;</title>

      <procedure>
	<step>
	  <para>Modify the <filename>entities.ent</filename> file to contain
	    the following:</para>

	  <programlisting>&lt;!ENTITY version "1.1"&gt;
&lt;!ENTITY % conditional.text "IGNORE"&gt;

&lt;![%conditional.text;[
&lt;!ENTITY para1 SYSTEM "para1.xml"&gt;
]]&gt;

&lt;!ENTITY para1 ""&gt;

&lt;!ENTITY para2 SYSTEM "para2.xml"&gt;
&lt;!ENTITY para3 SYSTEM "para3.xml"&gt;</programlisting>
	</step>

	<step>
	  <para>Normalize the <filename>example.xml</filename> file and notice
	    that the conditional text is not present in the output document.
	    Now if you set the parameter entity guard to <literal>INCLUDE</literal>
	    and regenerate the normalized document, it will appear there again.
	    Of course, this method makes more sense if you have more conditional
	    chunks that depend on the same condition, for example, whether you are
	    generating printed or online text.</para>
	</step>
      </procedure>
    </sect2>
  </sect1>

  <sect1 id="xml-primer-conclusion">
    <title>Conclusion</title>

    <para>That is the conclusion of this XML primer.  For reasons of
      space and complexity several things have not been covered in
      depth (or at all).  However, the previous sections cover enough
      XML for you to be able to follow the organization of the FDP
      documentation.</para>
  </sect1>
</chapter>