aboutsummaryrefslogtreecommitdiff
path: root/en_US.ISO8859-1/books/fdp-primer/sgml-primer/chapter.sgml
blob: b126788a0b900157dd23cafddb6c71a494f29f81 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
<!-- Copyright (c) 1998, 1999 Nik Clayton, All rights reserved.

     Redistribution and use in source (SGML DocBook) and 'compiled' forms
     (SGML, HTML, PDF, PostScript, RTF and so forth) with or without
     modification, are permitted provided that the following conditions
     are met:

      1. Redistributions of source code (SGML DocBook) must retain the above
         copyright notice, this list of conditions and the following
         disclaimer as the first lines of this file unmodified.

      2. Redistributions in compiled form (transformed to other DTDs,
         converted to PDF, PostScript, RTF and other formats) must reproduce
         the above copyright notice, this list of conditions and the
         following disclaimer in the documentation and/or other materials
         provided with the distribution.

     THIS DOCUMENTATION IS PROVIDED BY NIK CLAYTON "AS IS" AND ANY EXPRESS OR
     IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     DISCLAIMED. IN NO EVENT SHALL NIK CLAYTON BE LIABLE FOR ANY DIRECT,
     INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
     (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
     SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
     STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
     ANY WAY OUT OF THE USE OF THIS DOCUMENTATION, EVEN IF ADVISED OF THE
     POSSIBILITY OF SUCH DAMAGE.

     $FreeBSD$
-->

<chapter id="sgml-primer">
  <title>SGML Primer</title>

  <para>The majority of FDP documentation is written in applications of
    SGML.  This chapter explains exactly what that means, how to read
    and understand the source to the documentation, and the sort of SGML
    tricks you will see used in the documentation.</para>

  <para>Portions of this section were inspired by Mark Galassi's <ulink
      url="http://nis-www.lanl.gov/~rosalia/mydocs/docbook-intro/docbook-intro.html">Get Going With DocBook</ulink>.</para>
  
  <sect1 id="sgml-primer-overview">
    <title>Overview</title>

    <para>Way back when, electronic text was simple to deal with.  Admittedly, 
      you had to know which character set your document was written in (ASCII, 
      EBCDIC, or one of a number of others) but that was about it.  Text was
      text, and what you saw really was what you got.  No frills, no
      formatting, no intelligence.</para>

    <para>Inevitably, this was not enough.  Once you have text in a
      machine-usable format, you expect machines to be able to use it and
      manipulate it intelligently.  You would like to indicate that certain
      phrases should be emphasized, or added to a glossary, or be hyperlinks.
      You might want filenames to be shown in a <quote>typewriter</quote> style 
      font for viewing on screen, but as <quote>italics</quote> when printed,
      or any of a myriad of other options for presentation.</para>

    <para>It was once hoped that Artificial Intelligence (AI) would make this
      easy.  Your computer would read in the document and automatically
      identify key phrases, filenames, text that the reader should type in,
      examples, and more.  Unfortunately, real life has not happened quite
      like that, and our computers require some assistance before they can
      meaningfully process our text.</para>

    <para>More precisely, they need help identifying what is what.  You or I
      can look at

      <blockquote>
	<para>To remove <filename>/tmp/foo</filename> use &man.rm.1;.</para>

	<screen>&prompt.user; <userinput>rm /tmp/foo</userinput></screen>
      </blockquote>

      and easily see which parts are filenames, which are commands to be typed
      in, which parts are references to manual pages, and so on.  But the
      computer processing the document cannot.  For this we need
      markup.</para>
    
    <para><quote>Markup</quote> is commonly used to describe <quote>adding
      value</quote> or <quote>increasing cost</quote>.  The term takes on both
      these meanings when applied to text.  Markup is additional text included 
      in the document, distinguished from the document's content in some way,
      so that programs that process the document can read the markup and use
      it when making decisions about the document.  Editors can hide the
      markup from the user, so the user is not distracted by it.</para>

    <para>The extra information stored in the markup <emphasis>adds
      value</emphasis> to the document.  Adding the markup to the document
      must typically be done by a person&mdash;after all, if computers could
      recognize the text sufficiently well to add the markup then there would
      be no need to add it in the first place.  This <emphasis>increases the
      cost</emphasis> (i.e., the effort required) to create the
      document.</para>

    <para>The previous example is actually represented in this document like
      this:</para>

    <programlisting><![ CDATA [
<para>To remove <filename>/tmp/foo</filename> use &man.rm.1;.</para>

<screen>&prompt.user; <userinput>rm /tmp/foo</userinput></screen>]]></programlisting>

    <para>As you can see, the markup is clearly separate from the
      content.</para>

    <para>Obviously, if you are going to use markup you need to define what
      your markup means, and how it should be interpreted.  You will need a
      markup language that you can follow when marking up your
      documents.</para>
    
    <para>Of course, one markup language might not be enough.  A markup
      language for technical documentation has very different requirements
      than a markup language intended for cookery recipes.  This,
      in turn, would be very different from a markup language used to describe
      poetry.  What you really need is a first language that you use to write
      these other markup languages.  A <emphasis>meta markup
      language</emphasis>.</para>
    
    <para>This is exactly what the Standard Generalized Markup Language (SGML)
      is.  Many markup languages have been written in SGML, including the two
      most used by the FDP, HTML and DocBook.</para>
    
    <para>Each language definition is more properly called a Document Type
      Definition (DTD).  The DTD specifies the name of the elements that can
      be used, what order they appear in (and whether some markup can be used
      inside other markup) and related information.  A DTD is sometimes
      referred to as an <emphasis>application</emphasis> of SGML.</para>

    <para id="sgml-primer-validating">A DTD is a <emphasis>complete</emphasis>
      specification of all the elements that are allowed to appear, the order
      in which they should appear, which elements are mandatory, which are
      optional, and so forth. This makes it possible to write an SGML
      <emphasis>parser</emphasis> which reads in both the DTD and a document
      which claims to conform to the DTD.  The parser can then confirm whether
      or not all the elements required by the DTD are in the document in the
      right order, and whether there are any errors in the markup.  This is
      normally referred to as <quote>validating the document</quote>.</para>

    <note>
      <para>This processing simply confirms that the choice of elements, their 
	ordering, and so on, conforms to that listed in the DTD.  It does
	<emphasis>not</emphasis> check that you have used
	<emphasis>appropriate</emphasis> markup for the content.  If you
	tried to mark up all the filenames in your document as function
	names, the parser would not flag this as an error (assuming, of
	course, that your DTD defines elements for filenames and functions,
	and that they are allowed to appear in the same place).</para>
    </note>
    
    <para>It is likely that most of your contributions to the Documentation
      Project will consist of content marked up in either HTML or DocBook,
      rather than alterations to the DTDs.  For this reason this book will
      not touch on how to write a DTD.</para>
  </sect1>
  
  <sect1 id="sgml-primer-elements">
    <title>Elements, tags, and attributes</title>

    <para>All the DTDs written in SGML share certain characteristics.  This is
      hardly surprising, as the philosophy behind SGML will inevitably show
      through. One of the most obvious manifestations of this philosophy is
      that of <emphasis>content</emphasis> and
      <emphasis>elements</emphasis>.</para>

    <para>Your documentation (whether it is a single web page, or a lengthy
      book) is considered to consist of content.  This content is then divided
      (and further subdivided) into elements.  The purpose of adding markup is
      to name and identify the boundaries of these elements for further
      processing.</para>

    <para>For example, consider a typical book.  At the very top level, the
      book is itself an element.  This <quote>book</quote> element obviously
      contains chapters, which can be considered to be elements in their own
      right.  Each chapter will contain more elements, such as paragraphs,
      quotations, and footnotes.  Each paragraph might contain further
      elements, identifying content that was direct speech, or the name of a
      character in the story.</para>

    <para>You might like to think of this as <quote>chunking</quote> content.
      At the very top level you have one chunk, the book.  Look a little
      deeper, and you have more chunks, the individual chapters.  These are
      chunked further into paragraphs, footnotes, character names, and so
      on.</para>

    <para>Notice how you can make this differentiation between different
      elements of the content without resorting to any SGML terms.  It really
      is surprisingly straightforward.  You could do this with a highlighter
      pen and a printout of the book, using different colors to indicate
      different chunks of content.</para>

    <para>Of course, we do not have an electronic highlighter pen, so we need
      some other way of indicating which element each piece of content belongs
      to.  In languages written in SGML (HTML, DocBook, et al.) this is done by
      means of <emphasis>tags</emphasis>.</para>

    <para>A tag is used to identify where a particular element starts, and
      where the element ends.  <emphasis>The tag is not part of the element
      itself</emphasis>.  Because each DTD is normally written to mark up
      specific types of information, each one will recognize different
      elements, and will therefore have different names for the tags.</para>

    <para>For an element called <replaceable>element-name</replaceable> the
      start tag will normally look like
      <literal>&lt;<replaceable>element-name</replaceable>&gt;</literal>.  The
      corresponding closing tag for this element is
      <literal>&lt;/<replaceable>element-name</replaceable>&gt;</literal>.</para>

    <example>
      <title>Using an element (start and end tags)</title>

      <para>HTML has an element for indicating that the content enclosed by
	the element is a paragraph, called <literal>p</literal>.  This
	element has both start and end tags.</para>
      
      <programlisting><![ CDATA [<p>This is a paragraph.  It starts with the start tag for
  the 'p' element, and it will end with the end tag for the 'p'
  element.</p>

<p>This is another paragraph.  But this one is much shorter.</p>]]></programlisting>	  
    </example>

    <para>Not all elements require an end tag.  Some elements have no content.
      For example, in HTML you can indicate that you want a horizontal line to
      appear in the document.  Obviously, this line has no content, so just
      the start tag is required for this element.</para>

    <example>
      <title>Using an element (start tag only)</title>

      <para>HTML has an element for indicating a horizontal rule, called
	<literal>hr</literal>.  This element does not wrap content, so only
	has a start tag.</para>

      <programlisting><![ CDATA [<p>This is a paragraph.</p>

<hr>

<p>This is another paragraph.  A horizontal rule separates this
  from the previous paragraph.</p>]]></programlisting>
    </example>
    
    <para>If it is not obvious by now, elements can contain other elements.
      In the book example earlier, the book element contained all the chapter
      elements, which in turn contained all the paragraph elements, and so
      on.</para>

    <example>
      <title>Elements within elements; <sgmltag>em</sgmltag></title>
      
      <programlisting><![ CDATA [<p>This is a simple <em>paragraph</em> where some
  of the <em>words</em> have been <em>emphasized</em>.</p>]]></programlisting>
    </example>
    
    <para>The DTD will specify the rules detailing which elements can contain
      other elements, and exactly what they can contain.</para>

    <important>
      <para>People often confuse the terms tags and elements, and use the
	terms as if they were interchangeable.  They are not.</para>

      <para>An element is a conceptual part of your document.  An element has
	a defined start and end.  The tags mark where the element starts and
	ends.</para>

      <para>When this document (or anyone else knowledgeable about SGML) refers
	to <quote>the &lt;p&gt; tag</quote> they mean the literal text
	consisting of the three characters <literal>&lt;</literal>,
	<literal>p</literal>, and <literal>&gt;</literal>.  But the phrase
	<quote>the &lt;p&gt; element</quote> refers to the whole
	element.</para>

      <para>This distinction <emphasis>is</emphasis> very subtle.  But keep it 
	in mind.</para>
    </important>
      
    <para>Elements can have attributes.  An attribute has a name and a value,
      and is used for adding extra information to the element.  This might be
      information that indicates how the content should be rendered, or might
      be something that uniquely identifies that occurrence of the element, or
      it might be something else.</para>

    <para>An element's attributes are written <emphasis>inside</emphasis> the
      start tag for that element, and take the form
      <literal><replaceable>attribute-name</replaceable>="<replaceable>attribute-value</replaceable>"</literal>.</para>

    <para>In sufficiently recent versions of HTML, the <sgmltag>p</sgmltag>
      element has an attribute called <literal>align</literal>, which suggests
      an alignment (justification) for the paragraph to the program displaying
      the HTML.</para>

    <para>The <literal>align</literal> attribute can take one of four defined
      values, <literal>left</literal>, <literal>center</literal>,
      <literal>right</literal> and <literal>justify</literal>.  If the
      attribute is not specified then the default is
      <literal>left</literal>.</para>

    <example>
      <title>Using an element with an attribute</title>
      
      <programlisting><![ CDATA [<p align="left">The inclusion of the align attribute
  on this paragraph was superfluous, since the default is left.</p>

<p align="center">This may appear in the center.</p>]]></programlisting>
    </example>
    
    <para>Some attributes will only take specific values, such as
      <literal>left</literal> or <literal>justify</literal>.  Others will
      allow you to enter anything you want.  If you need to include quotes
      (<literal>"</literal>) within an attribute value then use single quotes
      around that attribute value.</para>

    <example>
      <title>Single quotes around attributes</title>
      
      <programlisting><![ CDATA [<p align='right'>I am on the right!</p>]]></programlisting>
    </example>

    <para>Sometimes you do not need to use quotes around attribute values at
      all.  However, the rules for doing this are subtle, and it is far
      simpler just to <emphasis>always</emphasis> quote your attribute
      values.</para>

    <para>The information on attributes, elements, and tags is stored
      in DTDs, which are located through SGML catalog files.  The
      various Documentation Project tools use these catalog files to
      find the DTDs needed to validate your work.  The tools in
      <filename role="package">textproc/docproj</filename> include a variety of SGML catalog
      files.  The FreeBSD Documentation Project includes its own set
      of catalog files.  Your tools need to know about both sorts of
      catalog files.</para>

    <sect2>
      <title>For you to do&hellip;</title>

      <para>In order to run the examples in this document you will need to
        install some software on your system and ensure that an environment
        variable is set correctly.</para>
    
      <procedure>
	<step>
	  <para>Download and install <filename role="package">textproc/docproj</filename>
	    from the FreeBSD ports system.  This is a
            <emphasis>meta-port</emphasis> that should download and install
            all of the programs and supporting files that are used by the
            Documentation Project.</para>
	</step>
	
        <step>
          <para>Add lines to your shell startup files to set
            <envar>SGML_CATALOG_FILES</envar>. (If you are not working
            on the English version of the documentation, you will want
            to substitute the correct directory for your
            language.)</para>
    
	  <example id="sgml-primer-envars">
	    <title><filename>.profile</filename>, for &man.sh.1; and
	      &man.bash.1; users</title>
	    
	    <programlisting>SGML_ROOT=/usr/local/share/sgml
SGML_CATALOG_FILES=${SGML_ROOT}/jade/catalog
SGML_CATALOG_FILES=${SGML_ROOT}/iso8879/catalog:$SGML_CATALOG_FILES
SGML_CATALOG_FILES=${SGML_ROOT}/html/catalog:$SGML_CATALOG_FILES
SGML_CATALOG_FILES=${SGML_ROOT}/docbook/4.1/catalog:$SGML_CATALOG_FILES
SGML_CATALOG_FILES=/usr/doc/share/sgml/catalog:$SGML_CATALOG_FILES
SGML_CATALOG_FILES=/usr/doc/en_US.ISO8859-1/share/sgml/catalog:$SGML_CATALOG_FILES
export SGML_CATALOG_FILES</programlisting>
	  </example>

	  <example>
	    <title><filename>.cshrc</filename>, for &man.csh.1; and
	      &man.tcsh.1; users</title>
	    
	    <programlisting>setenv SGML_ROOT /usr/local/share/sgml
setenv SGML_CATALOG_FILES ${SGML_ROOT}/jade/catalog
setenv SGML_CATALOG_FILES ${SGML_ROOT}/iso8879/catalog:$SGML_CATALOG_FILES
setenv SGML_CATALOG_FILES ${SGML_ROOT}/html/catalog:$SGML_CATALOG_FILES
setenv SGML_CATALOG_FILES ${SGML_ROOT}/docbook/4.1/catalog:$SGML_CATALOG_FILES
setenv SGML_CATALOG_FILES /usr/doc/share/sgml/catalog:$SGML_CATALOG_FILES
setenv SGML_CATALOG_FILES /usr/doc/en_US.ISO8859-1/share/sgml/catalog:$SGML_CATALOG_FILES</programlisting>
          </example>
    
          <para>Then either log out, and log back in again, or run those
            commands from the command line to set the variable values.</para>
	</step>
      </procedure>
    
      <procedure>
	<step>
	  <para>Create <filename>example.sgml</filename>, and enter the
            following text:</para>

	  <programlisting><![ CDATA [<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">

<html>
  <head>	     
    <title>An example HTML file</title>
  </head>

  <body>	    
    <p>This is a paragraph containing some text.</p>

    <p>This paragraph contains some more text.</p>

    <p align="right">This paragraph might be right-justified.</p>
  </body>	    
</html>]]></programlisting>
	</step>

	<step>
	  <para>Try to validate this file using an SGML parser.</para>

	  <para>Part of <filename role="package">textproc/docproj</filename> is the
	    <command>nsgmls</command> <link linkend="sgml-primer-validating">validating
	    parser</link>.  Normally, <command>nsgmls</command> reads in a document
	    marked up according to an SGML DTD and returns a copy of the
	    document's Element Structure Information Set (ESIS, but that is
	    not important right now).</para>

	  <para>However, when <command>nsgmls</command> is given the <option>-s</option>
	    parameter, <command>nsgmls</command> will suppress its normal output, and
	    just print error messages.  This makes it a useful way to check to
	    see if your document is valid or not.</para>

	  <para>Use <command>nsgmls</command> to check that your document is
	    valid:</para>

          <screen>&prompt.user; <userinput>nsgmls -s example.sgml</userinput></screen>

	  <para>As you will see, <command>nsgmls</command> returns without displaying any
	    output.  This means that your document validated
	    successfully.</para>
	</step>

	<step>
	  <para>See what happens when required elements are omitted.  Try
	    removing the <sgmltag>title</sgmltag> and
	    <sgmltag>/title</sgmltag> tags, and re-run the validation.</para>

          <screen>&prompt.user; <userinput>nsgmls -s example.sgml</userinput>
nsgmls:example.sgml:5:4:E: character data is not allowed here
nsgmls:example.sgml:6:8:E: end tag for "HEAD" which is not finished</screen>

	  <para>The error output from <command>nsgmls</command> is organized into
	    colon-separated groups, or columns.</para>

	  <informaltable frame="none" pgwide="1">
	    <tgroup cols="2">
	      <thead>
		<row>
		  <entry>Column</entry>
		  <entry>Meaning</entry>
		</row>
	      </thead>
	      
	      <tbody>
		<row>
		  <entry>1</entry>
		  <entry>The name of the program generating the error.  This
		    will always be <literal>nsgmls</literal>.</entry>
		</row>

		<row>
		  <entry>2</entry>
		  <entry>The name of the file that contains the error.</entry>
		</row>

		<row>
		  <entry>3</entry>
		  <entry>Line number where the error appears.</entry>
		</row>

		<row>
		  <entry>4</entry>
		  <entry>Column number where the error appears.</entry>
		</row>

		<row>
		  <entry>5</entry>
		  <entry>A one letter code indicating the nature of the
		    message.  <literal>I</literal> indicates an informational
		    message, <literal>W</literal> is for warnings, and
		    <literal>E</literal> is for errors<footnote>
		      <para>It is not always the fifth column either.
			<command>nsgmls -sv</command> displays
			<literal>nsgmls:I: SP version "1.3"</literal>
			(depending on the installed version).  As you can see,
			this is an informational message.</para>
		    </footnote>, and <literal>X</literal> is for
		    cross-references.  As you can see, these messages are
		    errors.</entry>
		</row>

		<row>
		  <entry>6</entry>
		  <entry>The text of the error message.</entry>
		</row>
	      </tbody>
	    </tgroup>
	  </informaltable>

	  <para>Simply omitting the <sgmltag>title</sgmltag> tags has
	    generated 2 different errors.</para>

	  <para>The first error indicates that content (in this case,
	    characters, rather than the start tag for an element) has occurred
	    where the SGML parser was expecting something else.  In this case,
	    the parser was expecting to see one of the start tags for elements
	    that are valid inside <sgmltag>head</sgmltag> (such as
	    <sgmltag>title</sgmltag>).</para>

	  <para>The second error is because <sgmltag>head</sgmltag> elements
	    <emphasis>must</emphasis> contain a <sgmltag>title</sgmltag>
	    element.  Because it does not, <command>nsgmls</command> considers that the
	    element has not been properly finished.  However, the closing tag
	    indicates that the element has been closed before it has been
	    finished.</para>
	</step>

	<step>
	  <para>Put the <literal>title</literal> element back in.</para>
	</step>
      </procedure>
    </sect2>
  </sect1>

  <sect1 id="sgml-primer-doctype-declaration">
    <title>The DOCTYPE declaration</title>

    <para>The beginning of each document that you write must specify the name
      of the DTD that the document conforms to.  This is so that SGML parsers
      can determine the DTD and ensure that the document does conform to 
      it.</para>

    <para>This information is generally expressed on one line, in the DOCTYPE
      declaration.</para>

    <para>A typical declaration for a document written to conform with version
      4.0 of the HTML DTD looks like this:</para>

    <programlisting><![ CDATA [<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0//EN">]]></programlisting>

    <para>That line contains a number of different components.</para>

    <variablelist>
      <varlistentry>
	<term><literal>&lt;!</literal></term>
	
	<listitem>
	  <para>Is the <emphasis>indicator</emphasis> that shows that this
	    is an SGML declaration.  This line is declaring the document type.
	  </para>
	</listitem>
      </varlistentry>

      <varlistentry>
	<term><literal>DOCTYPE</literal></term>
	
	<listitem>
	  <para>Shows that this is an SGML declaration for the document
	    type.</para>
	</listitem>
      </varlistentry>
      
      <varlistentry>
	<term><literal>html</literal></term>
	
	<listitem>
	  <para>Names the first <link linkend="sgml-primer-elements">element</link> that
	    will appear in the document.</para>
	</listitem>
      </varlistentry>

      <varlistentry>
	<term><literal>PUBLIC "-//W3C//DTD HTML 4.0//EN"</literal></term>

	<listitem>
	  <para>Lists the Formal Public Identifier (FPI)<indexterm>
	      <primary>Formal Public Identifier</primary>
	    </indexterm>
	    for the DTD that this
	    document conforms to.  Your SGML parser will use this to find the
	    correct DTD when processing this document.</para>

	  <para><literal>PUBLIC</literal> is not a part of the FPI, but
	    indicates to the SGML processor how to find the DTD referenced in
	    the FPI.  Other ways of telling the SGML parser how to find the
	    DTD are shown <link
	      linkend="sgml-primer-fpi-alternatives">later</link>.</para>
	</listitem>
      </varlistentry>

      <varlistentry>
	<term><literal>&gt;</literal></term>
	
	<listitem>
	  <para>Returns to the document.</para>
	</listitem>
      </varlistentry>
    </variablelist>
    
    <sect2>
      <title>Formal Public Identifiers (FPIs)<indexterm significance="preferred">
	  <primary>Formal Public Identifier</primary>
	</indexterm>
</title>

      <note>
	<para>You do not need to know this, but it is useful background, and
	  might help you debug problems when your SGML processor cannot locate
	  the DTD you are using.</para>
      </note>
      
      <para>FPIs must follow a specific syntax.  This syntax is as
	follows:</para>

      <programlisting>"<replaceable>Owner</replaceable>//<replaceable>Keyword</replaceable> <replaceable>Description</replaceable>//<replaceable>Language</replaceable>"</programlisting>

      <variablelist>
	<varlistentry>
	  <term><replaceable>Owner</replaceable></term>
	  
	  <listitem>
	    <para>This indicates the owner of the FPI.</para>

	    <para>If this string starts with <quote>ISO</quote> then this is an
	      ISO owned FPI.  For example, the FPI <literal>"ISO
		8879:1986//ENTITIES Greek Symbols//EN"</literal> lists
	      <literal>ISO 8879:1986</literal> as being the owner for the set
	      of entities for Greek symbols.  ISO 8879:1986 is the ISO number
	      for the SGML standard.</para>

	    <para>Otherwise, this string will either look like
	      <literal>-//<replaceable>Owner</replaceable></literal> or
	      <literal>+//<replaceable>Owner</replaceable></literal> (notice
	      the only difference is the leading <literal>+</literal> or
	      <literal>-</literal>).</para>

	    <para>If the string starts with <literal>-</literal> then the
	      owner information is unregistered; if it starts with
	      <literal>+</literal> then the owner is registered.</para>

	    <para>ISO 9070:1991 defines how registered names are generated; it
	      might be derived from the number of an ISO publication, an ISBN
	      code, or an organization code assigned according to ISO 6523.
	      In addition, a registration authority could be created in order
	      to assign registered names.  The ISO council delegated this to
	      the American National Standards Institute (ANSI).</para>

	    <para>Because the FreeBSD Project has not been registered, the
	      owner string is <literal>-//FreeBSD</literal>.  And as you can
	      see, the W3C are not a registered owner either.</para>
	  </listitem>
	</varlistentry>
	
	<varlistentry>
	  <term><replaceable>Keyword</replaceable></term>
	  
	  <listitem>
	    <para>There are several keywords that indicate the type of
	      information in the file.  Some of the most common keywords are
	      <literal>DTD</literal>, <literal>ELEMENT</literal>,
	      <literal>ENTITIES</literal>, and <literal>TEXT</literal>.
	      <literal>DTD</literal> is used only for DTD files;
	      <literal>ELEMENT</literal> is usually used for DTD fragments
	      that contain only entity or element declarations.
	      <literal>TEXT</literal> is used for SGML content (text and
	      tags).</para>
	  </listitem>
	</varlistentry>
	
	<varlistentry>
	  <term><replaceable>Description</replaceable></term>
	  
	  <listitem>
	    <para>Any description you want to supply for the contents of this
	      file.  This may include version numbers or any short text that
	      is meaningful to you and unique for the SGML system.</para>
	  </listitem>
	</varlistentry>
	
	<varlistentry>
	  <term><replaceable>Language</replaceable></term>
	  
	  <listitem>
	    <para>This is an ISO two-character code that identifies the native
	      language for the file.  <literal>EN</literal> is used for
	      English.</para>
	  </listitem>
	</varlistentry>
      </variablelist>
      
      <sect3>
	<title><filename>catalog</filename> files</title>
	
	<para>If you use the syntax above and process this document
	  using an SGML processor, the processor will need to have some way of
	  turning the FPI into the name of the file on your computer that
	  contains the DTD.</para>
	
	<para>In order to do this it can use a catalog file.  A catalog file
	  (typically called <filename>catalog</filename>) contains lines that
	  map FPIs to filenames.  For example, if the catalog file contained
	  the line:</para>
	
	<programlisting>PUBLIC "-//W3C//DTD HTML 4.0//EN"             "4.0/strict.dtd"</programlisting>

	<para>The SGML processor would know to look up the DTD from
	  <filename>strict.dtd</filename> in the <filename>4.0</filename>
	  subdirectory of whichever directory held the
	  <filename>catalog</filename> file that contained that line.</para>

	<para>Look at the contents of
	  <filename>/usr/local/share/sgml/html/catalog</filename>.  This is
	  the catalog file for the HTML DTDs that will have been installed as
	  part of the <filename role="package">textproc/docproj</filename> port.</para>
      </sect3>

      <sect3>
	<title><envar>SGML_CATALOG_FILES</envar></title>

	<para>In order to locate a <filename>catalog</filename> file, your
	  SGML processor will need to know where to look.  Many of them
	  feature command line parameters for specifying the path to one or
	  more catalogs.</para>

	<para>In addition, you can set <envar>SGML_CATALOG_FILES</envar> to
	  point to the files.  This environment variable should consist of a
	  colon-separated list of catalog files (including their full
	  path).</para>

	<para>Typically, you will want to include the following files:</para>

	<itemizedlist>
	  <listitem>
	    <para><filename>/usr/local/share/sgml/docbook/4.1/catalog</filename></para>
	  </listitem>

	  <listitem>
	    <para><filename>/usr/local/share/sgml/html/catalog</filename></para>
	  </listitem>

	  <listitem>
	    <para><filename>/usr/local/share/sgml/iso8879/catalog</filename></para>
	  </listitem>

	  <listitem>
	    <para><filename>/usr/local/share/sgml/jade/catalog</filename></para>
	  </listitem>
	</itemizedlist>

	<para>You should <link linkend="sgml-primer-envars">already have done
	    this</link>.</para>
      </sect3>
    </sect2>
    
    <sect2 id="sgml-primer-fpi-alternatives">
      <title>Alternatives to FPIs</title>
      
      <para>Instead of using an FPI to indicate the DTD that the document
	conforms to (and therefore, which file on the system contains the DTD)
	you can explicitly specify the name of the file.</para>
      
      <para>The syntax for this is slightly different:</para>
      
      <programlisting><![ CDATA [<!DOCTYPE html SYSTEM "/path/to/file.dtd">]]></programlisting>
      
      <para>The <literal>SYSTEM</literal> keyword indicates that the SGML
	processor should locate the DTD in a system specific fashion.  This
	typically (but not always) means the DTD will be provided as a
	filename.</para>
      
      <para>Using FPIs is preferred for reasons of portability.  You do not
	want to have to ship a copy of the DTD around with your document, and
	if you used the <literal>SYSTEM</literal> identifier then everyone
	would need to keep their DTDs in the same place.</para>
    </sect2>
  </sect1>
  
  <sect1 id="sgml-primer-sgml-escape">
    <title>Escaping back to SGML</title>

    <para>Earlier in this primer I said that SGML is only used when writing a
      DTD.  This is not strictly true.  There is certain SGML syntax that you
      will want to be able to use within your documents.  For example,
      comments can be included in your document, and will be ignored by the
      parser.  Comments are entered using SGML syntax.  Other uses for SGML
      syntax in your document will be shown later too.</para>
      
    <para>Obviously, you need some way of indicating to the SGML processor
      that the following content is not elements within the document, but is
      SGML that the parser should act upon.</para>

    <para>These sections are marked by <literal>&lt;! ...  &gt;</literal> in
      your document.  Everything between these delimiters is SGML syntax as
      you might find within a DTD.</para>

    <para>As you may just have realized, the <link
	linkend="sgml-primer-doctype-declaration">DOCTYPE declaration</link>
      is an example of SGML syntax that you need to include in your
      document&hellip;</para>
  </sect1>
  
  <sect1 id="sgml-primer-comments">
    <title>Comments</title>
    
    <para>Comments are an SGML construction, and are normally only valid
      inside a DTD.  However, as <xref linkend="sgml-primer-sgml-escape">
      shows, it is possible to use SGML syntax within your document.</para>

    <para>The delimiter for SGML comments is the string
      <quote><literal>--</literal></quote>.  The first occurrence of this string
      opens a comment, and the second closes it.</para>

    <example>
      <title>SGML generic comment</title>

      <programlisting>&lt;!-- test comment --></programlisting>
      
      <programlisting><![ CDATA [
<!-- This is inside the comment -->

<!-- This is another comment    -->

<!-- This is one way
     of doing multiline comments -->

<!-- This is another way of   --
  -- doing multiline comments -->]]></programlisting>
    </example>

    <![ %output.print; [
    <important>
      <title>Use 2 dashes</title>

      <para>There is a problem with producing the PostScript and PDF versions
	of this document.  The above example probably shows just one hyphen
	symbol, <literal>-</literal> after the <literal>&lt;!</literal> and
	before the <literal>&gt;</literal>.</para>

      <para>You <emphasis>must</emphasis> use two <literal>-</literal>,
	<emphasis>not</emphasis> one.  The PostScript and PDF versions have
	translated the two <literal>-</literal> in the original to a longer,
	more professional <emphasis>em-dash</emphasis>, and broken this
	example in the process.</para>

      <para>The HTML, plain text, and RTF versions of this document are not
	affected.</para>
    </important>
    ]]>
    
    <para>If you have used HTML before you may have been shown different rules
      for comments.  In particular, you may think that the string
      <literal>&lt;!--</literal> opens a comment, and it is only closed by
      <literal>--&gt;</literal>.</para>

    <para>This is <emphasis>not</emphasis> the case.  A lot of web browsers
      have broken HTML parsers, and will accept that as valid.  However, the
      SGML parsers used by the Documentation Project are much stricter, and
      will reject documents that make that error.</para>

    <example>
      <title>Erroneous SGML comments</title>

      <programlisting><![ CDATA [
<!-- This is in the comment --

     THIS IS OUTSIDE THE COMMENT!

  -- back inside the comment -->]]></programlisting>

      <para>The SGML parser will treat this as though it were actually:</para>

      <programlisting>&lt;!THIS IS OUTSIDE THE COMMENT&gt;</programlisting>

      <para>This is not valid SGML, and may give confusing error
	messages.</para>

      <programlisting><![ CDATA [<!--------------- This is a very bad idea --------------->]]></programlisting>

      <para>As the example suggests, <emphasis>do not</emphasis> write
	comments like that.</para>

      <programlisting><![ CDATA [<!--===================================================-->]]></programlisting>

      <para>That is a (slightly) better approach, but it is still potentially
	confusing to people new to SGML.</para>
    </example>

    <sect2>
      <title>For you to do&hellip;</title>

      <procedure>
	<step>
	  <para>Add some comments to <filename>example.sgml</filename>, and
	    check that the file still validates using <command>nsgmls</command>.</para>
	</step>

	<step>
	  <para>Add some invalid comments to
	    <filename>example.sgml</filename>, and see the error messages that
	    <command>nsgmls</command> gives when it encounters an invalid comment.</para>
	</step>
      </procedure>
    </sect2>
  </sect1>
    
  <sect1 id="sgml-primer-entities">
    <title>Entities</title>

    <para>Entities are a mechanism for assigning names to chunks of content.
      As an SGML parser processes your document, any entities it finds are
      replaced by the content of the entity.</para>
    
    <para>This is a good way to have re-usable, easily changeable chunks of
      content in your SGML documents.  It is also the only way to include one
      marked up file inside another using SGML.</para>
    
    <para>There are two types of entities which can be used in two different
      situations: <emphasis>general entities</emphasis> and
      <emphasis>parameter entities</emphasis>.</para>
    
    <sect2 id="sgml-primer-general-entities">
      <title>General Entities</title>
      
      <para>You cannot use general entities in an SGML context (although you
	define them in one).  They can only be used in your document.
	Contrast this with <link
	  linkend="sgml-primer-parameter-entities">parameter
	  entities</link>.</para>

      <para>Each general entity has a name.  When you want to reference a
	general entity (and therefore include whatever text it represents in
	your document), you write
	<literal>&amp;<replaceable>entity-name</replaceable>;</literal>.  For
	example, suppose you had an entity called
	<literal>current.version</literal> which expanded to the current
	version number of your product.  You could write:</para>

      <programlisting><![ CDATA [<para>The current version of our product is
  &current.version;.</para>]]></programlisting>

      <para>When the version number changes you can simply change the
	definition of the value of the general entity and reprocess your
	document.</para>

      <para>You can also use general entities to enter characters that you
	could not otherwise include in an SGML document.  For example, &lt;
	and &amp; cannot normally appear in an SGML document.  When the SGML
	parser sees the &lt; symbol it assumes that a tag (either a start tag
	or an end tag) is about to appear, and when it sees the &amp; symbol
	it assumes the next text will be the name of an entity.</para>

      <para>Fortunately, you can use the two general entities &amp;lt; and
	&amp;amp; whenever you need to include one or other of these.</para>
	
      <para>A general entity can only be defined within an SGML context.
	Typically, this is done immediately after the DOCTYPE
	declaration.</para>

      <example>
	<title>Defining general entities</title>

	<programlisting><![ CDATA [<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0//EN" [
<!ENTITY current.version    "3.0-RELEASE">
<!ENTITY last.version       "2.2.7-RELEASE">
]>]]></programlisting>
	  
	<para>Notice how the DOCTYPE declaration has been extended by adding a
	  square bracket at the end of the first line.  The two entities are
	  then defined over the next two lines, before the square bracket is
	  closed, and then the DOCTYPE declaration is closed.</para>

	<para>The square brackets are necessary to indicate that we are
	  extending the DTD indicated by the DOCTYPE declaration.</para>
      </example>
    </sect2>
    
    <sect2 id="sgml-primer-parameter-entities">
      <title>Parameter entities</title>

      <para>Like <link linkend="sgml-primer-general-entities">general
	  entities</link>, parameter entities are used to assign names to
	reusable chunks of text.  However, whereas general entities can only
	be used within your document, parameter entities can only be used
	within an <link linkend="sgml-primer-sgml-escape">SGML
	  context</link>.</para>

      <para>Parameter entities are defined in a similar way to general
	entities.  However, instead of using
	<literal>&amp;<replaceable>entity-name</replaceable>;</literal> to
	refer to them, use
	<literal>%<replaceable>entity-name</replaceable>;</literal><footnote>
	  <para><emphasis>P</emphasis>arameter entities use the
	    <emphasis>P</emphasis>ercent symbol.</para>
	</footnote>.  The definition also includes the <literal>%</literal>
	between the <literal>ENTITY</literal> keyword and the name of the
	entity.</para>
      
      <example>
	<title>Defining parameter entities</title>
	
	<programlisting><![ CDATA [<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0//EN" [
<!ENTITY % param.some "some">
<!ENTITY % param.text "text">
<!ENTITY % param.new  "%param.some more %param.text">

<!-- %param.new now contains "some more text" -->
]>]]></programlisting>
      </example>
	
      <para>This may not seem particularly useful.  It will be.</para>
    </sect2>

    <sect2>
      <title>For you to do&hellip;</title>

      <procedure>
	<step>
	  <para>Add a general entity to
	    <filename>example.sgml</filename>.</para>

	  <programlisting><![ CDATA [<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" [
<!ENTITY version "1.1">
]>	  

<html>
  <head>	     
    <title>An example HTML file</title>
  </head>

  <!-- You might well have some comments in here as well -->
	  
  <body>	    
    <p>This is a paragraph containing some text.</p>

    <p>This paragraph contains some more text.</p>

    <p align="right">This paragraph might be right-justified.</p>

    <p>The current version of this document is: &version;</p>	  
  </body>	    
</html>]]></programlisting>	  
	</step>

	<step>
	  <para>Validate the document using <command>nsgmls</command>.</para>
	</step>

	<step>
	  <para>Load <filename>example.sgml</filename> into your web browser
	    (you may need to copy it to <filename>example.html</filename>
	    before your browser recognizes it as an HTML document).</para>

	  <para>Unless your browser is very advanced, you will not see the entity
	    reference <literal>&amp;version;</literal> replaced with the
	    version number.  Most web browsers have very simplistic parsers
	    which do not handle proper SGML<footnote>
	      <para>This is a shame.  Imagine all the problems and hacks (such
		as Server Side Includes) that could be avoided if they
		did.</para>
	    </footnote>.</para>
	</step>

	<step>
	  <para>The solution is to <emphasis>normalize</emphasis> your
	    document using an SGML normalizer.  The normalizer reads in valid
	    SGML and outputs equally valid SGML which has been transformed in
	    some way.  One of the ways in which the normalizer transforms the
	    SGML is to expand all the entity references in the document,
	    replacing the entities with the text that they represent.</para>

	  <para>You can use <command>sgmlnorm</command> to do this.</para>

          <screen>&prompt.user; <userinput>sgmlnorm example.sgml > example.html</userinput></screen>

	  <para>You should find a normalized (i.e., entity references
	    expanded) copy of your document in
	    <filename>example.html</filename>, ready to load into your web
	    browser.</para>
	</step>

	<step>
	  <para>If you look at the output from <command>sgmlnorm</command>
	    you will see that it does not include a DOCTYPE declaration at
	    the start.  To include this you need to use the <option>-d</option>
	    option:</para>

          <screen>&prompt.user; <userinput>sgmlnorm -d example.sgml > example.html</userinput></screen>
	</step>
      </procedure>
    </sect2>
  </sect1>
  
  <sect1 id="sgml-primer-include">
    <title>Using entities to include files</title>
    
    <para>Entities (both <link
	linkend="sgml-primer-general-entities">general</link> and <link
	linkend="sgml-primer-parameter-entities">parameter</link>) are
      particularly useful when used to include one file inside another.</para>

    <sect2 id="sgml-primer-include-using-gen-entities">
      <title>Using general entities to include files</title>
      
      <para>Suppose you have some content for an SGML book organized into
	files, one file per chapter, called
	<filename>chapter1.sgml</filename>,
	<filename>chapter2.sgml</filename>, and so forth, with a
	<filename>book.sgml</filename> file that will contain these
	chapters.</para>

      <para>In order to use the contents of these files as the values for your
	entities, you declare them with the <literal>SYSTEM</literal> keyword.
	This directs the SGML parser to use the contents of the named file as
	the value of the entity.</para>

      <example>
	<title>Using general entities to include files</title>
	
	<programlisting><![ CDATA [<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0//EN" [
<!ENTITY chapter.1 SYSTEM "chapter1.sgml">
<!ENTITY chapter.2 SYSTEM "chapter2.sgml">
<!ENTITY chapter.3 SYSTEM "chapter3.sgml">
<!-- And so forth -->
]>

<html>
  <!-- Use the entities to load in the chapters -->

  &chapter.1;
  &chapter.2;
  &chapter.3;
</html>]]></programlisting>
      </example>
      
      <warning>
	<para>When using general entities to include other files within a
	  document, the files being included
	  (<filename>chapter1.sgml</filename>,
	  <filename>chapter2.sgml</filename>, and so on) <emphasis>must
	    not</emphasis> start with a DOCTYPE declaration.  This is a syntax
	  error.</para>
      </warning>
    </sect2>
    
    <sect2>
      <title>Using parameter entities to include files</title>
      
      <para>Recall that parameter entities can only be used inside an SGML
	context.  Why then would you want to include a file within an SGML
	context?</para>

      <para>You can use this to ensure that you can reuse your general
	entities.</para>

      <para>Suppose that you had many chapters in your document, and you
	reused these chapters in two different books, each book organizing the
	chapters in a different fashion.</para>

      <para>You could list the entities at the top of each book, but this
	quickly becomes cumbersome to manage.</para>

      <para>Instead, place the general entity definitions inside one file,
	and use a parameter entity to include that file within your
	document.</para>

      <example>
	<title>Using parameter entities to include files</title>

	<para>First, place your entity definitions in a separate file, called
	  <filename>chapters.ent</filename>.  This file contains the
	  following:</para>
	  
	<programlisting><![ CDATA [<!ENTITY chapter.1 SYSTEM "chapter1.sgml">
<!ENTITY chapter.2 SYSTEM "chapter2.sgml">
<!ENTITY chapter.3 SYSTEM "chapter3.sgml">]]></programlisting>

	<para>Now create a parameter entity to refer to the contents of the
	  file.  Then use the parameter entity to load the file into the
	  document, which will then make all the general entities available
	  for use.  Then use the general entities as before:</para>

	<programlisting><![ CDATA [<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0//EN" [
<!-- Define a parameter entity to load in the chapter general entities -->
<!ENTITY % chapters SYSTEM "chapters.ent">

<!-- Now use the parameter entity to load in this file -->
%chapters;
]>

<html>
  &chapter.1;
  &chapter.2;
  &chapter.3;
</html>]]></programlisting>
      </example>
    </sect2>

    <sect2>
      <title>For you to do&hellip;</title>

      <sect3>
	<title>Use general entities to include files</title>

	<procedure>
	  <step>
	    <para>Create three files, <filename>para1.sgml</filename>,
	      <filename>para2.sgml</filename>, and
	      <filename>para3.sgml</filename>.</para>

	    <para>Put content similar to the following in each file:</para>

	    <programlisting><![ CDATA [<p>This is the first paragraph.</p>]]></programlisting>
	  </step>

	  <step>
	    <para>Edit <filename>example.sgml</filename> so that it looks like
	      this:</para>

	    <programlisting><![ CDATA [<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0//EN" [
<!ENTITY version "1.1">
<!ENTITY para1 SYSTEM "para1.sgml">
<!ENTITY para2 SYSTEM "para2.sgml">
<!ENTITY para3 SYSTEM "para3.sgml">
]>

<html>
  <head>
    <title>An example HTML file</title>
  </head>

  <body>
    <p>The current version of this document is: &version;</p>

    &para1;
    &para2;
    &para3;
  </body>
</html>]]></programlisting>
	  </step>

	  <step>
	    <para>Produce <filename>example.html</filename> by normalizing
	      <filename>example.sgml</filename>.</para>

            <screen>&prompt.user; <userinput>sgmlnorm -d example.sgml > example.html</userinput></screen>
	  </step>

	  <step>
	    <para>Load <filename>example.html</filename> into your web
	      browser, and confirm that the
	      <filename>para<replaceable>n</replaceable>.sgml</filename> files
	      have been included in <filename>example.html</filename>.</para>
	  </step>
	</procedure>
      </sect3>

      <sect3>
	<title>Use parameter entities to include files</title>

	<note>
	  <para>You must have taken the previous steps first.</para>
	</note>
	
	<procedure>
	  <step>
	    <para>Edit <filename>example.sgml</filename> so that it looks like
	      this:</para>
	    
	    <programlisting><![ CDATA [<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0//EN" [
<!ENTITY % entities SYSTEM "entities.sgml"> %entities;
]>

<html>
  <head>
    <title>An example HTML file</title>
  </head>

  <body>
    <p>The current version of this document is: &version;</p>

    &para1;
    &para2;
    &para3;
  </body>
</html>]]></programlisting>
	  </step>

	  <step>
	    <para>Create a new file, <filename>entities.sgml</filename>, with
	      this content:</para>

	    <programlisting><![ CDATA [<!ENTITY version "1.1">
<!ENTITY para1 SYSTEM "para1.sgml">
<!ENTITY para2 SYSTEM "para2.sgml">
<!ENTITY para3 SYSTEM "para3.sgml">]]></programlisting>
	  </step>

	  <step>
	    <para>Produce <filename>example.html</filename> by normalizing
	      <filename>example.sgml</filename>.</para>

            <screen>&prompt.user; <userinput>sgmlnorm -d example.sgml > example.html</userinput></screen>
	  </step>

	  <step>
	    <para>Load <filename>example.html</filename> into your web
	      browser, and confirm that the
	      <filename>para<replaceable>n</replaceable>.sgml</filename> files
	      have been included in <filename>example.html</filename>.</para>
	  </step>
	</procedure>
      </sect3>
    </sect2>
  </sect1>
  
  <sect1 id="sgml-primer-marked-sections">
    <title>Marked sections</title>
    
    <para>SGML provides a mechanism to indicate that particular pieces of the
      document should be processed in a special way.  These are termed
      <quote>marked sections</quote>.</para>

    <example>
      <title>Structure of a marked section</title>

      <programlisting>&lt;![ <replaceable>KEYWORD</replaceable> [
  Contents of marked section
]]&gt;</programlisting>
    </example>

    <para>As you would expect, being an SGML construct, a marked section
      starts with <literal>&lt;!</literal>.</para>

    <para>The first square bracket begins to delimit the marked
      section.</para>

    <para><replaceable>KEYWORD</replaceable> describes how this marked
      section should be processed by the parser.</para>

    <para>The second square bracket indicates that the content of the marked
      section starts here.</para>
    
    <para>The marked section is finished by closing the two square brackets,
      and then returning to the document context from the SGML context with
      <literal>&gt;</literal>.</para>
    
    <sect2>
      <title>Marked section keywords</title>
      
      <sect3>
	<title><literal>CDATA</literal>, <literal>RCDATA</literal></title>
	
	<para>These keywords denote the marked section's <emphasis>content
	    model</emphasis>, and allow you to change it from the
	  default.</para>
	
	<para>When an SGML parser is processing a document it keeps track
	  of what is called the <quote>content model</quote>.</para>
	
	<para>Briefly, the content model describes what sort of content the
	  parser is expecting to see, and what it will do with it when it
	  finds it.</para>

	<para>The two content models you will probably find most useful are
	  <literal>CDATA</literal> and <literal>RCDATA</literal>.</para>
	
	<para><literal>CDATA</literal> is for <quote>Character Data</quote>.
	  If the parser is in this content model then it is expecting to see
	  characters, and characters only.  In this model the &lt; and &amp;
	  symbols lose their special status, and will be treated as ordinary
	  characters.</para>
	
	<para><literal>RCDATA</literal> is for <quote>Entity references and
	  character data</quote>.  If the parser is in this content model then
	  it is expecting to see characters <emphasis>and</emphasis> entities.
	  &lt; loses its special status, but &amp; will still be treated as
	  the start of a general entity reference.</para>
	
	<para>This is particularly useful if you are including some verbatim
	  text that contains lots of &lt; and &amp; characters.  While you
	  could go through the text ensuring that every &lt; is converted to a
	  &amp;lt; and every &amp; is converted to a &amp;amp;, it can be
	  easier to mark the section as only containing CDATA.  When the SGML
	  parser encounters this it will treat the &lt; and &amp; symbols
	  embedded in the content as ordinary characters.</para>

        <note>
          <para>When you use <literal>CDATA</literal> or
            <literal>RCDATA</literal> in examples of text marked up in SGML,
            keep in mind that the content of <literal>CDATA</literal> is not
            validated.  You have to check the included SGML text using other
            means.  You could, for example, write the example in another
            document, validate the example code, and then paste it into your
            <literal>CDATA</literal> content.</para>
        </note>
	<!-- The nesting of CDATA within the next example is disgusting -->
	  
	<example>
	  <title>Using a CDATA marked section</title>
	  
	  <programlisting>&lt;para>Here is an example of how you would include some text
  that contained many &amp;lt; and &amp;amp; symbols.  The sample
  text is a fragment of HTML.  The surrounding elements (&lt;para> and
  &lt;programlisting>) are from DocBook.&lt;/para>

&lt;programlisting>
  &lt;![ CDATA [  <![ CDATA [
    <p>This is a sample that shows you some of the elements within
      HTML.  Since the angle brackets are used so many times, it is
      simpler to say the whole example is a CDATA marked section
      than to use the entity names for the left and right angle
      brackets throughout.</p>

    <ul>
      <li>This is a listitem</li>
      <li>This is a second listitem</li>
      <li>This is a third listitem</li>
    </ul>

    <p>This is the end of the example.</p>]]>
  ]]&gt;
&lt;/programlisting></programlisting>

	  <para>If you look at the source for this document you will see this
	    technique used throughout.</para>
	</example>
      </sect3>
      
      <sect3>
	<title><literal>INCLUDE</literal> and
	  <literal>IGNORE</literal></title>
	
	<para>If the keyword is <literal>INCLUDE</literal> then the contents
	  of the marked section will be processed.  If the keyword is
	  <literal>IGNORE</literal> then the marked section is ignored and
	  will not be processed.  It will not appear in the output.</para>

	<example>
	  <title>Using <literal>INCLUDE</literal> and
	    <literal>IGNORE</literal> in marked sections</title>

	  <programlisting>&lt;![ INCLUDE [
  This text will be processed and included.
]]&gt;

&lt;![ IGNORE [
  This text will not be processed or included.
]]&gt;</programlisting>
	</example>
	
	<para>By itself, this is not too useful.  If you wanted to remove text
	  from your document you could cut it out, or wrap it in
	  comments.</para>
	  
	<para>It becomes more useful when you realize you can use <link
	    linkend="sgml-primer-parameter-entities">parameter entities</link>
	  to control this.  Remember that parameter entities can only be used
	  in SGML contexts, and the keyword of a marked section
	  <emphasis>is</emphasis> an SGML context.</para>

	<para>For example, suppose that you produced a hard-copy version of
	  some documentation and an electronic version.  In the electronic
	  version you wanted to include some extra content that was not to
	  appear in the hard-copy.</para>

	<para>Create a parameter entity, and set its value to
	  <literal>INCLUDE</literal>.  Write your document, using marked
	  sections to delimit content that should only appear in the
	  electronic version.  In these marked sections use the parameter
	  entity in place of the keyword.</para>

	<para>When you want to produce the hard-copy version of the document,
	  change the parameter entity's value to <literal>IGNORE</literal> and
	  reprocess the document.</para>

	<example>
	  <title>Using a parameter entity to control a marked
	    section</title>
	  
	  <programlisting>&lt;!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0//EN" [
&lt;!ENTITY % electronic.copy "INCLUDE">
]&gt;

...

&lt;![ %electronic.copy [
  This content should only appear in the electronic
  version of the document.
]]&gt;</programlisting>

	  <para>When producing the hard-copy version, change the entity's
	    definition to:</para>
	  
	  <programlisting>&lt;!ENTITY % electronic.copy "IGNORE"></programlisting>

	  <para>On reprocessing the document, the marked sections that use
	    <literal>%electronic.copy</literal> as their keyword will be
	    ignored.</para>
	</example>
      </sect3>
    </sect2>

    <sect2>
      <title>For you to do&hellip;</title>

      <procedure>
	<step>
	  <para>Create a new file, <filename>section.sgml</filename>, that
	    contains the following:</para>

	  <programlisting>&lt;!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0//EN" [
&lt;!ENTITY % text.output "INCLUDE">
]&gt;

&lt;html>
  &lt;head>
    &lt;title>An example using marked sections&lt;/title>
  &lt;/head>

  &lt;body>	    
    &lt;p>This paragraph &lt;![ CDATA [contains many &lt;
      characters (&lt; &lt; &lt; &lt; &lt;) so it is easier
      to wrap it in a CDATA marked section ]]&gt;&lt;/p>

    &lt;![ IGNORE [
    &lt;p>This paragraph will definitely not be included in the
      output.&lt;/p>
    ]]&gt;

    &lt;![ <![ CDATA [%text.output]]> [
    &lt;p>This paragraph might appear in the output, or it
      might not.&lt;/p>

    &lt;p>Its appearance is controlled by the <![CDATA[%text.output]]>
      parameter entity.&lt;/p>	    
    ]]&gt;
  &lt;/body>
&lt;/html></programlisting>	    
	</step>

	<step>
	  <para>Normalize this file using &man.sgmlnorm.1; and examine the
	    output.  Notice which paragraphs have appeared, which have
	    disappeared, and what has happened to the content of the CDATA
	    marked section.</para>
	</step>

	<step>
	  <para>Change the definition of the <literal>text.output</literal>
	    entity from <literal>INCLUDE</literal> to
	    <literal>IGNORE</literal>.  Re-normalize the file, and examine the
	    output to see what has changed.</para>
	</step>
      </procedure>
    </sect2>
  </sect1>
  
  <sect1 id="sgml-primer-conclusion">
    <title>Conclusion</title>
    
    <para>That is the conclusion of this SGML primer.  For reasons of space
      and complexity several things have not been covered in depth (or at
      all).  However, the previous sections cover enough SGML for you to be
      able to follow the organization of the FDP documentation.</para>
  </sect1>
</chapter>

<!--
     Local Variables:
     mode: sgml
     sgml-declaration: "../chapter.decl"
     sgml-indent-data: t
     sgml-omittag: nil
     sgml-always-quote-attributes: t
     sgml-parent-document: ("../book.sgml" "part" "chapter")
     End:
-->