<?xml version="1.0" encoding="iso-8859-1"?>
<!-- Copyright (c) 1998, 1999 Nik Clayton, All rights reserved.
Redistribution and use in source (SGML DocBook) and 'compiled' forms
(SGML, HTML, PDF, PostScript, RTF and so forth) with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code (SGML DocBook) must retain the above
copyright notice, this list of conditions and the following
disclaimer as the first lines of this file unmodified.
2. Redistributions in compiled form (transformed to other DTDs,
converted to PDF, PostScript, RTF and other formats) must reproduce
the above copyright notice, this list of conditions and the
following disclaimer in the documentation and/or other materials
provided with the distribution.
THIS DOCUMENTATION IS PROVIDED BY NIK CLAYTON "AS IS" AND ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL NIK CLAYTON BE LIABLE FOR ANY DIRECT,
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
ANY WAY OUT OF THE USE OF THIS DOCUMENTATION, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
$FreeBSD$
-->
<chapter xmlns="http://docbook.org/ns/docbook" xmlns:xlink="http://www.w3.org/1999/xlink" version="5.0" xml:id="xml-primer">
<title>XML Primer</title>
<para>The majority of FDP documentation is written in applications
of XML. This chapter explains exactly what that means, how to
read and understand the source to the documentation, and the sort
of XML tricks you will see used in the documentation.</para>
<para>Portions of this section were inspired by Mark Galassi's
<link xlink:href="http://www.galassi.org/mark/mydocs/docbook-intro/docbook-intro.html">Get
Going With DocBook</link>.</para>
<sect1 xml:id="xml-primer-overview">
<title>Overview</title>
<para>Way back when, electronic text was simple to deal with.
Admittedly, you had to know which character set your document
was written in (ASCII, EBCDIC, or one of a number of others) but
that was about it. Text was text, and what you saw really was
what you got. No frills, no formatting, no intelligence.</para>
<para>Inevitably, this was not enough. Once you have text in a
machine-usable format, you expect machines to be able to use it
and manipulate it intelligently. You would like to indicate
that certain phrases should be emphasized, or added to a
glossary, or be hyperlinks. You might want filenames to be
shown in a <quote>typewriter</quote> style font for viewing on
screen, but as <quote>italics</quote> when printed, or any of a
myriad of other options for presentation.</para>
<para>It was once hoped that Artificial Intelligence (AI) would
make this easy. Your computer would read in the document and
automatically identify key phrases, filenames, text that the
reader should type in, examples, and more. Unfortunately, real
life has not happened quite like that, and our computers require
some assistance before they can meaningfully process our
text.</para>
<para>More precisely, they need help identifying what is what.
Let's look at this text:</para>
<blockquote>
<para>To remove <filename>/tmp/foo</filename> use
&man.rm.1;.</para>
<screen>&prompt.user; <userinput>rm /tmp/foo</userinput></screen>
</blockquote>
<para>It is easy to see which parts are filenames, which are
commands to be typed in, which parts are references to manual
pages, and so on. But the computer processing the document
cannot. For this we need markup.</para>
<para><quote>Markup</quote> is commonly used to describe
<quote>adding value</quote> or <quote>increasing cost</quote>.
The term takes on both these meanings when applied to text.
Markup is additional text included in the document,
distinguished from the document's content in some way, so that
programs that process the document can read the markup and use
it when making decisions about the document. Editors can hide
the markup from the user, so the user is not distracted by
it.</para>
<para>The extra information stored in the markup <emphasis>adds
value</emphasis> to the document. Adding the markup to the
document must typically be done by a person&#8212;after all, if
computers could recognize the text sufficiently well to add the
markup then there would be no need to add it in the first place.
This <emphasis>increases the cost</emphasis> (i.e., the effort
required) to create the document.</para>
<para>The previous example is actually represented in this
document like this:</para>
<programlisting><![CDATA[
<para>To remove <filename>/tmp/foo</filename> use &man.rm.1;.</para>
<screen>&prompt.user; <userinput>rm /tmp/foo</userinput></screen>]]></programlisting>
<para>As you can see, the markup is clearly separate from the
content.</para>
<para>Obviously, if you are going to use markup you need to define
what your markup means, and how it should be interpreted. You
will need a markup language that you can follow when marking up
your documents.</para>
<para>Of course, one markup language might not be enough. A
markup language for technical documentation has very different
requirements than a markup language that was to be used for
cookery recipes. This, in turn, would be very different from a
markup language used to describe poetry. What you really need
is a first language that you use to write these other markup
languages. A <emphasis>meta markup language</emphasis>.</para>
<para>This is exactly what the eXtensible Markup
Language (XML) is. Many markup languages have been written in
XML, including the two most used by the FDP, XHTML and
DocBook.</para>
<para>Each language definition is more properly called a grammar,
vocabulary, schema or Document Type Definition (DTD). There
are various languages to specify an XML grammar, for example,
DTD (yes, it also means the specification language itself),
XML Schema (XSD) or RELAX NG. The schema specifies the names
of the elements that can be used, what order they appear in (and
whether some markup can be used inside other markup) and related
information.</para>
<para xml:id="xml-primer-validating">A schema is a
<emphasis>complete</emphasis> specification of all the elements
that are allowed to appear, the order in which they should
appear, which elements are mandatory, which are optional, and so
forth. This makes it possible to write an XML
<emphasis>parser</emphasis> which reads in both the schema and a
document which claims to conform to the schema. The parser can
then confirm whether or not all the elements required by the vocabulary
are in the document in the right order, and whether there are
any errors in the markup. This is normally referred to as
<quote>validating the document</quote>.</para>
<note>
<para>This processing simply confirms that the choice of
elements, their ordering, and so on, conforms to that listed
in the grammar. It does <emphasis>not</emphasis> check that you
have used <emphasis>appropriate</emphasis> markup for the
content. If you tried to mark up all the filenames in your
document as function names, the parser would not flag this as
an error (assuming, of course, that your schema defines elements
for filenames and functions, and that they are allowed to
appear in the same place).</para>
</note>
<para>It is likely that most of your contributions to the
Documentation Project will consist of content marked up in
either XHTML or DocBook, rather than alterations to the schemas.
For this reason this book will not touch on how to write a
vocabulary.</para>
</sect1>
<sect1 xml:id="xml-primer-elements">
<title>Elements, Tags, and Attributes</title>
<para>All the vocabularies written in XML share certain characteristics.
This is hardly surprising, as the philosophy behind XML will
inevitably show through. One of the most obvious manifestations
of this philosophy is that of <emphasis>content</emphasis> and
<emphasis>elements</emphasis>.</para>
<para>Your documentation (whether it is a single web page, or a
lengthy book) is considered to consist of content. This content
is then divided (and further subdivided) into elements. The
purpose of adding markup is to name and identify the boundaries
of these elements for further processing.</para>
<para>For example, consider a typical book. At the very top
level, the book is itself an element. This <quote>book</quote>
element obviously contains chapters, which can be considered to
be elements in their own right. Each chapter will contain more
elements, such as paragraphs, quotations, and footnotes. Each
paragraph might contain further elements, identifying content
that was direct speech, or the name of a character in the
story.</para>
<para>You might like to think of this as <quote>chunking</quote>
content. At the very top level you have one chunk, the book.
Look a little deeper, and you have more chunks, the individual
chapters. These are chunked further into paragraphs, footnotes,
character names, and so on.</para>
<para>Notice how you can make this differentiation between
different elements of the content without resorting to any XML
terms. It really is surprisingly straightforward. You could do
this with a highlighter pen and a printout of the book, using
different colors to indicate different chunks of content.</para>
<para>Of course, we do not have an electronic highlighter pen, so
we need some other way of indicating which element each piece of
content belongs to. In languages written in XML (XHTML,
DocBook, et al) this is done by means of
<emphasis>tags</emphasis>.</para>
<para>A tag is used to identify where a particular element starts,
and where the element ends. <emphasis>The tag is not part of
the element itself</emphasis>. Because each grammar was normally
written to mark up specific types of information, each one will
recognize different elements, and will therefore have different
names for the tags.</para>
<para>For an element called
<replaceable>element-name</replaceable> the start tag will
normally look like
<tag><replaceable>element-name</replaceable></tag>. The
corresponding closing tag for this element is
<tag>/<replaceable>element-name</replaceable></tag>.</para>
<example>
<title>Using an Element (Start and End Tags)</title>
<para>XHTML has an element for indicating that the content
enclosed by the element is a paragraph, called
<tag>p</tag>.</para>
<programlisting><![CDATA[<p>This is a paragraph. It starts with the start tag for
the 'p' element, and it will end with the end tag for the 'p'
element.</p>
<p>This is another paragraph. But this one is much shorter.</p>]]></programlisting>
</example>
<para>Some elements have no
content. For example, in XHTML you can indicate that you want a
horizontal line to appear in the document.</para>
<para>For such elements, which have no content at all, XML introduced
a shorthand form that is completely equivalent to writing a start tag
immediately followed by an end tag:</para>
<programlisting><![CDATA[<hr/>]]></programlisting>
<example>
<title>Using an Element (Without Content)</title>
<para>XHTML has an element for indicating a horizontal rule,
called <tag>hr</tag>. This element does not wrap
content, so it looks like this.</para>
<programlisting><![CDATA[<p>One paragraph.</p>
<hr></hr>
<p>This is another paragraph. A horizontal rule separates this
from the previous paragraph.</p>]]></programlisting>
<para>For such elements, which have no content at all, XML introduced
a shorthand form that is completely equivalent to the above
form:</para>
<programlisting><![CDATA[<p>One paragraph.</p>
<hr/>
<p>This is another paragraph. A horizontal rule separates this
from the previous paragraph.</p>]]></programlisting>
</example>
<para>If it is not obvious by now, elements can contain other
elements. In the book example earlier, the book element
contained all the chapter elements, which in turn contained all
the paragraph elements, and so on.</para>
<example>
<title>Elements within Elements; <tag>em</tag></title>
<programlisting><![CDATA[<p>This is a simple <em>paragraph</em> where some
of the <em>words</em> have been <em>emphasized</em>.</p>]]></programlisting>
</example>
<para>The grammar will specify the rules detailing which elements can
contain other elements, and exactly what they can
contain.</para>
<important>
<para>People often confuse the terms tags and elements, and use
the terms as if they were interchangeable. They are
not.</para>
<para>An element is a conceptual part of your document. An
element has a defined start and end. The tags mark where the
element starts and ends.</para>
<para>When this document (or anyone else knowledgeable about
XML) refers to <quote>the <tag>p</tag> tag</quote>
they mean the literal text consisting of the three characters
<literal>&lt;</literal>, <literal>p</literal>, and
<literal>&gt;</literal>. But the phrase <quote>the
<tag>p</tag> element</quote> refers to the whole
element.</para>
<para>This distinction <emphasis>is</emphasis> very subtle. But
keep it in mind.</para>
</important>
<para>Elements can have attributes. An attribute has a name and a
value, and is used for adding extra information to the element.
This might be information that indicates how the content should
be rendered, or might be something that uniquely identifies that
occurrence of the element, or it might be something else.</para>
<para>An element's attributes are written
<emphasis>inside</emphasis> the start tag for that element, and
take the form
<literal>attribute-name="attribute-value"</literal>.</para>
<para>In XHTML, the
<tag>p</tag> element has an attribute called
<tag class="attribute">align</tag>, which suggests an alignment
(justification) for the paragraph to the program displaying the
XHTML.</para>
<para>The <literal>align</literal> attribute can take one of four
defined values, <literal>left</literal>,
<literal>center</literal>, <literal>right</literal> and
<literal>justify</literal>. If the attribute is not specified
then the default is <literal>left</literal>.</para>
<example>
<title>Using An Element with An Attribute</title>
<programlisting><![CDATA[<p align="left">The inclusion of the align attribute
on this paragraph was superfluous, since the default is left.</p>
<p align="center">This may appear in the center.</p>]]></programlisting>
</example>
<para>Some attributes will only take specific values, such as
<literal>left</literal> or <literal>justify</literal>. Others
will allow you to enter anything you want.</para>
<example>
<title>Single Quotes Around Attributes</title>
<programlisting><![CDATA[<p align='right'>I am on the right!</p>]]></programlisting>
</example>
<para>XML requires you to quote each attribute value with either
single or double quotes. Double quotes are more common,
but single quotes may be used as well. Using single quotes is
practical if you want to include double quotes in the attribute
value.</para>
<para>The information on attributes, elements, and tags is stored
in XML catalogs. The various Documentation Project tools use
these catalog files to validate your work. The tools in
<package>textproc/docproj</package> include a
variety of XML catalog files. The FreeBSD Documentation
Project includes its own set of catalog files. Your tools need
to know about both sorts of catalog files.</para>
<sect2>
<title>For You to Do&#8230;</title>
<para>In order to run the examples in this document you will
need to install some software on your system and ensure that
an environment variable is set correctly.</para>
<procedure>
<step>
<para>Download and install
<package>textproc/docproj</package> from
the FreeBSD ports system. This is a
<emphasis>meta-port</emphasis> that should download and
install all of the programs and supporting files that are
used by the Documentation Project.</para>
</step>
<step>
<para>Add lines to your shell startup files to set
<envar>SGML_CATALOG_FILES</envar>. (If you are not working
on the English version of the documentation, you will want
to substitute the correct directory for your
language.)</para>
<example xml:id="xml-primer-envars">
<title><filename>.profile</filename>, for &man.sh.1; and
&man.bash.1; Users</title>
<programlisting>SGML_ROOT=/usr/local/share/xml
SGML_CATALOG_FILES=${SGML_ROOT}/jade/catalog
SGML_CATALOG_FILES=${SGML_ROOT}/docbook/4.1/catalog:$SGML_CATALOG_FILES
SGML_CATALOG_FILES=${SGML_ROOT}/html/catalog:$SGML_CATALOG_FILES
SGML_CATALOG_FILES=${SGML_ROOT}/iso8879/catalog:$SGML_CATALOG_FILES
SGML_CATALOG_FILES=/usr/doc/share/xml/catalog:$SGML_CATALOG_FILES
SGML_CATALOG_FILES=/usr/doc/en_US.ISO8859-1/share/xml/catalog:$SGML_CATALOG_FILES
export SGML_CATALOG_FILES</programlisting>
</example>
<example>
<title><filename>.cshrc</filename>, for &man.csh.1; and
&man.tcsh.1; Users</title>
<programlisting>setenv SGML_ROOT /usr/local/share/xml
setenv SGML_CATALOG_FILES ${SGML_ROOT}/jade/catalog
setenv SGML_CATALOG_FILES ${SGML_ROOT}/docbook/4.1/catalog:$SGML_CATALOG_FILES
setenv SGML_CATALOG_FILES ${SGML_ROOT}/html/catalog:$SGML_CATALOG_FILES
setenv SGML_CATALOG_FILES ${SGML_ROOT}/iso8879/catalog:$SGML_CATALOG_FILES
setenv SGML_CATALOG_FILES /usr/doc/share/xml/catalog:$SGML_CATALOG_FILES
setenv SGML_CATALOG_FILES /usr/doc/en_US.ISO8859-1/share/xml/catalog:$SGML_CATALOG_FILES</programlisting>
</example>
<para>Then either log out, and log back in again, or run
those commands from the command line to set the variable
values.</para>
</step>
</procedure>
<procedure>
<step>
<para>Create <filename>example.xml</filename>, and enter
the following text:</para>
<programlisting><![CDATA[<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>An Example XHTML File</title>
</head>
<body>
<p>This is a paragraph containing some text.</p>
<p>This paragraph contains some more text.</p>
<p align="right">This paragraph might be right-justified.</p>
</body>
</html>]]></programlisting>
</step>
<step>
<para>Try to validate this file using an XML parser.</para>
<para>Part of
<package>textproc/docproj</package> is
the <command>xmllint</command>
<link linkend="xml-primer-validating">validating
parser</link>.</para>
<para>Use <command>xmllint</command> in the following way to
check that your document is valid:</para>
<screen>&prompt.user; <userinput>xmllint --valid --noout example.xml</userinput></screen>
<para>As you will see, <command>xmllint</command> returns
without displaying any output. This means that your
document validated successfully.</para>
</step>
<step>
<para>See what happens when required elements are omitted.
Try removing the <tag>title</tag> and
<tag>/title</tag> tags, and re-run the
validation.</para>
<screen>&prompt.user; <userinput>xmllint --valid --noout example.xml</userinput>
example.xml:5: element head: validity error : Element head content does not follow the DTD, expecting ((script | style | meta | link | object | isindex)* , ((title , (script | style | meta | link | object | isindex)* , (base , (script | style | meta | link | object | isindex)*)?) | (base , (script | style | meta | link | object | isindex)* , title , (script | style | meta | link | object | isindex)*))), got ()</screen>
<para>This line tells you that the validation error comes from
the <replaceable>fifth</replaceable> line of the
<replaceable>example.xml</replaceable> file and that the
content of the <tag>head</tag> element is the part that
does not follow the rules described by the XHTML grammar.</para>
<para>Below this line <command>xmllint</command> will show you
the line where the error has been found and will also mark the
exact character position with a ^ sign.</para>
</step>
<step>
<para>Put the <tag>title</tag> element back
in.</para>
</step>
</procedure>
</sect2>
</sect1>
<sect1 xml:id="xml-primer-doctype-declaration">
<title>The DOCTYPE Declaration</title>
<para>If you use the DTD specification language, the beginning of
each document that you write may specify the name of the DTD that
the document conforms to. Other specification languages, like
XML Schema and RELAX NG, are not referred to in the source document.
The DOCTYPE declaration lets XML parsers determine the DTD and
ensure that the document does conform to it.</para>
<para>A typical declaration for a document written to conform with
version 1.0 of the XHTML DTD looks like this:</para>
<programlisting><![CDATA[<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">]]></programlisting>
<para>That line contains a number of different components.</para>
<variablelist>
<varlistentry>
<term><literal>&lt;!</literal></term>
<listitem>
<para>Is the <emphasis>indicator</emphasis> that indicates
that this is an XML declaration. This line is declaring
the document type.</para>
</listitem>
</varlistentry>
<varlistentry>
<term><literal>DOCTYPE</literal></term>
<listitem>
<para>Shows that this is an XML declaration for the
document type.</para>
</listitem>
</varlistentry>
<varlistentry>
<term><literal>html</literal></term>
<listitem>
<para>Names the first
<link linkend="xml-primer-elements">element</link> that
will appear in the document.</para>
</listitem>
</varlistentry>
<varlistentry>
<term><literal>PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"</literal></term>
<listitem>
<para>Lists the Formal Public Identifier (FPI)
<indexterm>
<primary>Formal Public Identifier</primary>
</indexterm>
for the DTD that this document conforms to. Your XML
parser will use this to find the correct DTD when
processing this document.</para>
<para><literal>PUBLIC</literal> is not a part of the FPI,
but indicates to the XML processor how to find the DTD
referenced in the FPI. Other ways of telling the XML
parser how to find the DTD are shown <link linkend="xml-primer-fpi-alternatives">later</link>.</para>
</listitem>
</varlistentry>
<varlistentry>
<term><literal>"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"</literal></term>
<listitem>
<para>A local filename or a URL used to find the DTD.</para>
</listitem>
</varlistentry>
<varlistentry>
<term><literal>&gt;</literal></term>
<listitem>
<para>Returns to the document.</para>
</listitem>
</varlistentry>
</variablelist>
<sect2>
<title>Formal Public Identifiers (FPIs)
<indexterm significance="preferred">
<primary>Formal Public Identifier</primary>
</indexterm></title>
<note>
<para>You do not need to know this, but it is useful
background, and might help you debug problems when your XML
processor cannot locate the DTD you are using.</para>
</note>
<para>FPIs must follow a specific syntax. This syntax is as
follows:</para>
<programlisting>"<replaceable>Owner</replaceable>//<replaceable>Keyword</replaceable> <replaceable>Description</replaceable>//<replaceable>Language</replaceable>"</programlisting>
<variablelist>
<varlistentry>
<term><replaceable>Owner</replaceable></term>
<listitem>
<para>This indicates the owner of the FPI.</para>
<para>If this string starts with <quote>ISO</quote> then
this is an ISO owned FPI. For example, the FPI
<literal>"ISO 8879:1986//ENTITIES Greek
Symbols//EN"</literal> lists
<literal>ISO 8879:1986</literal> as being the owner for
the set of entities for Greek symbols. ISO 8879:1986 is
the ISO number for the SGML standard, the predecessor
(and a superset) of XML.</para>
<para>Otherwise, this string will either look like
<literal>-//Owner</literal>
or
<literal>+//Owner</literal>
(notice the only difference is the leading
<literal>+</literal> or <literal>-</literal>).</para>
<para>If the string starts with <literal>-</literal> then
the owner information is unregistered, with a
<literal>+</literal> it identifies it as being
registered.</para>
<para>ISO 9070:1991 defines how registered names are
generated; it might be derived from the number of an ISO
publication, an ISBN code, or an organization code
assigned according to ISO 6523. In addition, a
registration authority could be created in order to
assign registered names. The ISO council delegated this
to the American National Standards Institute
(ANSI).</para>
<para>Because the FreeBSD Project has not been registered
the owner string is <literal>-//FreeBSD</literal>. And
as you can see, the W3C are not a registered owner
either.</para>
</listitem>
</varlistentry>
<varlistentry>
<term><replaceable>Keyword</replaceable></term>
<listitem>
<para>There are several keywords that indicate the type of
information in the file. Some of the most common
keywords are <literal>DTD</literal>,
<literal>ELEMENT</literal>, <literal>ENTITIES</literal>,
and <literal>TEXT</literal>. <literal>DTD</literal> is
used only for DTD files, <literal>ELEMENT</literal> is
usually used for DTD fragments that contain only entity
or element declarations. <literal>TEXT</literal> is
used for XML content (text and tags).</para>
</listitem>
</varlistentry>
<varlistentry>
<term><replaceable>Description</replaceable></term>
<listitem>
<para>Any description you want to supply for the contents
of this file. This may include version numbers or any
short text that is meaningful to you and unique for the
XML system.</para>
</listitem>
</varlistentry>
<varlistentry>
<term><replaceable>Language</replaceable></term>
<listitem>
<para>This is an ISO two-character code that identifies
the native language for the file. <literal>EN</literal>
is used for English.</para>
</listitem>
</varlistentry>
</variablelist>
<sect3>
<title><filename>catalog</filename> Files</title>
<para>If you use the syntax above and process this document
using an XML processor, the processor will need to have
some way of turning the FPI into the name of the file on
your computer that contains the DTD.</para>
<para>In order to do this it can use a catalog file. A
catalog file (typically called <filename>catalog</filename>)
contains lines that map FPIs to filenames. For example, if
the catalog file contained the line:</para>
<!-- XXX: mention XML catalog or maybe replace this totally and only cover XML catalog -->
<programlisting>PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "1.0/transitional.dtd"</programlisting>
<para>The XML processor would know to look up the DTD from
<filename>transitional.dtd</filename> in the
<filename>1.0</filename> subdirectory of whichever directory
held the <filename>catalog</filename> file that contained
that line.</para>
<para>Look at the contents of
<filename>/usr/local/share/xml/dtd/xhtml/catalog.xml</filename>.
This is the catalog file for the XHTML DTDs that will have
been installed as part of the <package>textproc/docproj</package> port.</para>
</sect3>
<sect3>
<title><envar>SGML_CATALOG_FILES</envar></title>
<para>In order to locate a <filename>catalog</filename> file,
your XML processor will need to know where to look. Many
of them feature command line parameters for specifying the
path to one or more catalogs.</para>
<para>In addition, you can set
<envar>SGML_CATALOG_FILES</envar> to point to the files.
This environment variable should consist of a
colon-separated list of catalog files (including their full
path).</para>
<para>Typically, you will want to include the following
files:</para>
<itemizedlist>
<listitem>
<para><filename>/usr/local/share/xml/docbook/4.1/catalog</filename></para>
</listitem>
<listitem>
<para><filename>/usr/local/share/xml/html/catalog</filename></para>
</listitem>
<listitem>
<para><filename>/usr/local/share/xml/iso8879/catalog</filename></para>
</listitem>
<listitem>
<para><filename>/usr/local/share/xml/jade/catalog</filename></para>
</listitem>
</itemizedlist>
<para>You should <link linkend="xml-primer-envars">already
have done this</link>.</para>
</sect3>
</sect2>
<sect2 xml:id="xml-primer-fpi-alternatives">
<title>Alternatives to FPIs</title>
<para>Instead of using an FPI to indicate the DTD that the
document conforms to (and therefore, which file on the system
contains the DTD) you can explicitly specify the name of the
file.</para>
<para>The syntax for this is slightly different:</para>
<programlisting><![CDATA[<!DOCTYPE html SYSTEM "/path/to/file.dtd">]]></programlisting>
<para>The <literal>SYSTEM</literal> keyword indicates that the
XML processor should locate the DTD in a system specific
fashion. This typically (but not always) means the DTD will
be provided as a filename.</para>
<para>Using FPIs is preferred for reasons of portability. You
do not want to have to ship a copy of the DTD around with your
document, and if you used the <literal>SYSTEM</literal>
identifier then everyone would need to keep their DTDs in the
same place.</para>
</sect2>
</sect1>
<sect1 xml:id="xml-primer-xml-escape">
<title>Escaping Back to XML</title>
<para>As mentioned earlier, XML is only used when writing a DTD.
This is not strictly true. There is certain XML syntax that
you will want to be able to use within your documents. For
example, comments can be included in your document, and will be
ignored by the parser. Comments are entered using XML syntax.
Other uses for XML syntax in your document will be shown later
too.</para>
<para>Obviously, you need some way of indicating to the XML
processor that the following content is not elements within the
document, but is XML that the parser should act upon.</para>
<para>These sections are marked by
<literal>&lt;! ... &gt;</literal> in your document. Everything
between these delimiters is XML syntax as you might find within
a DTD.</para>
<para>As you may just have realized, the
<link linkend="xml-primer-doctype-declaration">DOCTYPE
declaration</link> is an example of XML syntax that you need
to include in your document…</para>
</sect1>
<sect1 xml:id="xml-primer-comments">
<title>Comments</title>
<para>Comments are an XML construction, and are normally only
valid inside a DTD. However, as
<xref linkend="xml-primer-xml-escape"/> shows, it is possible
to use XML syntax within your document.</para>
<para>The delimiter for XML comments is the string
<quote><literal>--</literal></quote>. The first occurrence of
this string opens a comment, and the second closes it.</para>
<example>
<title>XML Generic Comment</title>
<programlisting>&lt;!-- test comment --&gt;</programlisting>
<programlisting>
&lt;!‐- This is inside the comment -‐&gt;
&lt;!‐- This is another comment -‐&gt;
&lt;!‐- This is one way
of doing multiline comments -‐&gt;
&lt;!‐- This is another way of -‐
‐- doing multiline comments -‐&gt;</programlisting>
</example>
<para>If you have used XHTML before you may have been shown
different rules for comments. In particular, you may think that
the string <literal>&lt;!--</literal> opens a comment, and it is
only closed by <literal>--&gt;</literal>.</para>
<para>This is <emphasis>not</emphasis> the case. A lot of web
browsers have broken XHTML parsers, and will accept that as
valid. However, the XML parsers used by the Documentation
Project are much stricter, and will reject documents that make
that error.</para>
<example>
<title>Erroneous XML Comments</title>
<programlisting>
&lt;!‐- This is in the comment -‐
THIS IS OUTSIDE THE COMMENT!
‐- back inside the comment -‐&gt;</programlisting>
<para>The XML parser will treat this as though it were
actually:</para>
<programlisting>&lt;!THIS IS OUTSIDE THE COMMENT&gt;</programlisting>
<para>This is not valid XML, and may give confusing error
messages.</para>
<programlisting>&lt;!‐‐‐‐‐ This is a very bad idea ‐‐‐‐‐&gt;</programlisting>
<para>As the example suggests, <emphasis>do not</emphasis> write
comments like that.</para>
<programlisting>&lt;!-‐===================================================-‐&gt;</programlisting>
<para>That is a (slightly) better approach, but it is still
potentially confusing to people new to XML.</para>
</example>
<sect2>
<title>For You to Do…</title>
<procedure>
<step>
<para>Add some comments to
<filename>example.xml</filename>, and check that the file
still validates using <command>xmllint</command>.</para>
</step>
<step>
<para>Add some invalid comments to
<filename>example.xml</filename>, and see the error
messages that <command>xmllint</command> gives when it
encounters an invalid comment.</para>
</step>
</procedure>
</sect2>
</sect1>
<sect1 xml:id="xml-primer-entities">
<title>Entities</title>
<para>Entities are a mechanism for assigning names to chunks of
content. As an XML parser processes your document, any
entities it finds are replaced by the content of the
entity.</para>
<para>This is a good way to have re-usable, easily changeable
chunks of content in your XML documents. It is also the only
way to include one marked up file inside another using
XML.</para>
<para>There are two types of entities which can be used in two
different situations; <emphasis>general entities</emphasis> and
<emphasis>parameter entities</emphasis>.</para>
<sect2 xml:id="xml-primer-general-entities">
<title>General Entities</title>
<para>You cannot use general entities in an XML context
(although you define them in one). They can only be used in
your document. Contrast this with <link linkend="xml-primer-parameter-entities">parameter
entities</link>.</para>
<para>Each general entity has a name. When you want to
reference a general entity (and therefore include whatever
text it represents in your document), you write
<literal>&amp;entity-name;</literal>.
For example, suppose you had an entity called
<literal>current.version</literal> which expanded to the
current version number of your product. You could
write:</para>
<programlisting><![CDATA[<para>The current version of our product is
&current.version;.</para>]]></programlisting>
<para>When the version number changes you can simply change the
definition of the value of the general entity and reprocess
your document.</para>
<para>You can also use general entities to enter characters that
you could not otherwise include in an XML document. For
example, <literal>&lt;</literal> and <literal>&amp;</literal>
cannot normally appear in an XML document. When the XML
parser sees the <literal>&lt;</literal> symbol it assumes that
a tag (either a start tag or an end tag) is about to appear,
and when it sees the <literal>&amp;</literal> symbol it
assumes the next text will be the name of an entity.</para>
<para>Fortunately, you can use the two general entities
<literal>&amp;lt;</literal> and <literal>&amp;amp;</literal>
whenever you need to include one or other of these.</para>
<para>A general entity can only be defined within an XML
context. Typically, this is done immediately after the
DOCTYPE declaration.</para>
<example>
<title>Defining General Entities</title>
<programlisting><![CDATA[<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd" [
<!ENTITY current.version "3.0-RELEASE">
<!ENTITY last.version "2.2.7-RELEASE">
]>]]></programlisting>
<para>Notice how the DOCTYPE declaration has been extended by
adding a square bracket at the end of the first line. The
two entities are then defined over the next two lines,
before the square bracket is closed, and then the DOCTYPE
declaration is closed.</para>
<para>The square brackets are necessary to indicate that we
are extending the DTD indicated by the DOCTYPE
declaration.</para>
</example>
</sect2>
<sect2 xml:id="xml-primer-parameter-entities">
<title>Parameter Entities</title>
<para>Like <link linkend="xml-primer-general-entities">general
entities</link>, parameter entities are used to assign names
to reusable chunks of text. However, whereas general entities
can only be used within your document, parameter entities can
only be used within an <link linkend="xml-primer-xml-escape">XML
context</link>.</para>
<para>Parameter entities are defined in a similar way to general
entities. However, instead of using
<literal>&amp;entity-name;</literal>
to refer to them, use
<literal>%entity-name;</literal>
<footnote><para><emphasis>P</emphasis>arameter entities use
the <emphasis>P</emphasis>ercent
symbol.</para></footnote>. The definition also includes
the <literal>%</literal> between the <literal>ENTITY</literal>
keyword and the name of the entity.</para>
<example>
<title>Defining Parameter Entities</title>
<programlisting><![CDATA[<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd" [
<!ENTITY % param.some "some">
<!ENTITY % param.text "text">
<!ENTITY % param.new "%param.some more %param.text">
]>]]></programlisting>
</example>
<para>This may not seem particularly useful. It will be.</para>
</sect2>
<sect2>
<title>For You to Do…</title>
<procedure>
<step>
<para>Add a general entity to
<filename>example.xml</filename>.</para>
<programlisting><![CDATA[<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd" [
<!ENTITY version "1.1">
]>
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>An Example XHTML File</title>
</head>
<body>
<p>This is a paragraph containing some text.</p>
<p>This paragraph contains some more text.</p>
<p align="right">This paragraph might be right-justified.</p>
<p>The current version of this document is: &version;</p>
</body>
</html>]]></programlisting>
</step>
<step>
<para>Validate the document using
<command>xmllint</command>.</para>
</step>
<step>
<para>Load <filename>example.xml</filename> into your web
browser (you may need to copy it to
<filename>example.html</filename> before your browser
recognizes it as an XHTML document).</para>
<para>Unless your browser is very advanced, you will not see
the entity reference <literal>&amp;version;</literal>
replaced with the version number. Most web browsers have
very simplistic parsers which do not handle XML DTD
constructs. Furthermore, the closing <literal>]&gt;</literal>
of the XML context is not recognized properly by browsers and
will probably be rendered.</para>
</step>
<step>
<para>The solution is to <emphasis>normalize</emphasis> your
document using an XML normalizer. The normalizer reads
in valid XML and outputs equally valid XML which has
been transformed in some way. One of the ways in which
the normalizer transforms the XML is to expand all the
entity references in the document, replacing the entities
with the text that they represent.</para>
<para>You can use <command>xmllint</command> to do
this. It also has an option to drop the initial
DTD section so that the closing <literal>]&gt;</literal>
does not confuse browsers:</para>
<screen>&prompt.user; <userinput>xmllint --noent --dropdtd example.xml > example.html</userinput></screen>
<para>You should find a normalized (i.e., entity references
expanded) copy of your document in
<filename>example.html</filename>, ready to load into your
web browser.</para>
</step>
</procedure>
</sect2>
</sect1>
<sect1 xml:id="xml-primer-include">
<title>Using Entities to Include Files</title>
<para>Entities (both
<link linkend="xml-primer-general-entities">general</link> and
<link linkend="xml-primer-parameter-entities">parameter</link>)
are particularly useful when used to include one file inside
another.</para>
<sect2 xml:id="xml-primer-include-using-gen-entities">
<title>Using General Entities to Include Files</title>
<para>Suppose you have some content for an XML book organized
into files, one file per chapter, called
<filename>chapter1.xml</filename>,
<filename>chapter2.xml</filename>, and so forth, with a
<filename>book.xml</filename> file that will contain these
chapters.</para>
<para>In order to use the contents of these files as the values
for your entities, you declare them with the
<literal>SYSTEM</literal> keyword. This directs the XML
parser to use the contents of the named file as the value of
the entity.</para>
<example>
<title>Using General Entities to Include Files</title>
<programlisting><![CDATA[<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd" [
<!ENTITY chapter.1 SYSTEM "chapter1.xml">
<!ENTITY chapter.2 SYSTEM "chapter2.xml">
<!ENTITY chapter.3 SYSTEM "chapter3.xml">
]>
<html xmlns="http://www.w3.org/1999/xhtml">
&chapter.1;
&chapter.2;
&chapter.3;
</html>]]></programlisting>
</example>
<warning>
<para>When using general entities to include other files
within a document, the files being included
(<filename>chapter1.xml</filename>,
<filename>chapter2.xml</filename>, and so on)
<emphasis>must not</emphasis> start with a DOCTYPE
declaration. This is a syntax error because entities
are low-level constructs and they are resolved before
any parsing happens.</para>
</warning>
</sect2>
<sect2>
<title>Using Parameter Entities to Include Files</title>
<para>Recall that parameter entities can only be used inside an
XML context. Why then would you want to include a file
within an XML context?</para>
<para>You can use this to ensure that you can reuse your general
entities.</para>
<para>Suppose that you had many chapters in your document, and
you reused these chapters in two different books, each book
organizing the chapters in a different fashion.</para>
<para>You could list the entities at the top of each book, but
this quickly becomes cumbersome to manage.</para>
<para>Instead, place the general entity definitions inside one
file, and use a parameter entity to include that file within
your document.</para>
<example>
<title>Using Parameter Entities to Include Files</title>
<para>First, place your entity definitions in a separate file,
called <filename>chapters.ent</filename>. This file
contains the following:</para>
<programlisting><![CDATA[<!ENTITY chapter.1 SYSTEM "chapter1.xml">
<!ENTITY chapter.2 SYSTEM "chapter2.xml">
<!ENTITY chapter.3 SYSTEM "chapter3.xml">]]></programlisting>
<para>Now create a parameter entity to refer to the contents
of the file. Then use the parameter entity to load the file
into the document, which will then make all the general
entities available for use. Then use the general entities
as before:</para>
<programlisting><![CDATA[<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd" [
<!ENTITY % chapters SYSTEM "chapters.ent">
%chapters;
]>
<html xmlns="http://www.w3.org/1999/xhtml">
&chapter.1;
&chapter.2;
&chapter.3;
</html>]]></programlisting>
</example>
</sect2>
<sect2>
<title>For You to Do…</title>
<sect3>
<title>Use General Entities to Include Files</title>
<procedure>
<step>
<para>Create three files, <filename>para1.xml</filename>,
<filename>para2.xml</filename>, and
<filename>para3.xml</filename>.</para>
<para>Put content similar to the following in each
file:</para>
<programlisting><![CDATA[<p>This is the first paragraph.</p>]]></programlisting>
</step>
<step>
<para>Edit <filename>example.xml</filename> so that it
looks like this:</para>
<programlisting><![CDATA[<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd" [
<!ENTITY version "1.1">
<!ENTITY para1 SYSTEM "para1.xml">
<!ENTITY para2 SYSTEM "para2.xml">
<!ENTITY para3 SYSTEM "para3.xml">
]>
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>An Example XHTML File</title>
</head>
<body>
<p>The current version of this document is: &version;</p>
&para1;
&para2;
&para3;
</body>
</html>]]></programlisting>
</step>
<step>
<para>Produce <filename>example.html</filename> by
normalizing <filename>example.xml</filename>.</para>
<screen>&prompt.user; <userinput>xmllint --dropdtd --noent example.xml > example.html</userinput></screen>
</step>
<step>
<para>Load <filename>example.html</filename> into your web
browser, and confirm that the
<filename>para<replaceable>n</replaceable>.xml</filename>
files have been included in
<filename>example.html</filename>.</para>
</step>
</procedure>
</sect3>
<sect3>
<title>Use Parameter Entities to Include Files</title>
<note>
<para>You must have taken the previous steps first.</para>
</note>
<procedure>
<step>
<para>Edit <filename>example.xml</filename> so that it
looks like this:</para>
<programlisting><![CDATA[<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd" [
<!ENTITY % entities SYSTEM "entities.ent"> %entities;
]>
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>An Example XHTML File</title>
</head>
<body>
<p>The current version of this document is: &version;</p>
&para1;
&para2;
&para3;
</body>
</html>]]></programlisting>
</step>
<step>
<para>Create a new file,
<filename>entities.ent</filename>, with this
content:</para>
<programlisting><![CDATA[<!ENTITY version "1.1">
<!ENTITY para1 SYSTEM "para1.xml">
<!ENTITY para2 SYSTEM "para2.xml">
<!ENTITY para3 SYSTEM "para3.xml">]]></programlisting>
</step>
<step>
<para>Produce <filename>example.html</filename> by
normalizing <filename>example.xml</filename>.</para>
<screen>&prompt.user; <userinput>xmllint --dropdtd --noent example.xml > example.html</userinput></screen>
</step>
<step>
<para>Load <filename>example.html</filename> into your web
browser, and confirm that the
<filename>para<replaceable>n</replaceable>.xml</filename>
files have been included in
<filename>example.html</filename>.</para>
</step>
</procedure>
</sect3>
</sect2>
</sect1>
<sect1 xml:id="xml-primer-marked-sections">
<title>Marked Sections</title>
<para>XML provides a mechanism to indicate that particular pieces
of the document should be processed in a special way. These are
termed <quote>marked sections</quote>.</para>
<example>
<title>Structure of A Marked Section</title>
<programlisting>&lt;![<replaceable>KEYWORD</replaceable>[
Contents of marked section
]]&gt;</programlisting>
</example>
<para>As you would expect, being an XML construct, a marked
section starts with <literal>&lt;!</literal>.</para>
<para>The first square bracket begins to delimit the marked
section.</para>
<para><replaceable>KEYWORD</replaceable> describes how this marked
section should be processed by the parser.</para>
<para>The second square bracket indicates that the content of the
marked section starts here.</para>
<para>The marked section is finished by closing the two square
brackets, and then returning to the document context from the
XML context with <literal>&gt;</literal>.</para>
<sect2>
<title>Marked Section Keywords</title>
<sect3>
<title><literal>CDATA</literal></title>
<para>This keyword denotes the marked section's
<emphasis>content model</emphasis>, and allows you to change
it from the default.</para>
<para>When an XML parser is processing a document it keeps
track of what is called the <quote>content
model</quote>.</para>
<para>Briefly, the content model describes what sort of
content the parser is expecting to see, and what it will do
with it when it finds it.</para>
<para>The content model you will probably find most
useful is <literal>CDATA</literal>.</para>
<para><literal>CDATA</literal> is for <quote>Character
Data</quote>. If the parser is in this content model then
it is expecting to see characters, and characters only. In
this model the <literal>&lt;</literal> and
<literal>&amp;</literal> symbols lose their special status,
and will be treated as ordinary characters.</para>
<note>
<para>When you use <literal>CDATA</literal>
in examples of text marked up in
XML, keep in mind that the content of
<literal>CDATA</literal> is not validated. You have to
check the included XML text using other means. You
could, for example, write the example in another document,
validate the example code, and then paste it to your
<literal>CDATA</literal> content.</para>
</note>
<!-- The nesting of CDATA within the next example is disgusting -->
<example>
<title>Using a <literal>CDATA</literal> Marked
Section</title>
<programlisting>&lt;para&gt;Here is an example of how you would include some text
that contained many &lt;literal&gt;&amp;lt;&lt;/literal&gt;
and &lt;literal&gt;&amp;amp;&lt;/literal&gt; symbols. The sample
text is a fragment of XHTML. The surrounding text (&lt;para&gt; and
&lt;programlisting&gt;) are from DocBook.&lt;/para&gt;
&lt;programlisting&gt;
&lt;![CDATA[<![CDATA[
<p>This is a sample that shows you some of the elements within
XHTML. Since the angle brackets are used so many times, it is
simpler to say the whole example is a CDATA marked section
than to use the entity names for the left and right angle
brackets throughout.</p>
<ul>
<li>This is a listitem</li>
<li>This is a second listitem</li>
<li>This is a third listitem</li>
</ul>
<p>This is the end of the example.</p>]]>
]]&gt;
&lt;/programlisting&gt;</programlisting>
<para>If you look at the source for this document you will
see this technique used throughout.</para>
</example>
</sect3>
<sect3>
<title><literal>INCLUDE</literal> and
<literal>IGNORE</literal></title>
<para>If the keyword is <literal>INCLUDE</literal> then the
contents of the marked section will be processed. If the
keyword is <literal>IGNORE</literal> then the marked section
is ignored and will not be processed. It will not appear in
the output.</para>
<example>
<title>Using <literal>INCLUDE</literal> and
<literal>IGNORE</literal> in Marked Sections</title>
<programlisting>&lt;![INCLUDE[
This text will be processed and included.
]]&gt;
&lt;![IGNORE[
This text will not be processed or included.
]]&gt;</programlisting>
</example>
<para>By itself, this is not too useful. If you wanted to
remove text from your document you could cut it out, or wrap
it in comments.</para>
<para>It becomes more useful when you realize you can use
<link linkend="xml-primer-parameter-entities">parameter
entities</link> to control this, yet this usage is limited
to entity files.</para>
<para>For example, suppose that you produced a hard-copy
version of some documentation and an electronic version. In
the electronic version you wanted to include some extra
content that was not to appear in the hard-copy.</para>
<para>Create an entity file that defines general entities
to include each chapter and guard these definitions with
a parameter entity that can be set to either
<literal>INCLUDE</literal> or <literal>IGNORE</literal>
to control whether the entity is defined. After these
conditional general entity definitions, place one more
definition for each general entity to set them to an
empty value. This technique makes use of the fact that
entity definitions cannot be overridden: the
first definition always takes effect. So you can control the
inclusion of your chapter with the corresponding parameter
entity; if you set it to <literal>INCLUDE</literal>, the
first general entity definition will be read and the
second one will be ignored, but if you set it to
<literal>IGNORE</literal>, the first definition will be
ignored and the second one will take effect.</para>
<example>
<title>Using A Parameter Entity to Control a Marked
Section</title>
<programlisting>
&lt;!ENTITY % electronic.copy "INCLUDE"&gt;
&lt;![%electronic.copy;[
&lt;!ENTITY chap.preface SYSTEM "preface.xml"&gt;
]]&gt;
&lt;!ENTITY chap.preface ""&gt;
</programlisting>
<para>When producing the hard-copy version, change the
parameter entity's definition to:</para>
<programlisting>&lt;!ENTITY % electronic.copy "IGNORE"&gt;</programlisting>
</example>
</sect3>
</sect2>
<sect2>
<title>For You to Do…</title>
<procedure>
<step>
<para>Modify the <filename>entities.ent</filename> file to contain
the following:</para>
<programlisting>&lt;!ENTITY version "1.1"&gt;
&lt;!ENTITY % conditional.text "IGNORE"&gt;
&lt;![%conditional.text;[
&lt;!ENTITY para1 SYSTEM "para1.xml"&gt;
]]&gt;
&lt;!ENTITY para1 ""&gt;
&lt;!ENTITY para2 SYSTEM "para2.xml"&gt;
&lt;!ENTITY para3 SYSTEM "para3.xml"&gt;</programlisting>
</step>
<step>
<para>Normalize the <filename>example.xml</filename> file and notice
that the conditional text is not present in the output document.
Now if you set the parameter entity guard to <literal>INCLUDE</literal>
and regenerate the normalized document, it will appear there again.
Of course, this method makes more sense if you have more conditional
chunks that depend on the same condition, for example, whether you are
generating printed or online text.</para>
</step>
</procedure>
</sect2>
</sect1>
<sect1 xml:id="xml-primer-conclusion">
<title>Conclusion</title>
<para>That is the conclusion of this XML primer. For reasons of
space and complexity several things have not been covered in
depth (or at all). However, the previous sections cover enough
XML for you to be able to follow the organization of the FDP
documentation.</para>
</sect1>
</chapter>
|