CSG memo 144: Abstraction Mechanisms in CLU.

2026-02-26 08:53:29 +00:00 · 2021-08-26 13:22:30 +02:00
parent 76e5b7cb8b
commit 74c3facf09
12 changed files with 2294 additions and 0 deletions
--- a/build/timestamps.txt
+++ b/build/timestamps.txt
@@ -156,6 +156,17 @@ clib/-read-.-this- 198002261810.43
 clib/tv.128 197908312338.58
 clu/clu.order 197711161922.32
 clu/action.refman 197806022022.04
+clu/clukey.r 197909041308.12
+clu/clup0.r 197711261712.18
+clu/clup1.r 197711261712.27
+clu/clup2.r 197711261714.24
+clu/clup3.r 197711261712.44
+clu/clup4.r 197711261716.29
+clu/clup5.r 197711261713.02
+clu/clup6.r 197711261713.08
+clu/clup7.r 197711261713.18
+clu/clupap.header 197711261651.18
+clu/clupap.r 197711261651.32
 clu/clusym.r 197806271243.01
 clu/exampl.refman 197805301747.35
 clu/except.refman 197806061946.59
--- a/doc/clu/clukey.r
+++ b/doc/clu/clukey.r
@@ -0,0 +1,27 @@
+.
+ string registers for CLU keywords
+.
+.de bold
+.fr i 0 nargs-1
+.sr \\i 1\\i*
+.en
+.em
+.
+.bold any array
+.bold begin bool break
+.bold cand char cluster continue cor cvt
+.bold do down
+.bold else elseif end except exit
+.bold false for force
+.bold has
+.bold if in int is iter itertype
+.bold nil null
+.bold oneof others own
+.bold proc proctype
+.bold real record rep resignal return returns
+.bold sequence signal signals string struct
+.bold tag tagcase then true type
+.bold up
+.bold variant
+.bold when where while
+.bold yield yields
--- a/doc/clu/clup0.r
+++ b/doc/clu/clup0.r
@@ -0,0 +1,293 @@
+.nd started 0
+.nr do_refs 0
+.if ~started
+.nr do_refs 1
+.en
+.so clu/clupap.header
+.so r/ref3.rmac
+.if csg_memo==0
+.ls 1
+.if narrow
+.new_font 1
+.ef
+.new_font 3
+.en
+.nf c
+.vp 2i
+Abstraction Mechanisms in CLU
+.if narrow
+.new_font 0
+.ef
+.new_font 1
+.en
+.sp .5i
+Barbara Liskov
+Alan Snyder
+Russell Atkinson
+Craig Schaffert
+.sp .3i
+Laboratory for Computer Science
+Massachusetts Institute of Technology
+545 Technology Square
+Cambridge, MA 02139
+.sp 2
+.if ~narrow
+.vp 8.5i
+.en
+.nf l
+.fi
+.new_font 0
+This research was supported in part by the Advanced Research
+Projects Agency of the Department of Defense, monitored by the
+Office of Naval Research under contract N00014-75-C-0661, and
+in part by the National Science Foundation under grant DCR74-21892.
+.ls
+.end
+.if csg_memo>0
+.ls 1
+.nf c
+.new_font 3
+Massachusetts Institute of Technology
+Laboratory for Computer Science
+.new_font 0
+(formerly Project MAC)
+.sp 1.25i
+Computation Structures Group Memo csg_memo-1
+.sp 1.25i
+.new_font 4
+Abstraction Mechanisms in CLU
+.new_font 1
+.sp
+by
+.sp
+Barbara Liskov
+Alan Snyder
+Russell Atkinson
+Craig Schaffert
+.new_font 0
+.nf l
+.vp 8.25i
+.fi
+.new_font 0
+This research was supported in part by the Advanced Research
+Projects Agency of the Department of Defense, monitored by the
+Office of Naval Research under contract N00014-75-C-0661, and
+in part by the National Science Foundation under grant DCR74-21892.
+.nf c
+.sp .5i
+January 1977
+.nf l
+.fi
+.ls
+.end
+.
+.
+.if narrow
+.sp 2
+.new_font 1
+.ll 7i
+.ef
+.bp
+.rs
+.vp 3i
+.new_font 3
+.en
+ABSTRACT
+.new_font 0
+.sp
+.ns
+.para
+CLU is a new programming language designed to support
+the use of abstractions in program construction.
+Work in programming methodology has led to the realization
+that three kinds of abstractions,
+procedural, control, and especially data abstractions,
+are useful in the programming process.
+Of these, only the procedural abstraction
+is supported well by conventional languages,
+through the procedure or subroutine.
+CLU provides, in addition to procedures,
+novel linguistic mechanisms that
+support the use of data and control abstractions.
+.para
+This paper provides an introduction to the abstraction mechanisms
+in CLU.
+By means of programming examples, we illustrate the utility of
+the three kinds of abstractions in program construction
+and show how CLU programs may be written to use
+and implement abstractions.
+We also discuss the CLU library, which permits
+incremental program development with complete
+type-checking performed at compile-time.
+.sp
+.fi l
+Key words and phrases: programming languages, data types,
+data abstractions, control abstractions, programming
+methodology, separate compilation.
+.sp
+CR categories:  4.0, 4.12, 4.20, 4.22.
+.br
+.fi b
+.if narrow
+.ll
+.en
+.
+.ref All71
+Allen, F. E. and Cocke, J.
+A catalogue of optimizing transformations.
+Rep. RC 3548,
+IBM Thomas J. Watson Research Center,
+Yorktown Heights, N.@Y., 1971.
+.em
+.ref All75
+Allen, F. E.
+A program data flow analysis procedure.
+Rep. RC 5278,
+IBM Thomas J. Watson Research Center,
+Yorktown Heights, N.@Y., 1975.
+.em
+.ref Atk76
+Atkinson, R. R.
+Optimization techniques for a structured programming language.
+S.M. Thesis,
+Dept. of Electrical Engineering and Computer Science,
+M.@I.@T., Cambridge, Mass., June 1976.
+.em
+.ref Dah70
+Dahl, O. J., Myhrhaug, B., and Nygaard, K.
+The SIMULA 67 common base language.
+Publication S-22, Norwegian Computing Center, Oslo, 1970.
+.em
+.ref DK75
+DeRemer, F. and Kron, H.
+Programming-in-the-large versus programming-in-the-small.
+2Proceedings of International Conference on Reliable Software*,
+2SIGPLAN Notices 10*, 6 (June 1975), 114-121.
+.em
+.ref Dij72
+Dijkstra, E. W.  
+Notes on structured programming.
+2Structured Programming,
+A.P.I.C. Studies in Data Processing No. 8*,
+Academic Press, New York 1972, 1-81.
+.em
+.ref Guttag
+Guttag, J. V., Horowitz, E., and Musser, D. R.
+Abstract data types and software validation.
+Rep. ISI/RR-76-48, Information Sciences Institute,
+University of Southern California, Marina del Rey,
+Calif., August 1976.
+.em
+.ref Hoare72
+Hoare, C. A. R.
+Proof of correctness of data representations.
+2Acta Informatica*, 4 (1972), 271-281.
+.em
+.ref Knu73
+Knuth, D.
+2The Art of Computer Programming*, vol. 3.
+Addison Wesley, Reading, Mass., 1973.
+.em
+.ref LCS75
+2Laboratory for Computer Science Progress Report 1974-1975*,
+Computation Structures Group.
+Rep. PR-XII,
+Laboratory for Computer Science, M.@I.@T.,
+to be published.
+.em
+.ref Lam71
+Lampson, B. W.
+Protection.
+Proc. Fifth Annual Princeton Conference on Information
+Sciences and Systems, Princeton University, 1971, 437-443.
+.em
+.ref Lis74
+Liskov, B. H. and Zilles, S. N.
+Programming with abstract data types.
+Proc. ACM SIGPLAN Conference on Very High Level Languages,
+2SIGPLAN Notices 9*, 4 (April 1974), 50-59.
+.em
+.ref Lis75
+Liskov, B. H. and Zilles, S. N.
+Specification techniques for data abstractions.
+2IEEE Trans. on Software Engineering*, 2SE-1*,
+(1975), 7-19.
+.em
+.ref Lis76
+Liskov, B. H. and Berzins, V.
+An appraisal of program specifications.
+Computation Structures Group Memo 141,
+Laboratory for Computer Science,
+M.@I.@T., Cambridge, Mass., July 1976.
+.em
+.ref McC62
+McCarthy, J., et al.
+2LISP 1.5 Programmer's Manual*, MIT Press, 1962.
+.em
+.ref Mor73
+Morris, J. H.
+Protection in programming languages.
+2Comm. ACM 16*, 1 (Jan 1973), 15-21.
+.em
+ .ref Mor74
+ Morris, J. H.
+ Toward more flexible type systems.
+ Proceedings of the Programming Symposium, Paris, April 9-11, 1974,
+ 2Lecture Notes in Computer Science 19*, Springer-Verlag, New York, 
+ 377-384.
+ ..
+.ref Par71
+Parnas, D. L.
+Information distribution aspects of design methodology.
+Proc. IFIP 1971.
+.em
+.ref Sch76
+Scheifler, R. W.
+An analysis of inline substitution for the CLU programming language.
+Computation Structures Group Memo 139,
+Laboratory for Computer Science,
+M.@I.@T., Cambridge, Mass., June 1976.
+.em
+.ref Spitzen
+Spitzen, J. and Wegbreit, B.
+The verification and synthesis of data structures.
+2Acta Informatica*, 4 (1975), 127-144.
+.em
+.ref Standish
+Standish, T. A.
+2Data structures:  an axiomatic approach*.
+Rep. 2639, Bolt Beranek and Newman, Cambridge,
+Mass., 1973.
+.em
+.ref Thomas
+Thomas, J. W.
+Module interconnection in programming systems supporting
+abstraction.
+Rep. CS-16, Computer Science Program, Brown University,
+Providence, R.@I., 1976.
+.em
+.ref Wir71a
+Wirth, N.
+Program development by stepwise refinement.
+2Comm. ACM 14*, 4 (1971), 221-227.
+.em
+.ref Wir71b
+Wirth, N.
+The programming language PASCAL.
+2Acta Informatica*, 1 (1971), 35-63.
+.em
+.ref Wul84
+Wulf, W. A., London, R., and Shaw, M.
+An introduction to the construction and verification
+of Alphard programs.
+2IEEE Transactions on Software Engineering SE-2*,
+(1976), 253-264.
+.em
+.bp
+.if do_refs
+.insert_refs
+.en
+.if narrow
+.rs
+.sp 3i
+.en
--- a/doc/clu/clup1.r
+++ b/doc/clu/clup1.r
@@ -0,0 +1,212 @@
+.so clu/clupap.header
+.chapter "Introduction"
+.para
+The motivation for the design of the CLU programming
+language was to provide programmers with a tool that would
+enhance their effectiveness in constructing programs of
+high quality -- programs that are reliable and reasonably
+easy to understand, modify, and maintain.
+CLU aids programmers
+by providing constructs that support
+the use of abstractions in program design and implementation.
+.para
+The quality of software depends primarily on
+the programming methodology in use.
+The choice of programming language, however, can have a major impact on
+the effectiveness of a methodology.
+A methodology can be easy
+or difficult to apply in a given language, depending on
+how well the language constructs match the 
+structures that the methodology deems desirable.
+The presence of constructs that give a concrete form
+for the desired structures makes the methodology more understandable.
+In addition, a programming language influences the way that
+its users think about programming;
+matching a language to a methodology increases the likelihood that
+the methodology will be used.
+.para
+CLU has been designed to support a methodology
+(similar to
+[Dij72,@Wir71a])
+in which programs are developed by
+means of problem decomposition based on the recognition 
+of abstractions.
+A program is constructed in many
+stages.
+At each stage, the problem to be solved is
+how to implement some abstraction (the initial problem
+is to implement the abstract behavior required of the
+entire program).
+The implementation is developed by envisioning a number
+of subsidiary abstractions (abstract objects and
+operations) that are useful in the problem domain.
+Once the behavior of the abstract objects and operations
+has been defined, a program can be written to solve the
+original problem; in this program, the abstract objects
+and operations are used as primitives.
+Now the original
+problem has been solved, but new problems have arisen,
+namely, how to implement the subsidiary abstractions.
+Each of these abstractions is
+considered in turn as a new problem; its implementation
+may introduce further abstractions.
+This process 
+terminates when all the abstractions introduced at various
+stages have been implemented or are present in the
+programming language in use.
+.para
+In this methodology, programs are developed
+incrementally, one abstraction at a time.
+Further, a distinction is made between an abstraction,
+which is a kind of behavior, and a program,
+or 2module*, which implements that behavior.
+An abstraction isolates
+use from implementation:  an abstraction can be used
+without knowledge of its implementation and implemented
+without knowledge of its use.
+These aspects of the methodology are supported by the
+CLU 2library*, which maintains
+information about abstractions
+and the CLU modules that implement them.
+The library permits separate compilation of
+modules with complete type-checking at
+compile-time.
+.para
+To make effective use of the
+methodology, it is necessary to understand the kinds
+of abstractions that are useful in constructing programs.
+In studying this question,
+we identified an important kind of abstraction,
+the data abstraction, that
+had been largely neglected in discussions of programming methodology.
+.para
+A data abstraction [Hoare72,@Lis74,@Standish]
+is used to introduce a new
+type of data object that is deemed useful
+in the domain of the problem being solved.
+At the level of use, the programmer is
+concerned with the 2behavior* of these data objects,
+what kinds of information can be stored in them and
+obtained from them.
+The programmer is 2not* concerned
+with how the data objects are represented in storage,
+nor with the algorithms used to store and access
+information in them.
+In fact, a data abstraction is
+often introduced to delay such implementation
+decisions until a later stage of design.
+.para
+The behavior of the data objects is expressed most
+naturally in terms of a set of operations that are meaningful
+for those objects.
+This set will include operations
+to create objects, to obtain information from them,
+and possibly to modify them.
+For example,
+push and pop are among the meaningful operations for stacks,
+while meaningful operations for integers include the usual
+arithmetic operations.
+Thus, a data abstraction consists of a
+set of objects and a set of operations
+characterizing the behavior of the
+objects.
+.para
+If a data abstraction is to be
+understandable at an abstract level,
+the behavior of the data objects must be
+2completely* characterized by the set of operations.
+This property is ensured by making the operations the
+2only direct means* of creating and manipulating the objects.
+One effect of this restriction
+is that, when defining an abstraction,
+the programmer must be careful to include a
+sufficient set of operations, since every action
+he wishes to perform on the objects must be
+realized in terms of this set.
+.para
+We have identified the following requirements that must be
+satisfied by a language supporting data abstractions:
+.ilist 3
+1.	A linguistic construct is needed that permits
+a data abstraction to be implemented as a unit.
+The implementation involves selecting a representation
+for the data objects and defining an algorithm for each
+operation in terms of that representation.
+.next
+2.	The language must limit access to the
+representation to just the operations.  This limitation
+is necessary to ensure that the operations completely
+characterize the behavior of the objects.
+.end_list
+CLU satisfies these requirements by providing a linguistic construct
+called a 2cluster* for implementing data abstractions.
+Data abstractions are integrated into the language
+through the data type mechanism.
+Access to the representation is
+controlled by type-checking, which is done at 
+compile time.
+.para
+In addition to data abstractions, CLU
+supports two other kinds of abstractions:
+procedural abstractions and control abstractions.
+A procedural abstraction performs a computation on a
+set of input objects and produces a set of output objects;
+examples of procedural abstractions are sorting an
+array and computing a square root.
+CLU supports procedural abstractions by means of procedures,
+which are similar to procedures in other programming languages.
+.para
+A control abstraction defines a method
+for sequencing arbitrary actions.
+All languages provide built-in control abstractions;
+examples are the if statement and the while statement.
+In addition, however,
+CLU allows user definitions of a simple kind of control abstraction.
+The method provided is a generalization of the
+repetition methods available in many programming
+languages.
+Frequently the programmer desires to
+perform the same action for all the objects in a
+collection, such as all
+characters in a string or all items in a set.
+CLU
+provides a linguistic construct called an 2iterator*
+for defining how the objects in the
+collection are obtained.
+The iterator is used in
+conjunction with the for statement; the body
+of the for statement describes the action to be
+taken.
+.para
+The purpose of this paper is to illustrate
+the utility of the three kinds of abstractions
+in program construction,
+and to provide an informal introduction to CLU.
+We do not attempt a complete description of the language;
+rather, we concentrate on the constructs that
+support abstractions.
+The presence of these
+constructs constitutes the most important way in
+which CLU differs from other languages.
+The language closest to CLU is Alphard [Wul84],
+which represents a concurrent design effort with goals similar to
+our own.
+The design of CLU has been influenced by
+SIMULA 67 [Dah70], and to a lesser extent by
+Pascal [Wir71b] and LISP [McC62].
+.para
+In the next section we introduce CLU and,
+by means of a programming example,
+illustrate the use and implementation
+of data abstractions.
+Section semantics describes the basic semantics of CLU.
+In Section more_abstraction, we discuss
+control abstractions and more powerful kinds of
+data abstractions.
+We present the CLU library in Section library.
+Section implementation briefly describes
+the current implementation of CLU
+and discusses efficiency considerations.
+.ne 2
+Finally, we conclude by discussing
+the quality of CLU programs.
--- a/doc/clu/clup2.r
+++ b/doc/clu/clup2.r
@@ -0,0 +1,595 @@
+.nd chapter 2-1
+.so clu/clupap.header
+.
+ string registers for italic variable names
+.
+.sr i 2i*
+.sr s 2s*
+.sr o 2o*
+.sr c 2c*
+.sr n 2n*
+.sr t 2t*
+.sr r 2r*
+.sr x 2x*
+.sr tr 2tr*
+.sr w 2w*
+.sr wb 2wb*
+.sr total 2total*
+.sr contents 2contents*
+.sr count_words 2count_words*
+.sr next_word 2next_word*
+.sr wordbag 2wordbag*
+.sr wordtree 2wordtree*
+.sr wordbags 2wordbags*
+.sr wordtrees 2wordtrees*
+.sr insert 2insert*
+.sr create 2create*
+.sr print 2print*
+.sr instream 2instream*
+.sr instreams 2instreams*
+.sr outstream 2outstream*
+.sr outstreams 2outstreams*
+.
+.chapter "An Example of Data Abstraction"
+.para
+This section introduces the basic data
+abstraction mechanism of CLU, the cluster.
+By means of an example, we intend to show how
+abstractions occur naturally in program design,
+and how they are used and implemented in CLU.
+In particular, we show how a data abstraction
+can be used as structured intermediate storage.
+.para
+Consider the following problem:
+Given some document, we wish to compute,
+for each distinct word in the document,
+the number of times the word occurs
+and its frequency of occurrence as a percentage of the total
+number of words.
+The document will be
+represented as a sequence of characters.
+A word is any non-empty sequence of
+alphabetic characters.
+Adjacent words are
+separated by one or more non-alphabetic
+characters such as spaces, punctuation, or newline
+characters.
+In recognizing distinct words, the
+difference between upper and lower case letters should
+be ignored.
+.para
+The output is also to be a sequence of characters,
+divided into lines.
+Successive lines should contain an alphabetical
+list of all the distinct words in the document,
+one word per line.
+Accompanying each word should
+be the total number of occurrences and the
+.ne 5
+frequency of occurrence.  For example:
+.table
+.ta 8 20 28
+	a	2	3.509%
+	access	1	1.754%
+	and	2	3.509%
+		dots
+.rtabs
+.end_table
+.para
+Specifically, we are required to write the
+procedure count_words, which takes two arguments:
+an instream and an outstream.
+The former is the
+source of the document to be processed, and the latter
+is the destination of the required output.
+.ne 5
+The form of this procedure will be
+.code
+	count_words = proc (i: instream, o: outstream);
+		dots
+		end count_words;
+.end_code
+Note that count_words does not return any results;
+its only effects are modifications of i (reading the entire
+document) and of o (printing the required statistics).
+.para
+2Instream* and outstream are data abstractions.
+An instream i contains a sequence of characters.
+Of the primitive
+operations on instreams, only two will be of interest to us.
+2Empty@(i)* returns true if there are no characters available
+in i, and returns false otherwise.
+2Next@(i)* removes the first character from the sequence
+and returns it.
+Invoking the next operation on an empty instream is an
+error.
+.foot
+The CLU error handling mechanism is discussed in [LCS75].
+.efoot
+An outstream also contains a sequence of characters.
+The interesting operation on outstreams is
+2put_string@(s,@o)*,
+which appends the string s to the existing sequence of characters
+in o.
+.para
+Now consider how we might implement count_words.
+We begin by deciding how to handle words.
+We could define a new abstract data type 2word*.
+However, we choose instead to use strings (a primitive
+CLU type), with the restriction that only strings of
+lower-case alphabetic characters will be used.
+.foot
+Sometimes it is difficult to decide whether to introduce
+a new data abstraction or to use an existing abstraction.
+Our decision to use strings to represent words was made
+partly to shorten the presentation.
+.efoot
+.para
+Next, we investigate how to scan the document.
+Reading a word requires knowledge of the
+exact way in which words occur in the input stream.
+We choose to isolate this information in a procedural abstraction,
+called next_word,
+which takes in the instream i and returns the next word
+(converted to lower case characters) in the document.
+If there are no more words,
+next_word must communicate this fact to count_words.
+A simple way to indicate that there are no
+more words is by returning an ``end of document'' word,
+one that is distinct from any other word.
+A reasonable choice for the ``end of document'' word is
+the empty string.
+.para
+It is clear that in count_words we must scan the
+entire document before we can print our results, and
+therefore, we need some receptacle
+to retain information about words between these two
+actions (scanning and printing).
+Recording the
+information gained in the scan and organizing it
+for easy printing will probably be fairly complex.
+Therefore, we will defer such considerations until later
+by introducing a data abstraction wordbag with the
+appropriate properties.
+In particular, wordbag provides
+three operations:  create, which creates an empty wordbag;
+insert, which adds a word to the wordbag; and print, which
+prints the desired statistical information about the words
+in the wordbag.
+.foot
+The print operation is not the ideal choice, but a better
+solution requires the use of control abstractions.
+This solution is presented in Section more_abstraction.
+.efoot
+.nr count_words current_figure
+.para
+The implementation of count_words is shown in
+Figure count_words.
+.begin_figure "The count_words procedure."
+.code
+count_words = proc (i: instream, o: outstream);
+
+	% create an empty wordbag
+	wb: wordbag := wordbag$create ();
+
+	% scan document, adding each word found to wb
+	w: string := next_word (i);
+	while w ~= "" do
+		wordbag$insert (wb, w);
+		w := next_word (i);
+		end;
+
+	% print the wordbag
+	wordbag$print (wb, o);
+
+	end count_words;
+.ns
+.end_code
+.finish_figure
+The ``%'' character starts a comment,
+which continues to the end of the line.
+The ``~'' character stands for boolean negation.
+The notation 2variable:@type* is used
+in formal argument lists and declarations
+to specify the types of variables;
+a declaration may be combined with an assignment
+specifying the initial value of the variable.
+Boldface is used for reserved words, including the
+names of primitive CLU types.
+ CLU does not permit
+ redefinition of the primitive types; however,
+ primitive types are used in the same way as abstract
+ types.
+.para
+The count_words procedure declares four variables:
+i, o, wb, and w.
+The first two denote the instream and
+outstream that are passed as arguments to count_words.
+The third, wb, denotes the wordbag used to hold
+the words read so far,
+and the fourth, w, the word
+currently being processed.
+.para
+Operations of a data abstraction are named by
+a compound form that specifies both the type and
+the operation name.  Three examples of operation calls
+appear in count_words: 2wordbag$create@()*,
+2wordbag$insert@(wb,@w)*
+and 2wordbag$print@(wb,@o)*.
+The CLU system provides a mechanism that avoids conflicts
+between names of abstractions; this mechanism is discussed in
+Section library.
+However, operations of two different data abstractions may have
+the same name;
+the compound form serves to resolve this ambiguity.
+Although the ambiguity could in most cases be resolved by context,
+we have found in using CLU that the compound
+form enhances the readability of programs.
+.nr next_word current_figure
+.para
+The implementation of next_word is shown in
+Figure next_word.
+.begin_figure "The next_word procedure."
+.code
+next_word = proc (i: instream) returns (string);
+
+	c: char := 1' '*;
+
+	% scan for first alphabetic character
+	while ~alpha (c) do
+		if instream$empty (i)
+			then return "";
+			end;
+		c := instream$next (i);
+		end;
+
+	% accumulate characters in word
+	w: string := "";
+	while alpha (c) do
+		w := string$append (w, lower_case (c));
+		if instream$empty (i)
+			then return w;
+			end;
+		c := instream$next (i);
+		end;
+
+	return w;	% the non-alphabetic character c is lost
+
+	end next_word;
+.ns
+.end_code
+.finish_figure
+The 2string$append* operation creates a new string
+by appending a character to the characters in the
+string argument
+(it does 2not* modify the string argument).
+Note the use of the instream operations
+2next* and 2empty*.
+Note also that two additional procedures have been used:
+2alpha@(c)*,
+which tests whether a character is alphabetic or not,
+and 2lower_case@(c)*,
+which returns the lower case version of a character.
+The implementations of these procedures are not shown in the paper.
+.para
+Now we must implement the type wordbag.
+.ne 5
+The cluster will have the form
+.code
+	wordbag = cluster is create, insert, print;
+		dots
+		end wordbag;
+.end_code
+This form expresses the idea that the data abstraction is a set
+of operations as well as a set of objects.
+The cluster must
+provide a representation for objects of the type wordbag and
+an implementation for each of the operations.
+We are free to choose from the possible representations the
+one best suited to our use of the wordbag cluster.
+.para
+The representation that we choose should allow
+reasonably efficient storage of words and easy printing,
+in alphabetic order, of the words and associated statistics.
+For efficiency in computing the statistics, maintaining
+a count of the total number of words in the document
+would be helpful.
+Since the total number of words in the document is probably
+much larger than the number of distinct words, the
+representation of a wordbag should contain only one ``item'' for
+each distinct word (along with a multiplicity count), rather
+than one ``item'' for each occurrence.
+This choice of representation requires that, at
+each insertion, we check whether the new word is already
+present in the wordbag.
+We would like a representation that
+allows the search for a matching ``item'' and the insertion of a
+not-previously-present ``item'' to be efficient.
+A binary tree representation [Knu73] fits our requirements nicely.
+.para
+Thus the main part of the wordbag representation will
+consist of a binary tree.
+The binary tree is another data abstraction,
+wordtree.  The data abstraction wordtree
+provides operations very similar to those of wordbag:
+2create@()* returns an empty wordtree;
+2insert@(tr,@w)* returns a wordtree containing all the
+words in the wordtree tr plus the additional word w
+(the wordtree tr may be modified in the process);
+and 2print@(tr,@n,@o)* prints the contents of the
+wordtree tr in alphabetic order on outstream o, along with the
+number of occurrences and the frequency (based on a total of
+n words).
+.nr wordbag current_figure
+.para
+The implementation of wordbag is given in Figure wordbag.
+.begin_figure "The wordbag cluster."
+.code
+wordbag = cluster is
+	create,		% create an empty bag
+	insert,		% insert an element
+	print;		% print contents of bag
+
+	rep = record [contents: wordtree, total: int];
+
+create = proc () returns (cvt);
+	return rep${contents: wordtree$create (), total: 0};
+	end create;
+
+insert = proc (x: cvt, v: string);
+	x.contents := wordtree$insert (x.contents, v);
+	x.total := x.total + 1;
+	end insert;
+
+print = proc (x: cvt, o: outstream);
+	wordtree$print (x.contents, x.total, o);
+	end print;
+
+end wordbag;
+.ns
+.end_code
+.finish_figure
+Following the header, we find the definition of the
+.ne 3
+representation selected for wordbag objects:
+.code
+	rep = record [contents: wordtree, total: int];
+.end_code
+The reserved type identifier rep indicates that the type
+specification to the right of the equal sign is the representing
+type for the cluster.
+We have defined the representation of a wordbag object to
+consist of two pieces:  a wordtree,
+as explained above, and an integer, which records the total
+number of words in the wordbag.
+.para
+A CLU record is an object with one or more named
+components.
+For each component name, there is an operation to select
+and an operation to set the corresponding component.
+The operation 2get_n@(r)* returns the n component
+of the record r (this operation is usually
+abbreviated 2r.n*).
+The operation 2put_n@(r,@x)* makes x the n component
+of the record r (this operation is usually
+abbreviated 2r.n@*:=2@x*,
+by analogy with the assignment statement).
+A new record is created by an expression of the form
+type${name1: value1, dots}.
+.para
+There are two different 
+types associated with any cluster:  the abstract
+type being defined (wordbag in this case) and the
+representation type (the record).
+Outside of the cluster,
+type-checking will ensure that a wordbag object will always be
+treated as such.
+In particular, the ability to convert a wordbag object into its
+representation is not provided (unless one of the
+wordbag operations does so explicitly).
+.para
+Inside the cluster, however, it is necessary to view
+a wordbag object as being of the representation type,
+because the implementations of the
+operations are defined in terms of the representation.
+This change of viewpoint is signalled by having the
+reserved word cvt appear as the type of an
+argument (as in the insert and print operations).
+1Cvt* may also appear as a return type
+(as in the create operation);
+here it indicates that a returned object
+will be changed into an object of abstract type.
+Whether cvt appears as the type of an
+argument or as a return type,
+it stipulates a ``conversion'' of viewpoint
+between the external abstract type and the internal representation type.
+1Cvt* can be used only within a cluster,
+and conversion can be done only between the single abstract
+type being defined and the (single) representation type.
+.foot
+1Cvt* corresponds to Morris' seal and unseal [Mor73],
+except that 1cvt* represents a change in viewpoint only;
+no computation is required.
+.efoot
+.para
+The procedures in wordbag are very simple.
+2Create* builds a new instance of the rep by use of the
+.ne 3
+record constructor
+.code
+	rep${contents: wordtree$create (), total: 0}
+.end_code
+Here total is initialized to 0, and contents to the
+empty wordtree (by calling the create operation of wordtree).
+This rep object is converted to a wordbag object as it
+is being returned.
+2Insert* and print are implemented directly
+in terms of wordtree operations.
+.nr wordtree current_figure
+.para
+The implementation of wordtree is shown in Figure wordtree.
+In the wordtree representation, each node
+contains a word and the number of times that word has been
+inserted into the wordbag, as well as two subtrees.
+.begin_page_figure "The wordtree cluster."
+.code
+wordtree = cluster is
+	create,		% create empty contents
+	insert,		% add item to contents
+	print;		% print contents
+
+	node = record [m!value: string, count: int,
+(mark!m)lesser: wordtree, greater: wordtree];
+	rep = oneof [empty: null, non_empty: node];
+
+create = proc () returns (cvt);
+	return rep$make_empty (nil);
+	end create;
+
+insert = proc (x: cvt, v: string) returns (cvt);
+	tagcase x
+		tag empty:
+			n: node := node${m!value: v, count: 1,
+(mark!m)lesser: wordtree$create (),
+(mark!m)greater: wordtree$create ()};
+			return rep$make_non_empty (n);
+		tag non_empty (n: node):
+			if v = n.value
+					then n.count := n.count + 1;
+				elseif v < n.value
+					then n.lesser := wordtree$insert (n.lesser, v);
+				else n.greater := wordtree$insert (n.greater, v);
+				end;
+			return x;
+		end;
+	end insert;
+
+print = proc (x: cvt, total: int, o: outstream);
+	tagcase x
+		tag empty: ;
+		tag non_empty (n: node):
+			wordtree$print (n.lesser, total, o);
+			print_word (n.value, n.count, total, o);
+			wordtree$print (n.greater, total, o);
+		end;
+	end print;
+
+end wordtree;
+.ns
+.end_code
+.finish_figure
+For any
+particular node, the words in the ``lesser'' subtree must
+alphabetically precede the word in the node, and the words
+in the ``greater'' subtree must follow the word in the node.
+.ne 4
+This information is described by 
+.code
+	node = record [m!value: string, count: int,
+(mark!m)lesser: wordtree, greater: wordtree];
+.end_code
+which defines ``node'' to be an
+abbreviation for the information following
+the equal sign.
+(The reserved word rep is used similarly,
+as an abbreviation for the representation type.)
+.para
+Now consider the representation of wordtrees.
+A non-empty wordtree can be represented by its top node.
+An empty wordtree, however, contains no information.
+The ideal type to represent an empty wordtree
+is the CLU type null,
+which has a single data object nil.
+So the representation of a wordtree should
+be either a node or nil.
+.ne 3
+This representation is expressed by
+.code
+	rep = oneof [empty: null, non_empty: node];
+.end_code
+.para
+Just as the record is the basic CLU
+mechanism to form an object
+that is a collection of other objects,
+the oneof is the basic CLU mechanism to form an object
+that is ``one of'' a set of alternatives.
+Oneof is CLU's method of forming a
+discriminated union, and is somewhat similar to 
+a variant component of a record in Pascal [Wir71b].
+.para
+An object of the type oneof@[s1:@T1 dots sn:@Tn]
+can be thought of as a pair.
+The ``tag'' component is an
+identifier from the set {s1 dots sn}.
+The ``value''
+component is an object of the type corresponding to the
+tag.
+That is, if the tag component is si then the
+value is some object of type Ti.
+.para
+Objects of type oneof@[s1:@T1 dots sn:@Tn]
+are created by the operations 2make_si@(x)*, each of
+which takes an object x of type Ti
+and returns the pair <si,@x>.
+Because the type of the value component of a oneof object is not
+known at compile-time, allowing direct access
+to the value component
+could result in a run-time type error (e.g., assigning an object
+to a variable of the wrong type).  To eliminate this possibility,
+.ne 7
+we require the use of a special tagcase statement to decompose
+a oneof object:
+.code
+	tagcase e
+		tag s1 (id1: T1):  @@@m!statements dots
+			dots
+		tag sn (idn: Tn):(mark!m)statements dots
+		end;
+.end_code
+This statement evaluates the expression 2e*
+to obtain an object of type
+oneof@[s1:@T1@dots@sn:@Tn].
+If the tag is si,
+then the value is assigned to the new variable
+idi and the statements following the ith alternative
+are executed.
+The variable idi is local to those statements.
+If, for some reason, we do not need the value,
+we can omit the parenthesized variable declaration.
+.para
+The reader should now know enough to understand
+Figure wordtree.
+Note, in the create operation, the use
+of the construction operation 2make_empty*
+of the representation type of wordtree
+(the discriminated union oneof@[empty:@null,@non_empty:@node])
+to create the empty wordtree.
+The tagcase statement is used in both insert and print.
+Note that if insert is given an empty wordtree, it creates a
+new top node for the returned value,
+but if insert is given a non-empty wordtree,
+it modifies the given wordtree and returns it.
+.foot
+It is necessary for insert to return a value in addition to
+having a side-effect because, in the case of an empty wordtree
+argument, side-effects are not possible.  Side-effects are not
+possible because of the representation chosen for the empty
+wordtree and because of the CLU parameter passing mechanism
+(see Section semantics).
+.efoot
+The insert operation depends on the dynamic
+allocation of space for newly-created records (see
+Section semantics).
+.para
+The print operation uses the obvious recursive descent.
+It makes use of procedure
+2print_word@(w,@c,@t,@o)*, which generates a single line of
+output on 2o*, consisting of the word 2w*,
+the count 2c*, and the frequency of occurrence
+derived from 2c* and 2t*.
+The implementation of 2print_word* has been omitted.
+.para
+We have now completed
+our first discussion of the count_words procedure.
+We return to this problem in Section more_abstraction,
+where we present a superior solution.
--- a/doc/clu/clup3.r
+++ b/doc/clu/clup3.r
@@ -0,0 +1,181 @@
+.nd chapter 3-1
+.nd current_figure 5
+.nd wordbag 3
+.so clu/clupap.header
+.sr m 2m*
+.sr p 2p*
+.sr q 2q*
+.sr x 2x*
+.sr y 2y*
+.sr z 2z*
+.sr a 2a*
+.sr b 2b*
+.sr insert 2insert*
+.sr increment 2increment*
+.chapter "Semantics"
+.para
+All languages present their users with some model of computation.
+This section describes those aspects of CLU semantics that differ
+from the common ALGOL-like model.
+In particular, we discuss
+CLU's notions of objects and variables,
+and the definitions of assignment and argument passing that
+follow from these notions.
+We also discuss type correctness.
+.section "Objects and Variables"
+.para
+The basic elements of CLU semantics are
+2objects* and 2variables*.
+Objects are the data entities that are created and manipulated
+by CLU programs.
+Variables are just the names used in a
+program to refer to objects.
+.para
+In CLU, each object has a particular 2type*,
+which characterizes its behavior.
+A type defines a set of operations
+that create and manipulate objects of that type.
+An object
+may be created and manipulated only via the operations of its type.
+.para
+An object may 2refer* to objects.
+For example,
+a record object refers to the objects that are the components
+of the record.
+This notion is one of logical, not physical, containment.
+In particular, it is possible for two distinct record objects to
+refer to (or 2share*) the same component object.
+In the case of a cyclic structure, it is even possible for an object
+to ``contain'' itself.
+Thus, it is possible to have recursive data
+structure definitions and shared data objects without explicit
+reference types.
+The 2wordtree* type described in the previous
+section is an example of a recursively-defined data structure.
+(This notion of object is similar to that in LISP.)
+.para
+CLU objects exist independently of procedure activations.
+Space for objects is allocated from a dynamic storage area
+as the result of invoking
+constructor operations of certain primitive CLU types.
+For example,
+the record constructor is used in the implementation of 2wordbag*
+(Figure wordbag) to acquire space for new 2wordbag* objects.
+In theory, all objects continue to exist forever.
+In practice,
+the space used by an object may be reclaimed when that object is
+no longer accessible to any CLU program.
+.foot
+An object is accessible if it is denoted by a variable of an active
+procedure or is a component of an accessible object.
+.efoot
+.para
+An object may exhibit time-varying behavior.
+Such an object, called a 2mutable* object,
+has a state which may be modified by certain operations
+without changing the identity of the object.
+Records are examples of mutable objects.
+The record update operations (2put_s (r,@v)*,
+written as 2r*.2s*@:=@2v*
+in the examples) change the state of record objects and
+therefore affect the behavior of subsequent applications of
+the select operations (2get_s (r)*, written as 2r*.2s*).
+The 2wordbag* and 2wordtree* types are additional examples
+of types with mutable objects.
+.para
+If a mutable object m is shared by two other objects x and y,
+then a modification to m made via x will be visible when m is
+examined via y.
+Communication through
+shared mutable objects is most beneficial in the context
+of procedure invocation, described below.
+.para
+Objects that do not exhibit time-varying behavior are called
+2immutable* objects, or 2constants*.
+Examples of constants are integers, booleans,
+characters, and strings.
+The value of a constant object cannot be modified.
+For example,
+new strings may be computed from old ones,
+but existing strings do not change.
+Similarly,
+none of the integer operations
+modify the integers passed to them as arguments.
+.para
+Variables are names used in CLU programs to 2denote*
+particular objects at execution time.
+Unlike variables in many common programming languages,
+which 2are* objects that 2contain* values,
+CLU variables are simply names
+that the programmer uses to refer to objects.
+As such, it is possible for two variables to denote
+(or 2share*) the same object.
+CLU variables are much like those in LISP,
+and are similar to pointer variables in other languages.
+However, CLU variables are 2not* objects;
+they cannot be denoted by other variables or referred to by objects.
+Thus, variables are completely private to the procedure
+in which they are declared,
+and cannot be accessed or modified by any other procedure.
+.section "Assignment and Procedure Invocation"
+.para
+The basic actions in CLU are 2assignment* and
+2procedure invocation*.
+The assignment primitive 2x*@:=@2E*, where x is a variable
+and 2E* is an expression, causes x to denote
+the object resulting from the evaluation of 2E*.
+For example,
+if 2E* is a simple variable y, then the assignment x@:=@y
+causes x to denote the object denoted by y.
+The object
+is 2not* copied; after the assignment is performed, it will be
+2shared* by x and y.
+Assignment does not affect
+the state of any object.
+(Recall that 2r*.2s*@:=@2v* is not a true assignment,
+but an abbreviation for 2put_s@(r,@v)*.)
+.para
+Procedure invocation involves passing argument objects
+from the caller to the called procedure and returning result
+objects from the procedure to the caller.
+The formal arguments
+of a procedure are considered to be local variables of the procedure,
+and are initialized, by assignment, to the objects resulting from the
+evaluation of the argument expressions.  Thus, argument
+objects are shared between the caller and the called procedure.
+A procedure may modify mutable argument objects (e.g., records),
+but of course it cannot modify immutable ones (e.g., integers).
+A procedure has no access to the variables of its caller.
+.para
+Procedure invocations may be
+used directly as statements; those
+that return objects may also be used as expressions.
+Arbitrary recursive procedures are permitted.
+.ne 5
+.section "Type Correctness"
+.para
+Every variable in a CLU module must be declared;
+the declaration specifies the type of object
+that the variable may denote.
+All assignments to a variable must satisfy
+the variable's declaration.
+Because argument passing is defined
+in terms of assignment, the types of actual
+argument objects must be consistent with the declarations of the
+corresponding formal arguments.
+.para
+These restrictions, plus the restriction that only the code
+in a cluster may use cvt to convert between the abstract
+and representation types, ensure that the behavior of an object
+is indeed characterized completely by the operations of its type.
+For example, the type restrictions ensure that
+the only modification possible to a record object that represents
+a 2wordbag* (Figure wordbag) is the modification performed by
+the insert operation.
+.para
+Type-checking is performed on a module-by-module basis
+at compile-time (it could also be done at run-time).
+This checking can catch all type errors -- even those involving
+inter-module references -- because the CLU library maintains the
+necessary type information for all modules
+(see Section 5).
--- a/doc/clu/clup4.r
+++ b/doc/clu/clup4.r
@@ -0,0 +1,494 @@
+.nd chapter 4-1
+.nd current_figure 7
+.so clu/clupap.header
+.
+.sr words 2words*
+.sr wordbag 2wordbag*
+.sr sorted_bag 2sorted_bag*
+.sr sorted_bags 2sorted_bags*
+.sr wordtree 2wordtree*
+.sr tree 2tree*
+.sr node 2node*
+.sr r 2r*
+.sr x 2x*
+.sr t 2t*
+.sr count_words 2count_words*
+.sr count_numeric 2count_numeric*
+.sr lt 2lt*
+.sr equal 2equal*
+.sr print 2print*
+.sr string_chars 2string_chars*
+.sr create 2create*
+.sr insert 2insert*
+.sr size 2size*
+.sr increasing 2increasing*
+.sr s 2s*
+.sr n 2n*
+.sr index 2index*
+.sr limit 2limit*
+.sr count 2count*
+.sr next_word 2next_word*
+.sr elements 2elements*
+.sr reverse_elements 2reverse_elements*
+.
+.
+.chapter "More Abstraction Mechanisms"
+.para
+In this section we continue our discussion of
+abstraction mechanisms in CLU.
+A generalization of the 2wordbag* abstraction,
+called 2sorted_bag*,
+is presented as an illustration of parameterized clusters,
+which are a means for implementing
+more generally applicable data abstractions.
+The presentation of 2sorted_bag*
+is also used to motivate the introduction of a control
+abstraction called an 2iterator*,
+which is a mechanism for incrementally generating 
+the elements of a collection of objects.
+Finally, we show an implementation of the sorted_bag
+abstraction and illustrate how sorted_bag
+can be used in implementing count_words.
+.section "Properties of the Sorted_bag Abstraction"
+.para
+In the count_words procedure given earlier,
+a data abstraction called wordbag was used.
+A wordbag object is a collection of strings,
+each with an associated count.
+Strings are inserted into a wordbag object one at a time.
+Strings in a wordbag object may be printed in alphabetical order,
+each with a count of the number of times it was inserted.
+.para
+Although wordbag has properties that are specific to the usage
+in count_words,
+it also has properties in common with a more general abstraction,
+sorted_bag.
+A bag is similar to a set
+(it is sometimes called a multi-set)
+except that an item can appear in a bag many times.
+For example, if the integer 1 is inserted in the set {1,2},
+the result is the set {1,2},
+but if 1 is inserted in the bag {1,2},
+the result is the bag {1,1,2}.
+A sorted_bag is a bag that affords access
+to the items it contains
+according to an ordering relation on the items.
+.para
+The concept of a sorted_bag is meaningful not only for strings
+but for many types of items.
+Therefore, we would like to parameterize the sorted_bag abstraction,
+the parameter being the type of item to be collected
+in the sorted_bag objects.
+.para
+Most programming languages provide built-in parameterized
+data abstractions.
+For example, the concept of an array is a parameterized
+data abstraction.
+.ne 3
+An example of a use of arrays in Pascal is
+.code
+	1array* [1..n] 1of* 1integer*
+.end_code
+These arrays have two parameters,
+one specifying the array bounds (1..n)
+and one specifying the type of element in the array (integer).
+In CLU we provide mechanisms allowing user-defined
+data abstractions (like sorted_bag) to be parameterized.
+.para
+In the sorted_bag abstraction,
+not all types of items make sense.
+Only types that define a total ordering on their objects
+are meaningful,
+since the sorted_bag abstraction depends on the presence
+of this ordering.
+In addition, information about the ordering must be
+expressed in a way that is useful for programming.
+A natural way to express this information
+is by means of operations of the item type.
+Therefore, we require that the item type provide
+less than and equal operations
+(called lt and equal).
+.ne 5
+This constraint is expressed in the header for sorted_bag:
+.code
+	sorted_bag = cluster [t: type] is create, insert, dots
+		where t has
+			lt, equal: proctype (t, t) returns (bool);
+.end_code
+The item type t is a 2formal parameter* of the sorted_bag
+cluster; whenever the sorted_bag abstraction is used,
+.ne 3
+the item type must be specified as an 2actual parameter*, e.g.,
+.code
+	sorted_bag[string]
+.end_code
+.para
+The information about required operations
+informs the programmer about legitimate uses of sorted_bag.
+The compiler will check each use of sorted_bag to ensure
+that the item type provides the required operations.
+The where clause specifies exactly the information
+that the compiler can check.
+Of course, more is assumed about the item type 2t*
+than the presence of 
+operations with appropriate names and functionalities:
+these operations must also define a total ordering on the items.
+Although we expect formal and complete specifications
+for data abstractions to be included in the CLU library eventually,
+we do not include in the CLU language declarations
+that the compiler cannot check.
+This point is discussed further in Section discussion.
+.para
+Now that we have decided to define a
+sorted_bag abstraction that works for many item types,
+we must decide what operations this abstraction provides.
+When an abstraction (like wordbag)
+is written for a very specific purpose,
+it is reasonable to have
+some specialized operations.
+For a more general abstraction,
+the operations should be more generally useful.
+.para
+The 2print* operation is a case in point.
+Printing is only one possible use of the information contained
+in a 2sorted_bag*.
+It was the only use in the case of 2wordbag*,
+so it was reasonable to have a 2print* operation.
+However, if 2sorted_bags* are to be generally useful,
+there should be some way for the user to obtain
+the elements of the 2sorted_bag*; the user can then
+perform some action on the elements (for example, print them).
+.para
+What we would like is an operation on sorted_bags
+that makes all of the elements available to the caller
+in increasing order.
+One possible approach is to map
+the elements of a sorted_bag
+into a sequence object,
+a solution potentially requiring a large amount of space.
+A more efficient method is provided by CLU and is discussed below.
+This solution computes the sequence
+one element at a time, thus saving space.
+If only part of the sequence is used
+(as in a search for some element),
+then execution time can be saved as well.
+.section "Control Abstractions"
+.para
+The purpose of many loops is to perform an action
+on some or all of the objects in a collection.
+For such loops,
+it is often useful to separate the
+selection of the next object
+from the action performed on that object.
+CLU provides a control abstraction that permits
+a complete decomposition of the two activities.
+The for statement available in many programming languages
+provides a limited ability in this direction:
+it iterates over ranges of integers.
+The CLU for statement
+can iterate over collections of any
+type of object.
+The selection of the next object in the collection
+is done by a user-defined 2iterator*.
+The iterator
+produces the objects in the collection one at a time
+(the entire collection need not physically exist);
+each object is consumed by the for statement in turn.
+.nr rra0 current_figure
+.para
+Figure rra0 gives an example of a simple iterator
+called string_chars, which produces the characters in a string in
+the order in which they appear.
+.begin_figure "Use and definition of a simple iterator."
+.code
+count_numeric = proc (s: string) returns (int);
+	count: int := 0;
+	for c: char in string_chars (s) do
+		if char_is_numeric (c)
+			then count := count + 1;
+			end;
+		end;
+	return count;
+	end count_numeric;
+
+string_chars = iter (s: string) yields (char);
+	index: int := 1;
+	limit: int := string$size (s);
+	while index <= limit do
+		yield string$fetch (s, index);
+		index := index + 1;
+		end;
+	end string_chars;
+.ns
+.end_code
+.finish_figure
+This iterator uses string operations 2size@(s)*,
+which tells how many characters are in the string s,
+and 2fetch@(s,@n)*,
+which returns the n!th character in the string s
+(provided the integer n is greater than zero
+and does not exceed the size of the string).
+.foot
+A while loop is used in the implementation of
+string_chars so that the example would be based
+on familiar concepts.  In actual practice, such a
+loop would be written using a for statement invoking
+a primitive iterator.
+.efoot
+.br
+.ne 5
+.para
+The general form of the CLU for statement is
+.code
+	for declarations in iterator-invocation do
+		body
+		end;
+.end_code
+An example of the use of the for statement
+occurs in the count_numeric procedure
+(see Figure rra0),
+which contains a loop
+that counts the number of numeric characters in a string.
+Note that the details of how the characters are obtained
+from the string are entirely contained
+in the definition of the iterator.
+.para
+Iterators work as follows:
+A for statement initially invokes an iterator,
+passing it some arguments.
+Each time a yield statement is executed in the iterator,
+the objects yielded
+.foot
+Zero or more objects may be yielded,
+but the number and types of objects yielded each time by an iterator
+must agree with the number and types of variables in
+a for statement using the iterator.
+.efoot
+are assigned to the variables declared in the for statement
+(following the reserved word for)
+in corresponding order, and the body of the for
+statement is executed.
+Then the iterator is resumed at the statement
+following the yield statement,
+in the same environment as when the objects were yielded.
+When the iterator terminates, by either an implicit
+or explicit return, the invoking for statement
+terminates.  The iteration may also be prematurely
+terminated by a return in the body of the
+for statement.
+.para
+For example, suppose that string_chars is invoked
+with the string ``a3''.
+The first character yielded is `a'.
+At this point within string_chars, index@=@1 and limit@=@2.
+Next the body of the for statement is performed.
+Since the character `a' is not numeric,
+count remains at 0.
+Next string_chars is resumed at the statement after the yield
+statement, and when resumed, index@=@1 and limit@=@2.
+Then index is assigned 2,
+and the character `3' is selected from the string and yielded.
+Since `3' is numeric, count becomes@1.
+Then string_chars is resumed,
+with index@=@2 and limit@=@2, and index is incremented,
+which causes the while loop to terminate.
+The implicit return terminates both the iterator and the
+for statement, with control resuming at the statement
+after the for statement,
+and count@=@1.
+.para
+While iterators are useful in general,
+they are especially valuable in conjunction with data abstractions
+that are collections of objects (such as sets, arrays, and
+sorted_bags).
+Iterators afford users of such abstractions access to all objects
+in the collection, without exposing irrelevant details.
+Several iterators may be included in a data abstraction.
+When the order of obtaining the objects is important,
+different iterators may provide different orders.
+.section "Implementation and Use of Sorted_bag"
+.para
+Now we can describe a minimal set of operations
+for sorted_bag.
+The operations are create, insert, size, and increasing.
+2Create*, insert, and size are procedural abstractions
+that, respectively,
+create a sorted_bag, insert an item into a sorted_bag,
+and give the number of items in a sorted_bag.
+2Increasing* is a control abstraction
+that produces the items in a sorted_bag in increasing order;
+each item produced is accompanied by
+an integer representing the number of times
+the item appears in the sorted_bag.
+Note that other operations might also
+be useful for sorted_bag,
+for example, an iterator yielding the items
+in decreasing order.
+In general, the definer of a data abstraction
+can provide as many operations as seems reasonable.
+.para
+In Figure current_figure, we give an implementation
+of the sorted_bag abstraction.
+.begin_figure "The sorted_bag cluster."
+.code
+sorted_bag = cluster [t: type] is create, insert, size, increasing
+	where t has equal, lt: proctype (t, t) returns (bool);
+
+	rep = record [contents: tree[t], total: int];
+
+create = proc () returns (cvt);
+	return rep${contents: tree[t]$create (), total: 0};
+	end create;
+
+insert = proc (sb: cvt, v: t);
+	sb.contents := tree[t]$insert (sb.contents, v);
+	sb.total := sb.total + 1;
+	end insert;
+
+size = proc (sb: cvt) returns (int);
+	return sb.total;
+	end size;
+
+increasing = iter (sb: cvt) yields (t, int);
+	for item: t, count: int
+		in tree[t]$increasing (sb.contents) do
+			yield item, count;
+			end;
+	end increasing;
+
+end sorted_bag;
+.ns
+.end_code
+.finish_figure
+It is implemented using a sorted binary tree,
+just as wordbag was implemented.
+Thus, a subsidiary abstraction is necessary.
+This abstraction, called tree, is a generalization
+of the wordtree abstraction (used in Section example),
+which has been parameterized to work for all ordered types.
+An implementation of tree is given in Figure current_figure.
+Notice that both the tree abstraction and the sorted_bag abstraction
+place the same constraints on their type parameters.
+.begin_page_figure "The tree cluster."
+.code
+tree = cluster [t: type] is create, insert, increasing
+	where t has equal, lt: proctype (t, t) returns (bool);
+
+	node = record [m!value: t, count: int,
+(mark!m)lesser: tree[t], greater: tree[t]];
+	rep = oneof [empty: null, non_empty: node];
+
+create = proc () returns (cvt);
+	return rep$make_empty (nil);
+	end create;
+
+insert = proc (x: cvt, v: t) returns (cvt);
+	tagcase x
+		tag empty:
+			n: node := node${m!value: v, count: 1,
+(mark!m)lesser: tree[t]$create (),
+(mark!m)greater: tree[t]$create ()};
+			return rep$make_non_empty (n);
+		tag non_empty (n: node):
+			if t$equal (v, n.value)
+					then n.count := n.count + 1;
+				elseif t$lt (v, n.value)
+					then n.lesser := tree[t]$insert (n.lesser, v);
+				else n.greater := tree[t]$insert (n.greater, v);
+				end;
+			return x;
+		end;
+	end insert;
+
+increasing = iter (x: cvt) yields (t, int);
+	tagcase x
+		tag empty: ;
+		tag non_empty (n: node):
+			for item: t, count: int
+				in tree[t]$increasing (n.lesser) do
+					yield item, count;
+					end;
+			yield n.value, n.count;
+			for item: t, count: int
+				in tree[t]$increasing (n.greater) do
+					yield item, count;
+					end;
+		end;
+	end increasing;
+end tree;
+.ns
+.end_code
+.finish_figure
+.para
+An important feature of the 2sorted_bag*
+and 2tree* clusters
+is the way that the cluster parameter is used in places
+where the type string was used in wordbag and wordtree.
+This usage is especially evident in the implementation of tree.
+For example, tree has a representation that stores values of
+type t: the 2value* component of a node
+must be an object of type t.
+.para
+In the insert operation of tree,
+the lt and equal operations of type t are used.
+We have used the compound form, e.g., 2t$equal@(v,@n.value)*,
+to emphasize that the equal operation of t is being used.
+The short form, 2v@=@n.value*, could have been used instead.
+.para
+The increasing iterator of tree works as follows:
+First it yields all items in the current tree
+that are less than the item at the top node;
+the items are obtained by a recursive use of itself,
+passing the 2lesser* subtree as a parameter.
+Next it yields the contents of the top node,
+and then it yields all items in the current tree
+that are greater than the item at the top node
+(again by a recursive use of itself).
+In this way it performs a complete walk over the tree,
+yielding the values at all nodes, in increasing order.
+.nr rra1 current_figure
+.para
+Finally, we show in Figure rra1 how the original
+procedure count_words can be implemented in terms of sorted_bag.
+.begin_figure "The count_words procedure using iterators."
+.code
+count_words = proc (i: instream, o: outstream);
+
+	wordbag = sorted_bag[string];
+
+	% create an empty wordbag
+	wb: wordbag := wordbag$create ();
+
+	% scan document, adding each word found to wb
+	for word: string in words (i) do
+		wordbag$insert (wb, word);
+		end;
+
+	% print the wordbag
+	total: int := wordbag$size (wb);
+	for w: string, count: int in wordbag$increasing (wb) do
+		print_word (w, count, total, o);
+		end;
+	end count_words;
+.ns
+.end_code
+.finish_figure
+.
+Note that the count_words procedure now uses 2sorted_bag*[string]
+instead of wordbag.
+2Sorted_bag*[string] is legitimate, since the type string
+provides both lt and equal operations.
+Note that two for statements are used in count_words.
+The second for statement prints the words
+in alphabetic order,
+using the increasing iterator of sorted_bag.
+.ne 4
+The first for statement inserts the words into the sorted_bag;
+it uses an iterator
+.code
+	words = iter (i: instream) yields (string);
+		dots
+		end words;
+.widow 2
+.end_code
+The definition of words is left as an exercise for the reader.
--- a/doc/clu/clup5.r
+++ b/doc/clu/clup5.r
@@ -0,0 +1,112 @@
+.nd chapter 5-1
+.nd current_figure 11
+.nd wordbag 3
+.so clu/clupap.header
+.sr p 2p*
+.sr q 2q*
+.sr x 2x*
+.sr y 2y*
+.sr z 2z*
+.sr a 2a*
+.sr b 2b*
+.sr insert 2insert*
+.chapter "The CLU Library"
+.para
+So far, we have shown CLU modules as separate pieces of
+text, without explaining how they are bound together to form a
+program.  This section describes the CLU library, which plays a
+central role in supporting inter-module references.
+.para
+The CLU library contains information about
+abstractions.  The library supports incremental program
+development, one abstraction at a time, and, in addition,
+makes abstractions that are defined during the construction of
+one program available as a basis for subsequent program development.
+The information in the library permits the separate
+compilation of single modules, with complete type-checking
+of all external references (such as procedure
+invocations).
+.para
+The structure of the library derives from the fundamental
+distinction between abstractions and implementations.
+For each abstraction, there is a 2description
+unit* which contains all system-maintained information
+about that abstraction.  Included in the description unit
+are zero or more modules that implement the abstraction.
+.foot
+Other information that may be stored in the library
+includes information about relationships among
+abstractions, as might be expressed in a module
+interconnection language [DK75,@Thomas].
+.efoot
+.para
+The most important information contained in a description
+unit is the abstraction's 2interface specification*, which
+is that information needed to type-check uses of the abstraction.
+For procedural and control abstractions,
+this information consists of the number and types of
+parameters, arguments, and output values, plus any constraints
+on type parameters (i.e., required operations, as described in
+Section 4).  For data abstractions,
+it includes the number and types of parameters, constraints on
+type parameters, and the
+name and interface specification of each
+operation.
+.para
+An abstraction is entered in the library by
+submitting the interface specification;
+no implementations are required.
+In fact, a module can be compiled before any implementations
+have been provided for the abstractions that it uses;
+it is necessary only that interface specifications
+have been given for those abstractions.
+Ultimately, there can be many implementations
+of an abstraction;
+each implementation is required to satisfy the
+interface specification of the abstraction.
+Because all uses and implementations
+of an abstraction are checked against the interface
+specification, the actual selection
+of an implementation can be delayed
+until just before (or perhaps during) execution.
+We imagine a process of binding together modules
+into programs, prior to execution, at which time
+this selection would be made.
+.para
+An important detail of the CLU system is
+the method by which CLU modules refer to abstractions.
+To avoid problems of name conflicts that can arise in
+large systems, the names used by a module to refer to
+abstractions can be chosen to suit the programmer's
+convenience.
+When a module is submitted for
+compilation, its external references must be bound to
+description units so that type-checking can be
+performed.  The binding is accomplished by constructing
+an 2association list*,
+mapping names to description units, which
+is passed to the compiler along with the source code when
+compiling the module.
+The mapping in the association list is stored by the compiler
+in the library as part of the module.
+A similar process is involved in entering interface
+specifications of abstractions, as these will include
+references to other (data) abstractions.
+.para
+When the compiler type-checks a module,
+it uses the association list to map the external
+names in the module to description units, and then uses
+the interface specifications in those description
+units to check that the abstractions are used correctly.
+The type-correctness of the module thus
+depends upon the binding of names to description units
+and the interface specifications in those description
+units, and could be invalidated if changes to the
+binding or the interface specifications were subsequently
+made.  For this reason, the process of compilation
+permanently binds a module to the abstractions it
+uses, and the interface description of an abstraction,
+'ne 2
+once defined, is not allowed to change.
+(Of course, a new description unit can be created
+to describe a modified abstraction.)
--- a/doc/clu/clup6.r
+++ b/doc/clu/clup6.r
@@ -0,0 +1,77 @@
+.nd chapter 6-1
+.nd current_figure 1
+.nd wordbag 3
+.so clu/clupap.header
+.sr insert 2insert*
+.
+.chapter "Implementation"
+.para
+This section briefly describes the current implementation of CLU
+and discusses its efficiency.
+.para
+The implementation is based on a decision to represent
+all CLU objects by 2object descriptors*,
+which are fixed-size values containing a type code and some
+type-dependent information.
+.foot
+Object descriptors are similar to capabilities [Lam71].
+.efoot
+In the case of mutable types, the type-dependent information
+is a pointer to a separately-allocated
+area containing the state information.  For constant
+types, the information either directly contains
+the value (if the value can be encoded in the
+information field, such as for integers, characters,
+and booleans) or contains a pointer to separately-allocated
+space (as for strings).
+The type codes are used by the garbage collector
+to determine the physical representation of objects
+so that the accessible objects can be traced;
+they are also useful for supporting program debugging.
+.para
+The use of fixed-size object descriptors
+allows variables to be fixed-size cells.  Assignment
+is efficient: the object descriptor resulting
+from the evaluation of the expression is simply
+copied into the variable.  In addition, a single
+size for variables facilitates the separate compilation
+of modules and allows most of the code of a
+parameterized module to be shared among all instantiations
+of the module.  The actual parameters are made available
+to this code by means of a small parameter-dependent
+section, which is initialized prior to execution.
+.para
+Procedure invocation is relatively efficient.
+A single program stack is used,
+and argument passing is as efficient as assignment.
+Iterators are a form of coroutine;
+however, their use is sufficiently constrained
+that they are implemented using just the program stack.
+Using an iterator is therefore only slightly more expensive
+than using a procedure.
+.para
+The data abstraction mechanism is not inherently
+expensive.  No execution-time type-checking is necessary.
+Furthermore, the type conversion implied by 1cvt*
+is merely a change in the view taken of an object's type,
+and does not require any computation.
+.para
+A number of optimization techniques can be
+applied to a collection of modules, if one is
+willing to give up the flexibility of separate
+compilation.  The most effective such optimization is
+the inline substitution of procedure (and iterator) bodies
+for invocations [Sch76].
+The use of data abstractions tends to introduce
+extra levels of procedure invocations that perform little or no
+computation.  As an example, consider the 2wordbag$insert*
+operation (Figure wordbag), which merely invokes the
+2wordtree$insert* operation and increments a counter.
+If data abstractions had not been used, these actions would most
+likely have been performed directly by the 2count_words*
+procedure.  The 2wordbag$insert* operation is thus
+a good candidate for being compiled inline.
+Once inline substitution has been performed, the increase
+in context will enhance the effectiveness of
+conventional optimization techniques
+[All71,@All75,@Atk76].
--- a/doc/clu/clup7.r
+++ b/doc/clu/clup7.r
@@ -0,0 +1,151 @@
+.nd chapter 7-1
+.nd current_figure 5
+.nd wordbag 3
+.so clu/clupap.header
+.sr p 2p*
+.sr q 2q*
+.sr x 2x*
+.sr y 2y*
+.sr z 2z*
+.sr a 2a*
+.sr b 2b*
+.sr insert 2insert*
+.chapter "Discussion"
+.para
+Our intent in this paper has been to provide an
+informal introduction to the abstraction mechanisms in CLU.
+By means of programming examples, we have illustrated the
+use of data, procedural, and control abstractions, and have
+shown how CLU modules are used to implement these
+abstractions.  We have not attempted to provide a complete
+description of CLU, but, in the course of explaining
+the examples, most features of the language have appeared.
+One important omission is the CLU exception handling mechanism
+(which does support abstractions); this mechanism
+is described in [LCS75].
+.para
+In addition to describing constructs
+that support abstraction, previous sections have
+covered a number of other topics.  We have discussed the 
+semantics of CLU.  We have described the organization of the
+CLU library and discussed how it supports incremental
+program development and separate
+compilation and type-checking of modules.
+Also, we have described our current
+implementation and discussed its efficiency.
+.para
+In designing CLU, our goal was to simplify the task
+of constructing reliable software that is reasonably easy
+to understand, modify, and maintain.  It seems appropriate,
+therefore, to conclude this paper with a discussion of how
+CLU contributes to this goal.
+.para
+The quality of any program depends upon the skill of 
+the designer.  In CLU programs,
+this skill is reflected in the choice of abstractions.
+In a good design, abstractions will be used
+to simplify the connections between modules and to 
+encapsulate decisions that are likely to change [Par71].
+Data abstractions are particularly valuable for these purposes.
+For example, through the use of a data abstraction,
+modules that share a system data base
+rely only on its abstract behavior as
+defined by the data base operations.  The connections
+among these modules are much simpler
+than would be possible if they shared knowledge
+of the format of the data base and the relationship
+among its parts.  In addition, the data base abstraction
+can be reimplemented without affecting the code of the modules
+that use it.
+CLU encourages the use of data abstractions,
+and thus aids the programmer during program design.
+.para
+The benefits arising from the use of data
+abstractions are based on the constraint, inherent in CLU
+and enforced by the CLU compiler, that only the operations
+of the abstraction may access the representations of the objects.
+This constraint ensures that the distinction made in CLU
+between abstractions and implementations
+applies to data abstractions as well as to procedural
+and control abstractions.
+.para
+The distinction between abstractions and implementations
+eases program modification and maintenance.
+Once it has been determined that an abstraction must be
+reimplemented, CLU guarantees that the code of
+all modules using that
+abstraction will be unaffected by the change.
+The modules need not be reprogrammed or even recompiled;
+only the process of
+selecting the implementation of the abstraction must be
+redone.
+The problem of determining what modules must be
+changed is also simplified, because each module has a
+well-defined purpose, to implement an abstraction,
+and no other module can interfere with that purpose.
+.para
+Understanding and verification of CLU programs is
+made easier
+because the distinction between
+abstractions and implementations permits this task
+to be decomposed.
+One module at a time is studied to determine that it
+implements its abstraction.  This study requires
+understanding the behavior of the abstractions
+it uses, but it is not necessary to understand the
+modules implementing those abstractions.  Those
+modules can be studied separately.
+.para
+A promising way to establish the
+correctness of a program is by means of a mathematical
+proof.  For practical reasons, proofs should be
+performed (or at least checked) by a verification
+system, since the process of constructing
+a proof is tedious and error-prone.
+Decomposition of the proof is essential for
+program proving, which is practical only for small
+programs (like CLU modules).  Note that when the CLU
+compiler does type-checking, it is, in addition
+to enforcing the constraint that permits the proof
+to be decomposed, also performing a small part of the
+actual proof.
+.para
+We have included as declarations in CLU just
+the information that the compiler can check with
+reasonable efficiency.
+We believe that the other
+information required for proofs (specifications and
+assertions) should be expressed in a separate
+``specification'' language.
+The properties of such a language are being
+studied [Guttag, Lis75, Lis76, Spitzen].
+We intend eventually to add formal specifications to the
+CLU system; the library is already organized to
+accommodate this addition.  At that time various
+specification language processors could be added to 
+the system.
+.para
+We believe that the constraints imposed by
+CLU are essential for practical as well as theoretical
+reasons.  It is true that data abstractions
+can be used in any language by
+establishing programming conventions to protect the
+representations of objects.  However, conventions are no
+substitute for enforced constraints.  It is inevitable
+that the conventions will be violated -- and are likely
+to be violated just when they are needed most, in
+implementing, maintaining, and modifying large
+programs.  It is precisely at this time, when the
+.ne 3
+programming task becomes very difficult, that a
+language like CLU will be most valuable and
+appreciated.
+.chapter "Acknowledgements"
+.para
+The authors gratefully acknowledge the contributions
+made by members of the CLU design group over the
+last three years.  Several people have made
+helpful comments about this paper, including
+Toby Bloom, Dorothy Curtis, Mike Hammer,
+Eliot Moss, Jerry Saltzer, Bob Scheifler,
+and the referees.
--- a/doc/clu/clupap.header
+++ b/doc/clu/clupap.header
@@ -0,0 +1,117 @@
+.
+ header file for clu paper
+.
+.nd narrow 0
+.nd csg_memo 0
+.nd started 0
+.if ~started
+.dv xgp
+.fo 0 fonts; 30vr kst
+.fo 1 fonts; 31vgb kst
+.fo 2 fonts; 30vri kst
+.fo 3 fonts; 37vrb kst
+.fo 4 fonts; 75vbee kst
+.fo 7 fonts; 18fg kst
+.tr @ 
+.nr fnfont 7
+.nr chapter_starts_page 0
+.nr reset_per_page 0
+.nr both_sides 1
+.sr list_left_margin 500m
+.sr list_right_margin 500m
+.if narrow
+.nr big_font 1
+.ls 1
+.ll 4.25i
+.pl 14i
+.sr list_left_margin 200m
+.sr list_right_margin 200m
+.ef csg_memo==0
+.ls 2
+.sr left_heading Abstraction Mechanisms in CLU
+.sr right_heading date
+.ef csg_memo>0
+.ls 1.5
+.sr left_heading CSG Memo csg_memo-1
+.sr right_heading Abstraction Mechanisms in CLU
+.en
+.sr figure_name Figure \
+.sr table_name Table \
+.nr immediate_figure 0
+.nr tty_table_of_contents 1
+.nr verbose 1
+.so r/r.macros
+.so clu/clukey.r
+.nr started 1
+.if narrow
+.eq begin_page_figure begin_figure
+.en
+.en
+.
+ number registers for section references
+.
+.nr introduction 1
+.nr example 2
+.nr semantics 3
+.nr more_abstraction 4
+.nr library 5
+.nr implementation 6
+.nr efficiency 6
+.nr discussion 7
+.
+ paragraph macro
+.
+.if ls<150
+.de para
+.sp
+.ne 3
+.ti 5
+.em
+.ef
+.de para
+.br
+.ne 3
+.ti 5
+.em
+.en
+.
+ three dots
+.
+.sr dots 1...*
+.sr th 7th*
+.
+ code brackets - use like table, end_table
+.
+.de code
+.table 2
+.if narrow
+.ta 8 11 14 17 20 23 26 29 32
+.en
+.em
+.
+.de end_code
+.rtabs
+.end_table
+.em
+.
+ mark position
+.
+.de m
+.nr mark hpos
+.em
+.
+ widow eliminator
+.
+.de widow  <n>
+.lbegin
+.nv n \0
+.if n<1
+.nr n 1
+.end
+.vx n n!l 2m
+.if vtrap<n&vpos>0
+.nr n n-vtrap
+.ct footer_handler +n!m
+.end
+.end
+.em
--- a/doc/clu/clupap.r
+++ b/doc/clu/clupap.r
@@ -0,0 +1,24 @@
+ CLU paper
+.
+ set csg_memo to 0 for paper version
+ set it to csg memo number for csg memo version
+.
+ set narrow to 1 for map version
+ and insert ;SIZE 14 at the beginning of the XGP file
+.
+.nr narrow 0
+.nr csg_memo 0
+.
+.so clu/clupap.header
+.so clu/clup0.r
+.so clu/clup1.r
+.so clu/clup2.r
+.so clu/clup3.r
+.so clu/clup4.r
+.so clu/clup5.r
+.so clu/clup6.r
+.so clu/clup7.r
+.if narrow
+.ns p
+.en
+.insert_refs