mirror of
https://github.com/PDP-10/its.git
synced 2026-02-26 08:53:29 +00:00
CSG memo 144: Abstraction Mechanisms in CLU.
This commit is contained in:
@@ -156,6 +156,17 @@ clib/-read-.-this- 198002261810.43
|
||||
clib/tv.128 197908312338.58
|
||||
clu/clu.order 197711161922.32
|
||||
clu/action.refman 197806022022.04
|
||||
clu/clukey.r 197909041308.12
|
||||
clu/clup0.r 197711261712.18
|
||||
clu/clup1.r 197711261712.27
|
||||
clu/clup2.r 197711261714.24
|
||||
clu/clup3.r 197711261712.44
|
||||
clu/clup4.r 197711261716.29
|
||||
clu/clup5.r 197711261713.02
|
||||
clu/clup6.r 197711261713.08
|
||||
clu/clup7.r 197711261713.18
|
||||
clu/clupap.header 197711261651.18
|
||||
clu/clupap.r 197711261651.32
|
||||
clu/clusym.r 197806271243.01
|
||||
clu/exampl.refman 197805301747.35
|
||||
clu/except.refman 197806061946.59
|
||||
|
||||
27
doc/clu/clukey.r
Normal file
27
doc/clu/clukey.r
Normal file
@@ -0,0 +1,27 @@
|
||||
.
|
||||
string registers for CLU keywords
|
||||
.
|
||||
.de bold
|
||||
.fr i 0 nargs-1
|
||||
.sr \\i 1\\i*
|
||||
.en
|
||||
.em
|
||||
.
|
||||
.bold any array
|
||||
.bold begin bool break
|
||||
.bold cand char cluster continue cor cvt
|
||||
.bold do down
|
||||
.bold else elseif end except exit
|
||||
.bold false for force
|
||||
.bold has
|
||||
.bold if in int is iter itertype
|
||||
.bold nil null
|
||||
.bold oneof others own
|
||||
.bold proc proctype
|
||||
.bold real record rep resignal return returns
|
||||
.bold sequence signal signals string struct
|
||||
.bold tag tagcase then true type
|
||||
.bold up
|
||||
.bold variant
|
||||
.bold when where while
|
||||
.bold yield yields
|
||||
293
doc/clu/clup0.r
Normal file
293
doc/clu/clup0.r
Normal file
@@ -0,0 +1,293 @@
|
||||
.nd started 0
|
||||
.nr do_refs 0
|
||||
.if ~started
|
||||
.nr do_refs 1
|
||||
.en
|
||||
.so clu/clupap.header
|
||||
.so r/ref3.rmac
|
||||
.if csg_memo==0
|
||||
.ls 1
|
||||
.if narrow
|
||||
.new_font 1
|
||||
.ef
|
||||
.new_font 3
|
||||
.en
|
||||
.nf c
|
||||
.vp 2i
|
||||
Abstraction Mechanisms in CLU
|
||||
.if narrow
|
||||
.new_font 0
|
||||
.ef
|
||||
.new_font 1
|
||||
.en
|
||||
.sp .5i
|
||||
Barbara Liskov
|
||||
Alan Snyder
|
||||
Russell Atkinson
|
||||
Craig Schaffert
|
||||
.sp .3i
|
||||
Laboratory for Computer Science
|
||||
Massachusetts Institute of Technology
|
||||
545 Technology Square
|
||||
Cambridge, MA 02139
|
||||
.sp 2
|
||||
.if ~narrow
|
||||
.vp 8.5i
|
||||
.en
|
||||
.nf l
|
||||
.fi
|
||||
.new_font 0
|
||||
This research was supported in part by the Advanced Research
|
||||
Projects Agency of the Department of Defense, monitored by the
|
||||
Office of Naval Research under contract N00014-75-C-0661, and
|
||||
in part by the National Science Foundation under grant DCR74-21892.
|
||||
.ls
|
||||
.end
|
||||
.if csg_memo>0
|
||||
.ls 1
|
||||
.nf c
|
||||
.new_font 3
|
||||
Massachusetts Institute of Technology
|
||||
Laboratory for Computer Science
|
||||
.new_font 0
|
||||
(formerly Project MAC)
|
||||
.sp 1.25i
|
||||
Computation Structures Group Memo csg_memo-1
|
||||
.sp 1.25i
|
||||
.new_font 4
|
||||
Abstraction Mechanisms in CLU
|
||||
.new_font 1
|
||||
.sp
|
||||
by
|
||||
.sp
|
||||
Barbara Liskov
|
||||
Alan Snyder
|
||||
Russell Atkinson
|
||||
Craig Schaffert
|
||||
.new_font 0
|
||||
.nf l
|
||||
.vp 8.25i
|
||||
.fi
|
||||
.new_font 0
|
||||
This research was supported in part by the Advanced Research
|
||||
Projects Agency of the Department of Defense, monitored by the
|
||||
Office of Naval Research under contract N00014-75-C-0661, and
|
||||
in part by the National Science Foundation under grant DCR74-21892.
|
||||
.nf c
|
||||
.sp .5i
|
||||
January 1977
|
||||
.nf l
|
||||
.fi
|
||||
.ls
|
||||
.end
|
||||
.
|
||||
.
|
||||
.if narrow
|
||||
.sp 2
|
||||
.new_font 1
|
||||
.ll 7i
|
||||
.ef
|
||||
.bp
|
||||
.rs
|
||||
.vp 3i
|
||||
.new_font 3
|
||||
.en
|
||||
ABSTRACT
|
||||
.new_font 0
|
||||
.sp
|
||||
.ns
|
||||
.para
|
||||
CLU is a new programming language designed to support
|
||||
the use of abstractions in program construction.
|
||||
Work in programming methodology has led to the realization
|
||||
that three kinds of abstractions,
|
||||
procedural, control, and especially data abstractions,
|
||||
are useful in the programming process.
|
||||
Of these, only the procedural abstraction
|
||||
is supported well by conventional languages,
|
||||
through the procedure or subroutine.
|
||||
CLU provides, in addition to procedures,
|
||||
novel linguistic mechanisms that
|
||||
support the use of data and control abstractions.
|
||||
.para
|
||||
This paper provides an introduction to the abstraction mechanisms
|
||||
in CLU.
|
||||
By means of programming examples, we illustrate the utility of
|
||||
the three kinds of abstractions in program construction
|
||||
and show how CLU programs may be written to use
|
||||
and implement abstractions.
|
||||
We also discuss the CLU library, which permits
|
||||
incremental program development with complete
|
||||
type-checking performed at compile-time.
|
||||
.sp
|
||||
.fi l
|
||||
Key words and phrases: programming languages, data types,
|
||||
data abstractions, control abstractions, programming
|
||||
methodology, separate compilation.
|
||||
.sp
|
||||
CR categories: 4.0, 4.12, 4.20, 4.22.
|
||||
.br
|
||||
.fi b
|
||||
.if narrow
|
||||
.ll
|
||||
.en
|
||||
.
|
||||
.ref All71
|
||||
Allen, F. E. and Cocke, J.
|
||||
A catalogue of optimizing transformations.
|
||||
Rep. RC 3548,
|
||||
IBM Thomas J. Watson Research Center,
|
||||
Yorktown Heights, N.@Y., 1971.
|
||||
.em
|
||||
.ref All75
|
||||
Allen, F. E.
|
||||
A program data flow analysis procedure.
|
||||
Rep. RC 5278,
|
||||
IBM Thomas J. Watson Research Center,
|
||||
Yorktown Heights, N.@Y., 1975.
|
||||
.em
|
||||
.ref Atk76
|
||||
Atkinson, R. R.
|
||||
Optimization techniques for a structured programming language.
|
||||
S.M. Thesis,
|
||||
Dept. of Electrical Engineering and Computer Science,
|
||||
M.@I.@T., Cambridge, Mass., June 1976.
|
||||
.em
|
||||
.ref Dah70
|
||||
Dahl, O. J., Myhrhaug, B., and Nygaard, K.
|
||||
The SIMULA 67 common base language.
|
||||
Publication S-22, Norwegian Computing Center, Oslo, 1970.
|
||||
.em
|
||||
.ref DK75
|
||||
DeRemer, F. and Kron, H.
|
||||
Programming-in-the-large versus programming-in-the-small.
|
||||
2Proceedings of International Conference on Reliable Software*,
|
||||
2SIGPLAN Notices 10*, 6 (June 1975), 114-121.
|
||||
.em
|
||||
.ref Dij72
|
||||
Dijkstra, E. W.
|
||||
Notes on structured programming.
|
||||
2Structured Programming,
|
||||
A.P.I.C. Studies in Data Processing No. 8*,
|
||||
Academic Press, New York 1972, 1-81.
|
||||
.em
|
||||
.ref Guttag
|
||||
Guttag, J. V., Horowitz, E., and Musser, D. R.
|
||||
Abstract data types and software validation.
|
||||
Rep. ISI/RR-76-48, Information Sciences Institute,
|
||||
University of Southern California, Marina del Rey,
|
||||
Calif., August 1976.
|
||||
.em
|
||||
.ref Hoare72
|
||||
Hoare, C. A. R.
|
||||
Proof of correctness of data representations.
|
||||
2Acta Informatica*, 4 (1972), 271-281.
|
||||
.em
|
||||
.ref Knu73
|
||||
Knuth, D.
|
||||
2The Art of Computer Programming*, vol. 3.
|
||||
Addison Wesley, Reading, Mass., 1973.
|
||||
.em
|
||||
.ref LCS75
|
||||
2Laboratory for Computer Science Progress Report 1974-1975*,
|
||||
Computation Structures Group.
|
||||
Rep. PR-XII,
|
||||
Laboratory for Computer Science, M.@I.@T.,
|
||||
to be published.
|
||||
.em
|
||||
.ref Lam71
|
||||
Lampson, B. W.
|
||||
Protection.
|
||||
Proc. Fifth Annual Princeton Conference on Information
|
||||
Sciences and Systems, Princeton University, 1971, 437-443.
|
||||
.em
|
||||
.ref Lis74
|
||||
Liskov, B. H. and Zilles, S. N.
|
||||
Programming with abstract data types.
|
||||
Proc. ACM SIGPLAN Conference on Very High Level Languages,
|
||||
2SIGPLAN Notices 9*, 4 (April 1974), 50-59.
|
||||
.em
|
||||
.ref Lis75
|
||||
Liskov, B. H. and Zilles, S. N.
|
||||
Specification techniques for data abstractions.
|
||||
2IEEE Trans. on Software Engineering*, 2SE-1*,
|
||||
(1975), 7-19.
|
||||
.em
|
||||
.ref Lis76
|
||||
Liskov, B. H. and Berzins, V.
|
||||
An appraisal of program specifications.
|
||||
Computation Structures Group Memo 141,
|
||||
Laboratory for Computer Science,
|
||||
M.@I.@T., Cambridge, Mass., July 1976.
|
||||
.em
|
||||
.ref McC62
|
||||
McCarthy, J., et al.
|
||||
2LISP 1.5 Programmer's Manual*, MIT Press, 1962.
|
||||
.em
|
||||
.ref Mor73
|
||||
Morris, J. H.
|
||||
Protection in programming languages.
|
||||
2Comm. ACM 16*, 1 (Jan 1973), 15-21.
|
||||
.em
|
||||
.ref Mor74
|
||||
Morris, J. H.
|
||||
Toward more flexible type systems.
|
||||
Proceedings of the Programming Symposium, Paris, April 9-11, 1974,
|
||||
2Lecture Notes in Computer Science 19*, Springer-Verlag, New York,
|
||||
377-384.
|
||||
.em
|
||||
.ref Par71
|
||||
Parnas, D. L.
|
||||
Information distribution aspects of design methodology.
|
||||
Proc. IFIP 1971.
|
||||
.em
|
||||
.ref Sch76
|
||||
Scheifler, R. W.
|
||||
An analysis of inline substitution for the CLU programming language.
|
||||
Computation Structures Group Memo 139,
|
||||
Laboratory for Computer Science,
|
||||
M.@I.@T., Cambridge, Mass., June 1976.
|
||||
.em
|
||||
.ref Spitzen
|
||||
Spitzen, J. and Wegbreit, B.
|
||||
The verification and synthesis of data structures.
|
||||
2Acta Informatica*, 4 (1975), 127-144.
|
||||
.em
|
||||
.ref Standish
|
||||
Standish, T. A.
|
||||
2Data structures: an axiomatic approach*.
|
||||
Rep. 2639, Bolt Beranek and Newman, Cambridge,
|
||||
Mass., 1973.
|
||||
.em
|
||||
.ref Thomas
|
||||
Thomas, J. W.
|
||||
Module interconnection in programming systems supporting
|
||||
abstraction.
|
||||
Rep. CS-16, Computer Science Program, Brown University,
|
||||
Providence, R.@I., 1976.
|
||||
.em
|
||||
.ref Wir71a
|
||||
Wirth, N.
|
||||
Program development by stepwise refinement.
|
||||
2Comm. ACM 14*, 4 (1971), 221-227.
|
||||
.em
|
||||
.ref Wir71b
|
||||
Wirth, N.
|
||||
The programming language PASCAL.
|
||||
2Acta Informatica*, 1 (1971), 35-63.
|
||||
.em
|
||||
.ref Wul84
|
||||
Wulf, W. A., London, R., and Shaw, M.
|
||||
An introduction to the construction and verification
|
||||
of Alphard programs.
|
||||
2IEEE Transactions on Software Engineering SE-2*,
|
||||
(1976), 253-264.
|
||||
.em
|
||||
.bp
|
||||
.if do_refs
|
||||
.insert_refs
|
||||
.en
|
||||
.if narrow
|
||||
.rs
|
||||
.sp 3i
|
||||
.en
|
||||
212
doc/clu/clup1.r
Normal file
212
doc/clu/clup1.r
Normal file
@@ -0,0 +1,212 @@
|
||||
.so clu/clupap.header
|
||||
.chapter "Introduction"
|
||||
.para
|
||||
The motivation for the design of the CLU programming
|
||||
language was to provide programmers with a tool that would
|
||||
enhance their effectiveness in constructing programs of
|
||||
high quality -- programs that are reliable and reasonably
|
||||
easy to understand, modify, and maintain.
|
||||
CLU aids programmers
|
||||
by providing constructs that support
|
||||
the use of abstractions in program design and implementation.
|
||||
.para
|
||||
The quality of software depends primarily on
|
||||
the programming methodology in use.
|
||||
The choice of programming language, however, can have a major impact on
|
||||
the effectiveness of a methodology.
|
||||
A methodology can be easy
|
||||
or difficult to apply in a given language, depending on
|
||||
how well the language constructs match the
|
||||
structures that the methodology deems desirable.
|
||||
The presence of constructs that give a concrete form
|
||||
for the desired structures makes the methodology more understandable.
|
||||
In addition, a programming language influences the way that
|
||||
its users think about programming;
|
||||
matching a language to a methodology increases the likelihood that
|
||||
the methodology will be used.
|
||||
.para
|
||||
CLU has been designed to support a methodology
|
||||
(similar to
|
||||
[Dij72,@Wir71a])
|
||||
in which programs are developed by
|
||||
means of problem decomposition based on the recognition
|
||||
of abstractions.
|
||||
A program is constructed in many
|
||||
stages.
|
||||
At each stage, the problem to be solved is
|
||||
how to implement some abstraction (the initial problem
|
||||
is to implement the abstract behavior required of the
|
||||
entire program).
|
||||
The implementation is developed by envisioning a number
|
||||
of subsidiary abstractions (abstract objects and
|
||||
operations) that are useful in the problem domain.
|
||||
Once the behavior of the abstract objects and operations
|
||||
has been defined, a program can be written to solve the
|
||||
original problem; in this program, the abstract objects
|
||||
and operations are used as primitives.
|
||||
Now the original
|
||||
problem has been solved, but new problems have arisen,
|
||||
namely, how to implement the subsidiary abstractions.
|
||||
Each of these abstractions is
|
||||
considered in turn as a new problem; its implementation
|
||||
may introduce further abstractions.
|
||||
This process
|
||||
terminates when all the abstractions introduced at various
|
||||
stages have been implemented or are present in the
|
||||
programming language in use.
|
||||
.para
|
||||
In this methodology, programs are developed
|
||||
incrementally, one abstraction at a time.
|
||||
Further, a distinction is made between an abstraction,
|
||||
which is a kind of behavior, and a program,
|
||||
or 2module*, which implements that behavior.
|
||||
An abstraction isolates
|
||||
use from implementation: an abstraction can be used
|
||||
without knowledge of its implementation and implemented
|
||||
without knowledge of its use.
|
||||
These aspects of the methodology are supported by the
|
||||
CLU 2library*, which maintains
|
||||
information about abstractions
|
||||
and the CLU modules that implement them.
|
||||
The library permits separate compilation of
|
||||
modules with complete type-checking at
|
||||
compile-time.
|
||||
.para
|
||||
To make effective use of the
|
||||
methodology, it is necessary to understand the kinds
|
||||
of abstractions that are useful in constructing programs.
|
||||
In studying this question,
|
||||
we identified an important kind of abstraction,
|
||||
the data abstraction, that
|
||||
had been largely neglected in discussions of programming methodology.
|
||||
.para
|
||||
A data abstraction [Hoare72,@Lis74,@Standish]
|
||||
is used to introduce a new
|
||||
type of data object that is deemed useful
|
||||
in the domain of the problem being solved.
|
||||
At the level of use, the programmer is
|
||||
concerned with the 2behavior* of these data objects,
|
||||
what kinds of information can be stored in them and
|
||||
obtained from them.
|
||||
The programmer is 2not* concerned
|
||||
with how the data objects are represented in storage,
|
||||
nor with the algorithms used to store and access
|
||||
information in them.
|
||||
In fact, a data abstraction is
|
||||
often introduced to delay such implementation
|
||||
decisions until a later stage of design.
|
||||
.para
|
||||
The behavior of the data objects is expressed most
|
||||
naturally in terms of a set of operations that are meaningful
|
||||
for those objects.
|
||||
This set will include operations
|
||||
to create objects, to obtain information from them,
|
||||
and possibly to modify them.
|
||||
For example,
|
||||
push and pop are among the meaningful operations for stacks,
|
||||
while meaningful operations for integers include the usual
|
||||
arithmetic operations.
|
||||
Thus, a data abstraction consists of a
|
||||
set of objects and a set of operations
|
||||
characterizing the behavior of the
|
||||
objects.
|
||||
.para
|
||||
If a data abstraction is to be
|
||||
understandable at an abstract level,
|
||||
the behavior of the data objects must be
|
||||
2completely* characterized by the set of operations.
|
||||
This property is ensured by making the operations the
|
||||
2only direct means* of creating and manipulating the objects.
|
||||
One effect of this restriction
|
||||
is that, when defining an abstraction,
|
||||
the programmer must be careful to include a
|
||||
sufficient set of operations, since every action
|
||||
he wishes to perform on the objects must be
|
||||
realized in terms of this set.
|
||||
.para
|
||||
We have identified the following requirements that must be
|
||||
satisfied by a language supporting data abstractions:
|
||||
.ilist 3
|
||||
1. A linguistic construct is needed that permits
|
||||
a data abstraction to be implemented as a unit.
|
||||
The implementation involves selecting a representation
|
||||
for the data objects and defining an algorithm for each
|
||||
operation in terms of that representation.
|
||||
.next
|
||||
2. The language must limit access to the
|
||||
representation to just the operations. This limitation
|
||||
is necessary to ensure that the operations completely
|
||||
characterize the behavior of the objects.
|
||||
.end_list
|
||||
CLU satisfies these requirements by providing a linguistic construct
|
||||
called a 2cluster* for implementing data abstractions.
|
||||
Data abstractions are integrated into the language
|
||||
through the data type mechanism.
|
||||
Access to the representation is
|
||||
controlled by type-checking, which is done at
|
||||
compile time.
|
||||
.para
|
||||
In addition to data abstractions, CLU
|
||||
supports two other kinds of abstractions:
|
||||
procedural abstractions and control abstractions.
|
||||
A procedural abstraction performs a computation on a
|
||||
set of input objects and produces a set of output objects;
|
||||
examples of procedural abstractions are sorting an
|
||||
array and computing a square root.
|
||||
CLU supports procedural abstractions by means of procedures,
|
||||
which are similar to procedures in other programming languages.
|
||||
.para
|
||||
A control abstraction defines a method
|
||||
for sequencing arbitrary actions.
|
||||
All languages provide built-in control abstractions;
|
||||
examples are the if statement and the while statement.
|
||||
In addition, however,
|
||||
CLU allows user definitions of a simple kind of control abstraction.
|
||||
The method provided is a generalization of the
|
||||
repetition methods available in many programming
|
||||
languages.
|
||||
Frequently the programmer desires to
|
||||
perform the same action for all the objects in a
|
||||
collection, such as all
|
||||
characters in a string or all items in a set.
|
||||
CLU
|
||||
provides a linguistic construct called an 2iterator*
|
||||
for defining how the objects in the
|
||||
collection are obtained.
|
||||
The iterator is used in
|
||||
conjunction with the for statement; the body
|
||||
of the for statement describes the action to be
|
||||
taken.
|
||||
.para
|
||||
The purpose of this paper is to illustrate
|
||||
the utility of the three kinds of abstractions
|
||||
in program construction,
|
||||
and to provide an informal introduction to CLU.
|
||||
We do not attempt a complete description of the language;
|
||||
rather, we concentrate on the constructs that
|
||||
support abstractions.
|
||||
The presence of these
|
||||
constructs constitutes the most important way in
|
||||
which CLU differs from other languages.
|
||||
The language closest to CLU is Alphard [Wul84],
|
||||
which represents a concurrent design effort with goals similar to
|
||||
our own.
|
||||
The design of CLU has been influenced by
|
||||
SIMULA 67 [Dah70], and to a lesser extent by
|
||||
Pascal [Wir71b] and LISP [McC62].
|
||||
.para
|
||||
In the next section we introduce CLU and,
|
||||
by means of a programming example,
|
||||
illustrate the use and implementation
|
||||
of data abstractions.
|
||||
Section semantics describes the basic semantics of CLU.
|
||||
In Section more_abstraction, we discuss
|
||||
control abstractions and more powerful kinds of
|
||||
data abstractions.
|
||||
We present the CLU library in Section library.
|
||||
Section implementation briefly describes
|
||||
the current implementation of CLU
|
||||
and discusses efficiency considerations.
|
||||
.ne 2
|
||||
Finally, we conclude by discussing
|
||||
the quality of CLU programs.
|
||||
595
doc/clu/clup2.r
Normal file
595
doc/clu/clup2.r
Normal file
@@ -0,0 +1,595 @@
|
||||
.nd chapter 2-1
|
||||
.so clu/clupap.header
|
||||
.
|
||||
string registers for italic variable names
|
||||
.
|
||||
.sr i 2i*
|
||||
.sr s 2s*
|
||||
.sr o 2o*
|
||||
.sr c 2c*
|
||||
.sr n 2n*
|
||||
.sr t 2t*
|
||||
.sr r 2r*
|
||||
.sr x 2x*
|
||||
.sr tr 2tr*
|
||||
.sr w 2w*
|
||||
.sr wb 2wb*
|
||||
.sr total 2total*
|
||||
.sr contents 2contents*
|
||||
.sr count_words 2count_words*
|
||||
.sr next_word 2next_word*
|
||||
.sr wordbag 2wordbag*
|
||||
.sr wordtree 2wordtree*
|
||||
.sr wordbags 2wordbags*
|
||||
.sr wordtrees 2wordtrees*
|
||||
.sr insert 2insert*
|
||||
.sr create 2create*
|
||||
.sr print 2print*
|
||||
.sr instream 2instream*
|
||||
.sr instreams 2instreams*
|
||||
.sr outstream 2outstream*
|
||||
.sr outstreams 2outstreams*
|
||||
.
|
||||
.chapter "An Example of Data Abstraction"
|
||||
.para
|
||||
This section introduces the basic data
|
||||
abstraction mechanism of CLU, the cluster.
|
||||
By means of an example, we intend to show how
|
||||
abstractions occur naturally in program design,
|
||||
and how they are used and implemented in CLU.
|
||||
In particular, we show how a data abstraction
|
||||
can be used as structured intermediate storage.
|
||||
.para
|
||||
Consider the following problem:
|
||||
Given some document, we wish to compute,
|
||||
for each distinct word in the document,
|
||||
the number of times the word occurs
|
||||
and its frequency of occurrence as a percentage of the total
|
||||
number of words.
|
||||
The document will be
|
||||
represented as a sequence of characters.
|
||||
A word is any non-empty sequence of
|
||||
alphabetic characters.
|
||||
Adjacent words are
|
||||
separated by one or more non-alphabetic
|
||||
characters such as spaces, punctuation, or newline
|
||||
characters.
|
||||
In recognizing distinct words, the
|
||||
difference between upper and lower case letters should
|
||||
be ignored.
|
||||
.para
|
||||
The output is also to be a sequence of characters,
|
||||
divided into lines.
|
||||
Successive lines should contain an alphabetical
|
||||
list of all the distinct words in the document,
|
||||
one word per line.
|
||||
Accompanying each word should
|
||||
be the total number of occurrences and the
|
||||
.ne 5
|
||||
frequency of occurrence. For example:
|
||||
.table
|
||||
.ta 8 20 28
|
||||
a 2 3.509%
|
||||
access 1 1.754%
|
||||
and 2 3.509%
|
||||
dots
|
||||
.rtabs
|
||||
.end_table
|
||||
.para
|
||||
Specifically, we are required to write the
|
||||
procedure count_words, which takes two arguments:
|
||||
an instream and an outstream.
|
||||
The former is the
|
||||
source of the document to be processed, and the latter
|
||||
is the destination of the required output.
|
||||
.ne 5
|
||||
The form of this procedure will be
|
||||
.code
|
||||
count_words = proc (i: instream, o: outstream);
|
||||
dots
|
||||
end count_words;
|
||||
.end_code
|
||||
Note that count_words does not return any results;
|
||||
its only effects are modifications of i (reading the entire
|
||||
document) and of o (printing the required statistics).
|
||||
.para
|
||||
2Instream* and outstream are data abstractions.
|
||||
An instream i contains a sequence of characters.
|
||||
Of the primitive
|
||||
operations on instreams, only two will be of interest to us.
|
||||
2Empty@(i)* returns true if there are no characters available
|
||||
in i, and returns false otherwise.
|
||||
2Next@(i)* removes the first character from the sequence
|
||||
and returns it.
|
||||
Invoking the next operation on an empty instream is an
|
||||
error.
|
||||
.foot
|
||||
The CLU error handling mechanism is discussed in [LCS75].
|
||||
.efoot
|
||||
An outstream also contains a sequence of characters.
|
||||
The interesting operation on outstreams is
|
||||
2put_string@(s,@o)*,
|
||||
which appends the string s to the existing sequence of characters
|
||||
in o.
|
||||
.para
|
||||
Now consider how we might implement count_words.
|
||||
We begin by deciding how to handle words.
|
||||
We could define a new abstract data type 2word*.
|
||||
However, we choose instead to use strings (a primitive
|
||||
CLU type), with the restriction that only strings of
|
||||
lower-case alphabetic characters will be used.
|
||||
.foot
|
||||
Sometimes it is difficult to decide whether to introduce
|
||||
a new data abstraction or to use an existing abstraction.
|
||||
Our decision to use strings to represent words was made
|
||||
partly to shorten the presentation.
|
||||
.efoot
|
||||
.para
|
||||
Next, we investigate how to scan the document.
|
||||
Reading a word requires knowledge of the
|
||||
exact way in which words occur in the input stream.
|
||||
We choose to isolate this information in a procedural abstraction,
|
||||
called next_word,
|
||||
which takes in the instream i and returns the next word
|
||||
(converted to lower case characters) in the document.
|
||||
If there are no more words,
|
||||
next_word must communicate this fact to count_words.
|
||||
A simple way to indicate that there are no
|
||||
more words is by returning an ``end of document'' word,
|
||||
one that is distinct from any other word.
|
||||
A reasonable choice for the ``end of document'' word is
|
||||
the empty string.
|
||||
.para
|
||||
It is clear that in count_words we must scan the
|
||||
entire document before we can print our results, and
|
||||
therefore, we need some receptacle
|
||||
to retain information about words between these two
|
||||
actions (scanning and printing).
|
||||
Recording the
|
||||
information gained in the scan and organizing it
|
||||
for easy printing will probably be fairly complex.
|
||||
Therefore, we will defer such considerations until later
|
||||
by introducing a data abstraction wordbag with the
|
||||
appropriate properties.
|
||||
In particular, wordbag provides
|
||||
three operations: create, which creates an empty wordbag;
|
||||
insert, which adds a word to the wordbag; and print, which
|
||||
prints the desired statistical information about the words
|
||||
in the wordbag.
|
||||
.foot
|
||||
The print operation is not the ideal choice, but a better
|
||||
solution requires the use of control abstractions.
|
||||
This solution is presented in Section more_abstraction.
|
||||
.efoot
|
||||
.nr count_words current_figure
|
||||
.para
|
||||
The implementation of count_words is shown in
|
||||
Figure count_words.
|
||||
.begin_figure "The count_words procedure."
|
||||
.code
|
||||
count_words = proc (i: instream, o: outstream);
|
||||
|
||||
% create an empty wordbag
|
||||
wb: wordbag := wordbag$create ();
|
||||
|
||||
% scan document, adding each word found to wb
|
||||
w: string := next_word (i);
|
||||
while w ~= "" do
|
||||
wordbag$insert (wb, w);
|
||||
w := next_word (i);
|
||||
end;
|
||||
|
||||
% print the wordbag
|
||||
wordbag$print (wb, o);
|
||||
|
||||
end count_words;
|
||||
.ns
|
||||
.end_code
|
||||
.finish_figure
|
||||
The ``%'' character starts a comment,
|
||||
which continues to the end of the line.
|
||||
The ``~'' character stands for boolean negation.
|
||||
The notation 2variable:@type* is used
|
||||
in formal argument lists and declarations
|
||||
to specify the types of variables;
|
||||
a declaration may be combined with an assignment
|
||||
specifying the initial value of the variable.
|
||||
Boldface is used for reserved words, including the
|
||||
names of primitive CLU types.
|
||||
CLU does not permit
|
||||
redefinition of the primitive types; however,
|
||||
primitive types are used in the same way as abstract
|
||||
types.
|
||||
.para
|
||||
The count_words procedure declares four variables:
|
||||
i, o, wb, and w.
|
||||
The first two denote the instream and
|
||||
outstream that are passed as arguments to count_words.
|
||||
The third, wb, denotes the wordbag used to hold
|
||||
the words read so far,
|
||||
and the fourth, w, the word
|
||||
currently being processed.
|
||||
.para
|
||||
Operations of a data abstraction are named by
|
||||
a compound form that specifies both the type and
|
||||
the operation name. Three examples of operation calls
|
||||
appear in count_words: 2wordbag$create@()*,
|
||||
2wordbag$insert@(wb,@w)*
|
||||
and 2wordbag$print@(wb,@o)*.
|
||||
The CLU system provides a mechanism that avoids conflicts
|
||||
between names of abstractions; this mechanism is discussed in
|
||||
Section library.
|
||||
However, operations of two different data abstractions may have
|
||||
the same name;
|
||||
the compound form serves to resolve this ambiguity.
|
||||
Although the ambiguity could in most cases be resolved by context,
|
||||
we have found in using CLU that the compound
|
||||
form enhances the readability of programs.
|
||||
.nr next_word current_figure
|
||||
.para
|
||||
The implementation of next_word is shown in
|
||||
Figure next_word.
|
||||
.begin_figure "The next_word procedure."
|
||||
.code
|
||||
next_word = proc (i: instream) returns (string);
|
||||
|
||||
c: char := 1' '*;
|
||||
|
||||
% scan for first alphabetic character
|
||||
while ~alpha (c) do
|
||||
if instream$empty (i)
|
||||
then return "";
|
||||
end;
|
||||
c := instream$next (i);
|
||||
end;
|
||||
|
||||
% accumulate characters in word
|
||||
w: string := "";
|
||||
while alpha (c) do
|
||||
w := string$append (w, lower_case (c));
|
||||
if instream$empty (i)
|
||||
then return w;
|
||||
end;
|
||||
c := instream$next (i);
|
||||
end;
|
||||
|
||||
return w; % the non-alphabetic character c is lost
|
||||
|
||||
end next_word;
|
||||
.ns
|
||||
.end_code
|
||||
.finish_figure
|
||||
The 2string$append* operation creates a new string
|
||||
by appending a character to the characters in the
|
||||
string argument
|
||||
(it does 2not* modify the string argument).
|
||||
Note the use of the instream operations
|
||||
2next* and 2empty*.
|
||||
Note also that two additional procedures have been used:
|
||||
2alpha@(c)*,
|
||||
which tests whether a character is alphabetic or not,
|
||||
and 2lower_case@(c)*,
|
||||
which returns the lower case version of a character.
|
||||
The implementations of these procedures are not shown in the paper.
|
||||
.para
|
||||
Now we must implement the type wordbag.
|
||||
.ne 5
|
||||
The cluster will have the form
|
||||
.code
|
||||
wordbag = cluster is create, insert, print;
|
||||
dots
|
||||
end wordbag;
|
||||
.end_code
|
||||
This form expresses the idea that the data abstraction is a set
|
||||
of operations as well as a set of objects.
|
||||
The cluster must
|
||||
provide a representation for objects of the type wordbag and
|
||||
an implementation for each of the operations.
|
||||
We are free to choose from the possible representations the
|
||||
one best suited to our use of the wordbag cluster.
|
||||
.para
|
||||
The representation that we choose should allow
|
||||
reasonably efficient storage of words and easy printing,
|
||||
in alphabetic order, of the words and associated statistics.
|
||||
For efficiency in computing the statistics, maintaining
|
||||
a count of the total number of words in the document
|
||||
would be helpful.
|
||||
Since the total number of words in the document is probably
|
||||
much larger than the number of distinct words, the
|
||||
representation of a wordbag should contain only one ``item'' for
|
||||
each distinct word (along with a multiplicity count), rather
|
||||
than one ``item'' for each occurrence.
|
||||
This choice of representation requires that, at
|
||||
each insertion, we check whether the new word is already
|
||||
present in the wordbag.
|
||||
We would like a representation that
|
||||
allows the search for a matching ``item'' and the insertion of a
|
||||
not-previously-present ``item'' to be efficient.
|
||||
A binary tree representation [Knu73] fits our requirements nicely.
|
||||
.para
|
||||
Thus the main part of the wordbag representation will
|
||||
consist of a binary tree.
|
||||
The binary tree is another data abstraction,
|
||||
wordtree. The data abstraction wordtree
|
||||
provides operations very similar to those of wordbag:
|
||||
2create@()* returns an empty wordtree;
|
||||
2insert@(tr,@w)* returns a wordtree containing all the
|
||||
words in the wordtree tr plus the additional word w
|
||||
(the wordtree tr may be modified in the process);
|
||||
and 2print@(tr,@n,@o)* prints the contents of the
|
||||
wordtree tr in alphabetic order on outstream o, along with the
|
||||
number of occurrences and the frequency (based on a total of
|
||||
n words).
|
||||
.nr wordbag current_figure
|
||||
.para
|
||||
The implementation of wordbag is given in Figure wordbag.
|
||||
.begin_figure "The wordbag cluster."
|
||||
.code
|
||||
wordbag = cluster is
|
||||
create, % create an empty bag
|
||||
insert, % insert an element
|
||||
print; % print contents of bag
|
||||
|
||||
rep = record [contents: wordtree, total: int];
|
||||
|
||||
create = proc () returns (cvt);
|
||||
return rep${contents: wordtree$create (), total: 0};
|
||||
end create;
|
||||
|
||||
insert = proc (x: cvt, v: string);
|
||||
x.contents := wordtree$insert (x.contents, v);
|
||||
x.total := x.total + 1;
|
||||
end insert;
|
||||
|
||||
print = proc (x: cvt, o: outstream);
|
||||
wordtree$print (x.contents, x.total, o);
|
||||
end print;
|
||||
|
||||
end wordbag;
|
||||
.ns
|
||||
.end_code
|
||||
.finish_figure
|
||||
Following the header, we find the definition of the
|
||||
.ne 3
|
||||
representation selected for wordbag objects:
|
||||
.code
|
||||
rep = record [contents: wordtree, total: int];
|
||||
.end_code
|
||||
The reserved type identifier rep indicates that the type
|
||||
specification to the right of the equal sign is the representing
|
||||
type for the cluster.
|
||||
We have defined the representation of a wordbag object to
|
||||
consist of two pieces: a wordtree,
|
||||
as explained above, and an integer, which records the total
|
||||
number of words in the wordbag.
|
||||
.para
|
||||
A CLU record is an object with one or more named
|
||||
components.
|
||||
For each component name, there is an operation to select
|
||||
and an operation to set the corresponding component.
|
||||
The operation 2get_n@(r)* returns the n component
|
||||
of the record r (this operation is usually
|
||||
abbreviated 2r.n*).
|
||||
The operation 2put_n@(r,@x)* makes x the n component
|
||||
of the record r (this operation is usually
|
||||
abbreviated 2r.n@*:=2@x*,
|
||||
by analogy with the assignment statement).
|
||||
A new record is created by an expression of the form
|
||||
type${name1: value1, dots}.
|
||||
.para
|
||||
There are two different
|
||||
types associated with any cluster: the abstract
|
||||
type being defined (wordbag in this case) and the
|
||||
representation type (the record).
|
||||
Outside of the cluster,
|
||||
type-checking will ensure that a wordbag object will always be
|
||||
treated as such.
|
||||
In particular, the ability to convert a wordbag object into its
|
||||
representation is not provided (unless one of the
|
||||
wordbag operations does so explicitly).
|
||||
.para
|
||||
Inside the cluster, however, it is necessary to view
|
||||
a wordbag object as being of the representation type,
|
||||
because the implementations of the
|
||||
operations are defined in terms of the representation.
|
||||
This change of viewpoint is signalled by having the
|
||||
reserved word cvt appear as the type of an
|
||||
argument (as in the insert and print operations).
|
||||
1Cvt* may also appear as a return type
|
||||
(as in the create operation);
|
||||
here it indicates that a returned object
|
||||
will be changed into an object of abstract type.
|
||||
Whether cvt appears as the type of an
|
||||
argument or as a return type,
|
||||
it stipulates a ``conversion'' of viewpoint
|
||||
between the external abstract type and the internal representation type.
|
||||
1Cvt* can be used only within a cluster,
|
||||
and conversion can be done only between the single abstract
|
||||
type being defined and the (single) representation type.
|
||||
.foot
|
||||
1Cvt* corresponds to Morris' seal and unseal [Mor73],
|
||||
except that 1cvt* represents a change in viewpoint only;
|
||||
no computation is required.
|
||||
.efoot
|
||||
.para
|
||||
The procedures in wordbag are very simple.
|
||||
2Create* builds a new instance of the rep by use of the
|
||||
.ne 3
|
||||
record constructor
|
||||
.code
|
||||
rep${contents: wordtree$create (), total: 0}
|
||||
.end_code
|
||||
Here total is initialized to 0, and contents to the
|
||||
empty wordtree (by calling the create operation of wordtree).
|
||||
This rep object is converted to a wordbag object as it
|
||||
is being returned.
|
||||
2Insert* and print are implemented directly
|
||||
in terms of wordtree operations.
|
||||
.nr wordtree current_figure
|
||||
.para
|
||||
The implementation of wordtree is shown in Figure wordtree.
|
||||
In the wordtree representation, each node
|
||||
contains a word and the number of times that word has been
|
||||
inserted into the wordbag, as well as two subtrees.
|
||||
.begin_page_figure "The wordtree cluster."
|
||||
.code
|
||||
wordtree = cluster is
|
||||
create, % create empty contents
|
||||
insert, % add item to contents
|
||||
print; % print contents
|
||||
|
||||
node = record [m!value: string, count: int,
|
||||
(mark!m)lesser: wordtree, greater: wordtree];
|
||||
rep = oneof [empty: null, non_empty: node];
|
||||
|
||||
create = proc () returns (cvt);
|
||||
return rep$make_empty (nil);
|
||||
end create;
|
||||
|
||||
insert = proc (x: cvt, v: string) returns (cvt);
|
||||
tagcase x
|
||||
tag empty:
|
||||
n: node := node${m!value: v, count: 1,
|
||||
(mark!m)lesser: wordtree$create (),
|
||||
(mark!m)greater: wordtree$create ()};
|
||||
return rep$make_non_empty (n);
|
||||
tag non_empty (n: node):
|
||||
if v = n.value
|
||||
then n.count := n.count + 1;
|
||||
elseif v < n.value
|
||||
then n.lesser := wordtree$insert (n.lesser, v);
|
||||
else n.greater := wordtree$insert (n.greater, v);
|
||||
end;
|
||||
return x;
|
||||
end;
|
||||
end insert;
|
||||
|
||||
print = proc (x: cvt, total: int, o: outstream);
|
||||
tagcase x
|
||||
tag empty: ;
|
||||
tag non_empty (n: node):
|
||||
wordtree$print (n.lesser, total, o);
|
||||
print_word (n.value, n.count, total, o);
|
||||
wordtree$print (n.greater, total, o);
|
||||
end;
|
||||
end print;
|
||||
|
||||
end wordtree;
|
||||
.ns
|
||||
.end_code
|
||||
.finish_figure
|
||||
For any
|
||||
particular node, the words in the ``lesser'' subtree must
|
||||
alphabetically precede the word in the node, and the words
|
||||
in the ``greater'' subtree must follow the word in the node.
|
||||
.ne 4
|
||||
This information is described by
|
||||
.code
|
||||
node = record [m!value: string, count: int,
|
||||
(mark!m)lesser: wordtree, greater: wordtree];
|
||||
.end_code
|
||||
which defines ``node'' to be an
|
||||
abbreviation for the information following
|
||||
the equal sign.
|
||||
(The reserved word rep is used similarly,
|
||||
as an abbreviation for the representation type.)
|
||||
.para
|
||||
Now consider the representation of wordtrees.
|
||||
A non-empty wordtree can be represented by its top node.
|
||||
An empty wordtree, however, contains no information.
|
||||
The ideal type to represent an empty wordtree
|
||||
is the CLU type null,
|
||||
which has a single data object nil.
|
||||
So the representation of a wordtree should
|
||||
be either a node or nil.
|
||||
.ne 3
|
||||
This representation is expressed by
|
||||
.code
|
||||
rep = oneof [empty: null, non_empty: node];
|
||||
.end_code
|
||||
.para
|
||||
Just as the record is the basic CLU
|
||||
mechanism to form an object
|
||||
that is a collection of other objects,
|
||||
the oneof is the basic CLU mechanism to form an object
|
||||
that is ``one of'' a set of alternatives.
|
||||
Oneof is CLU's method of forming a
|
||||
discriminated union, and is somewhat similar to
|
||||
a variant component of a record in Pascal [Wir71b].
|
||||
.para
|
||||
An object of the type oneof@[s1:@T1 dots sn:@Tn]
|
||||
can be thought of as a pair.
|
||||
The ``tag'' component is an
|
||||
identifier from the set {s1 dots sn}.
|
||||
The ``value''
|
||||
component is an object of the type corresponding to the
|
||||
tag.
|
||||
That is, if the tag component is si then the
|
||||
value is some object of type Ti.
|
||||
.para
|
||||
Objects of type oneof@[s1:@T1 dots sn:@Tn]
|
||||
are created by the operations 2make_si@(x)*, each of
|
||||
which takes an object x of type Ti
|
||||
and returns the pair <si,@x>.
|
||||
Because the type of the value component of a oneof object is not
|
||||
known at compile-time, allowing direct access
|
||||
to the value component
|
||||
could result in a run-time type error (e.g., assigning an object
|
||||
to a variable of the wrong type). To eliminate this possibility,
|
||||
.ne 7
|
||||
we require the use of a special tagcase statement to decompose
|
||||
a oneof object:
|
||||
.code
|
||||
tagcase e
|
||||
tag s1 (id1: T1): @@@m!statements dots
|
||||
dots
|
||||
tag sn (idn: Tn):(mark!m)statements dots
|
||||
end;
|
||||
.end_code
|
||||
This statement evaluates the expression 2e*
|
||||
to obtain an object of type
|
||||
oneof@[s1:@T1@dots@sn:@Tn].
|
||||
If the tag is si,
|
||||
then the value is assigned to the new variable
|
||||
idi and the statements following the ith alternative
|
||||
are executed.
|
||||
The variable idi is local to those statements.
|
||||
If, for some reason, we do not need the value,
|
||||
we can omit the parenthesized variable declaration.
|
||||
.para
|
||||
The reader should now know enough to understand
|
||||
Figure wordtree.
|
||||
Note, in the create operation, the use
|
||||
of the construction operation 2make_empty*
|
||||
of the representation type of wordtree
|
||||
(the discriminated union oneof@[empty:@null,@non_empty:@node])
|
||||
to create the empty wordtree.
|
||||
The tagcase statement is used in both insert and print.
|
||||
Note that if insert is given an empty wordtree, it creates a
|
||||
new top node for the returned value,
|
||||
but if insert is given a non-empty wordtree,
|
||||
it modifies the given wordtree and returns it.
|
||||
.foot
|
||||
It is necessary for insert to return a value in addition to
|
||||
having a side-effect because, in the case of an empty wordtree
|
||||
argument, side-effects are not possible. Side-effects are not
|
||||
possible because of the representation chosen for the empty
|
||||
wordtree and because of the CLU parameter passing mechanism
|
||||
(see Section semantics).
|
||||
.efoot
|
||||
The insert operation depends on the dynamic
|
||||
allocation of space for newly-created records (see
|
||||
Section semantics).
|
||||
.para
|
||||
The print operation uses the obvious recursive descent.
|
||||
It makes use of procedure
|
||||
2print_word@(w,@c,@t,@o)*, which generates a single line of
|
||||
output on 2o*, consisting of the word 2w*,
|
||||
the count 2c*, and the frequency of occurrence
|
||||
derived from 2c* and 2t*.
|
||||
The implementation of 2print_word* has been omitted.
|
||||
.para
|
||||
We have now completed
|
||||
our first discussion of the count_words procedure.
|
||||
We return to this problem in Section more_abstraction,
|
||||
where we present a superior solution.
|
||||
181
doc/clu/clup3.r
Normal file
181
doc/clu/clup3.r
Normal file
@@ -0,0 +1,181 @@
|
||||
.nd chapter 3-1
|
||||
.nd current_figure 5
|
||||
.nd wordbag 3
|
||||
.so clu/clupap.header
|
||||
.sr m 2m*
|
||||
.sr p 2p*
|
||||
.sr q 2q*
|
||||
.sr x 2x*
|
||||
.sr y 2y*
|
||||
.sr z 2z*
|
||||
.sr a 2a*
|
||||
.sr b 2b*
|
||||
.sr insert 2insert*
|
||||
.sr increment 2increment*
|
||||
.chapter "Semantics"
|
||||
.para
|
||||
All languages present their users with some model of computation.
|
||||
This section describes those aspects of CLU semantics that differ
|
||||
from the common ALGOL-like model.
|
||||
In particular, we discuss
|
||||
CLU's notions of objects and variables,
|
||||
and the definitions of assignment and argument passing that
|
||||
follow from these notions.
|
||||
We also discuss type correctness.
|
||||
.section "Objects and Variables"
|
||||
.para
|
||||
The basic elements of CLU semantics are
|
||||
2objects* and 2variables*.
|
||||
Objects are the data entities that are created and manipulated
|
||||
by CLU programs.
|
||||
Variables are just the names used in a
|
||||
program to refer to objects.
|
||||
.para
|
||||
In CLU, each object has a particular 2type*,
|
||||
which characterizes its behavior.
|
||||
A type defines a set of operations
|
||||
that create and manipulate objects of that type.
|
||||
An object
|
||||
may be created and manipulated only via the operations of its type.
|
||||
.para
|
||||
An object may 2refer* to objects.
|
||||
For example,
|
||||
a record object refers to the objects that are the components
|
||||
of the record.
|
||||
This notion is one of logical, not physical, containment.
|
||||
In particular, it is possible for two distinct record objects to
|
||||
refer to (or 2share*) the same component object.
|
||||
In the case of a cyclic structure, it is even possible for an object
|
||||
to ``contain'' itself.
|
||||
Thus, it is possible to have recursive data
|
||||
structure definitions and shared data objects without explicit
|
||||
reference types.
|
||||
The 2wordtree* type described in the previous
|
||||
section is an example of a recursively-defined data structure.
|
||||
(This notion of object is similar to that in LISP.)
|
||||
.para
|
||||
CLU objects exist independently of procedure activations.
|
||||
Space for objects is allocated from a dynamic storage area
|
||||
as the result of invoking
|
||||
constructor operations of certain primitive CLU types.
|
||||
For example,
|
||||
the record constructor is used in the implementation of 2wordbag*
|
||||
(Figure wordbag) to acquire space for new 2wordbag* objects.
|
||||
In theory, all objects continue to exist forever.
|
||||
In practice,
|
||||
the space used by an object may be reclaimed when that object is
|
||||
no longer accessible to any CLU program.
|
||||
.foot
|
||||
An object is accessible if it is denoted by a variable of an active
|
||||
procedure or is a component of an accessible object.
|
||||
.efoot
|
||||
.para
|
||||
An object may exhibit time-varying behavior.
|
||||
Such an object, called a 2mutable* object,
|
||||
has a state which may be modified by certain operations
|
||||
without changing the identity of the object.
|
||||
Records are examples of mutable objects.
|
||||
The record update operations (2put_s@(r,@v)*,
|
||||
written as 2r*.2s*@:=@2v*
|
||||
in the examples) change the state of record objects and
|
||||
therefore affect the behavior of subsequent applications of
|
||||
the select operations (2get_s@(r)*, written as 2r*.2s*).
|
||||
The 2wordbag* and 2wordtree* types are additional examples
|
||||
of types with mutable objects.
|
||||
.para
|
||||
If a mutable object m is shared by two other objects x and y,
|
||||
then a modification to m made via x will be visible when m is
|
||||
examined via y.
|
||||
Communication through
|
||||
shared mutable objects is most beneficial in the context
|
||||
of procedure invocation, described below.
|
||||
.para
|
||||
Objects that do not exhibit time-varying behavior are called
|
||||
2immutable* objects, or 2constants*.
|
||||
Examples of constants are integers, booleans,
|
||||
characters, and strings.
|
||||
The value of a constant object cannot be modified.
|
||||
For example,
|
||||
new strings may be computed from old ones,
|
||||
but existing strings do not change.
|
||||
Similarly,
|
||||
none of the integer operations
|
||||
modify the integers passed to them as arguments.
|
||||
.para
|
||||
Variables are names used in CLU programs to 2denote*
|
||||
particular objects at execution time.
|
||||
Unlike variables in many common programming languages,
|
||||
which 2are* objects that 2contain* values,
|
||||
CLU variables are simply names
|
||||
that the programmer uses to refer to objects.
|
||||
As such, it is possible for two variables to denote
|
||||
(or 2share*) the same object.
|
||||
CLU variables are much like those in LISP,
|
||||
and are similar to pointer variables in other languages.
|
||||
However, CLU variables are 2not* objects;
|
||||
they cannot be denoted by other variables or referred to by objects.
|
||||
Thus, variables are completely private to the procedure
|
||||
in which they are declared,
|
||||
and cannot be accessed or modified by any other procedure.
|
||||
.section "Assignment and Procedure Invocation"
|
||||
.para
|
||||
The basic actions in CLU are 2assignment* and
|
||||
2procedure invocation*.
|
||||
The assignment primitive 2x*@:=@2E*, where x is a variable
|
||||
and 2E* is an expression, causes x to denote
|
||||
the object resulting from the evaluation of 2E*.
|
||||
For example,
|
||||
if 2E* is a simple variable y, then the assignment x@:=@y
|
||||
causes x to denote the object denoted by y.
|
||||
The object
|
||||
is 2not* copied; after the assignment is performed, it will be
|
||||
2shared* by x and y.
|
||||
Assignment does not affect
|
||||
the state of any object.
|
||||
(Recall that 2r*.2s*@:=@2v* is not a true assignment,
|
||||
but an abbreviation for 2put_s@(r,@v)*.)
|
||||
.para
|
||||
Procedure invocation involves passing argument objects
|
||||
from the caller to the called procedure and returning result
|
||||
objects from the procedure to the caller.
|
||||
The formal arguments
|
||||
of a procedure are considered to be local variables of the procedure,
|
||||
and are initialized, by assignment, to the objects resulting from the
|
||||
evaluation of the argument expressions. Thus, argument
|
||||
objects are shared between the caller and the called procedure.
|
||||
A procedure may modify mutable argument objects (e.g., records),
|
||||
but of course it cannot modify immutable ones (e.g., integers).
|
||||
A procedure has no access to the variables of its caller.
|
||||
.para
|
||||
Procedure invocations may be
|
||||
used directly as statements; those
|
||||
that return objects may also be used as expressions.
|
||||
Arbitrary recursive procedures are permitted.
|
||||
.ne 5
|
||||
.section "Type Correctness"
|
||||
.para
|
||||
Every variable in a CLU module must be declared;
|
||||
the declaration specifies the type of object
|
||||
that the variable may denote.
|
||||
All assignments to a variable must satisfy
|
||||
the variable's declaration.
|
||||
Because argument passing is defined
|
||||
in terms of assignment, the types of actual
|
||||
argument objects must be consistent with the declarations of the
|
||||
corresponding formal arguments.
|
||||
.para
|
||||
These restrictions, plus the restriction that only the code
|
||||
in a cluster may use cvt to convert between the abstract
|
||||
and representation types, ensure that the behavior of an object
|
||||
is indeed characterized completely by the operations of its type.
|
||||
For example, the type restrictions ensure that
|
||||
the only modification possible to a record object that represents
|
||||
a 2wordbag* (Figure wordbag) is the modification performed by
|
||||
the insert operation.
|
||||
.para
|
||||
Type-checking is performed on a module-by-module basis
|
||||
at compile-time (it could also be done at run-time).
|
||||
This checking can catch all type errors -- even those involving
|
||||
inter-module references -- because the CLU library maintains the
|
||||
necessary type information for all modules
|
||||
(see Section 5).
|
||||
494
doc/clu/clup4.r
Normal file
494
doc/clu/clup4.r
Normal file
@@ -0,0 +1,494 @@
|
||||
.nd chapter 4-1
|
||||
.nd current_figure 7
|
||||
.so clu/clupap.header
|
||||
.
|
||||
.sr words 2words*
|
||||
.sr wordbag 2wordbag*
|
||||
.sr sorted_bag 2sorted_bag*
|
||||
.sr sorted_bags 2sorted_bags*
|
||||
.sr wordtree 2wordtree*
|
||||
.sr tree 2tree*
|
||||
.sr node 2node*
|
||||
.sr r 2r*
|
||||
.sr x 2x*
|
||||
.sr t 2t*
|
||||
.sr count_words 2count_words*
|
||||
.sr count_numeric 2count_numeric*
|
||||
.sr lt 2lt*
|
||||
.sr equal 2equal*
|
||||
.sr print 2print*
|
||||
.sr string_chars 2string_chars*
|
||||
.sr create 2create*
|
||||
.sr insert 2insert*
|
||||
.sr size 2size*
|
||||
.sr increasing 2increasing*
|
||||
.sr s 2s*
|
||||
.sr n 2n*
|
||||
.sr index 2index*
|
||||
.sr limit 2limit*
|
||||
.sr count 2count*
|
||||
.sr next_word 2next_word*
|
||||
.sr elements 2elements*
|
||||
.sr reverse_elements 2reverse_elements*
|
||||
.
|
||||
.
|
||||
.chapter "More Abstraction Mechanisms"
|
||||
.para
|
||||
In this section we continue our discussion of
|
||||
abstraction mechanisms in CLU.
|
||||
A generalization of the 2wordbag* abstraction,
|
||||
called 2sorted_bag*,
|
||||
is presented as an illustration of parameterized clusters,
|
||||
which are a means for implementing
|
||||
more generally applicable data abstractions.
|
||||
The presentation of 2sorted_bag*
|
||||
is also used to motivate the introduction of a control
|
||||
abstraction called an 2iterator*,
|
||||
which is a mechanism for incrementally generating
|
||||
the elements of a collection of objects.
|
||||
Finally, we show an implementation of the sorted_bag
|
||||
abstraction and illustrate how sorted_bag
|
||||
can be used in implementing count_words.
|
||||
.section "Properties of the Sorted_bag Abstraction"
|
||||
.para
|
||||
In the count_words procedure given earlier,
|
||||
a data abstraction called wordbag was used.
|
||||
A wordbag object is a collection of strings,
|
||||
each with an associated count.
|
||||
Strings are inserted into a wordbag object one at a time.
|
||||
Strings in a wordbag object may be printed in alphabetical order,
|
||||
each with a count of the number of times it was inserted.
|
||||
.para
|
||||
Although wordbag has properties that are specific to the usage
|
||||
in count_words,
|
||||
it also has properties in common with a more general abstraction,
|
||||
sorted_bag.
|
||||
A bag is similar to a set
|
||||
(it is sometimes called a multi-set)
|
||||
except that an item can appear in a bag many times.
|
||||
For example, if the integer 1 is inserted in the set {1,2},
|
||||
the result is the set {1,2},
|
||||
but if 1 is inserted in the bag {1,2},
|
||||
the result is the bag {1,1,2}.
|
||||
A sorted_bag is a bag that affords access
|
||||
to the items it contains
|
||||
according to an ordering relation on the items.
|
||||
.para
|
||||
The concept of a sorted_bag is meaningful not only for strings
|
||||
but for many types of items.
|
||||
Therefore, we would like to parameterize the sorted_bag abstraction,
|
||||
the parameter being the type of item to be collected
|
||||
in the sorted_bag objects.
|
||||
.para
|
||||
Most programming languages provide built-in parameterized
|
||||
data abstractions.
|
||||
For example, the concept of an array is a parameterized
|
||||
data abstraction.
|
||||
.ne 3
|
||||
An example of a use of arrays in Pascal is
|
||||
.code
|
||||
1array* [1..n] 1of* 1integer*
|
||||
.end_code
|
||||
These arrays have two parameters,
|
||||
one specifying the array bounds (1..n)
|
||||
and one specifying the type of element in the array (integer).
|
||||
In CLU we provide mechanisms allowing user-defined
|
||||
data abstractions (like sorted_bag) to be parameterized.
|
||||
.para
|
||||
In the sorted_bag abstraction,
|
||||
not all types of items make sense.
|
||||
Only types that define a total ordering on their objects
|
||||
are meaningful,
|
||||
since the sorted_bag abstraction depends on the presence
|
||||
of this ordering.
|
||||
In addition, information about the ordering must be
|
||||
expressed in a way that is useful for programming.
|
||||
A natural way to express this information
|
||||
is by means of operations of the item type.
|
||||
Therefore, we require that the item type provide
|
||||
less than and equal operations
|
||||
(called lt and equal).
|
||||
.ne 5
|
||||
This constraint is expressed in the header for sorted_bag:
|
||||
.code
|
||||
sorted_bag = cluster [t: type] is create, insert, dots
|
||||
where t has
|
||||
lt, equal: proctype (t, t) returns (bool);
|
||||
.end_code
|
||||
The item type t is a 2formal parameter* of the sorted_bag
|
||||
cluster; whenever the sorted_bag abstraction is used,
|
||||
.ne 3
|
||||
the item type must be specified as an 2actual parameter*, e.g.,
|
||||
.code
|
||||
sorted_bag[string]
|
||||
.end_code
|
||||
.para
|
||||
The information about required operations
|
||||
informs the programmer about legitimate uses of sorted_bag.
|
||||
The compiler will check each use of sorted_bag to ensure
|
||||
that the item type provides the required operations.
|
||||
The where clause specifies exactly the information
|
||||
that the compiler can check.
|
||||
Of course, more is assumed about the item type 2t*
|
||||
than the presence of
|
||||
operations with appropriate names and functionalities:
|
||||
these operations must also define a total ordering on the items.
|
||||
Although we expect formal and complete specifications
|
||||
for data abstractions to be included in the CLU library eventually,
|
||||
we do not include in the CLU language declarations
|
||||
that the compiler cannot check.
|
||||
This point is discussed further in Section discussion.
|
||||
.para
|
||||
Now that we have decided to define a
|
||||
sorted_bag abstraction that works for many item types,
|
||||
we must decide what operations this abstraction provides.
|
||||
When an abstraction (like wordbag)
|
||||
is written for a very specific purpose,
|
||||
it is reasonable to have
|
||||
some specialized operations.
|
||||
For a more general abstraction,
|
||||
the operations should be more generally useful.
|
||||
.para
|
||||
The 2print* operation is a case in point.
|
||||
Printing is only one possible use of the information contained
|
||||
in a 2sorted_bag*.
|
||||
It was the only use in the case of 2wordbag*,
|
||||
so it was reasonable to have a 2print* operation.
|
||||
However, if 2sorted_bags* are to be generally useful,
|
||||
there should be some way for the user to obtain
|
||||
the elements of the 2sorted_bag*; the user can then
|
||||
perform some action on the elements (for example, print them).
|
||||
.para
|
||||
What we would like is an operation on sorted_bags
|
||||
that makes all of the elements available to the caller
|
||||
in increasing order.
|
||||
One possible approach is to map
|
||||
the elements of a sorted_bag
|
||||
into a sequence object,
|
||||
a solution potentially requiring a large amount of space.
|
||||
A more efficient method is provided by CLU and is discussed below.
|
||||
This solution computes the sequence
|
||||
one element at a time, thus saving space.
|
||||
If only part of the sequence is used
|
||||
(as in a search for some element),
|
||||
then execution time can be saved as well.
|
||||
.section "Control Abstractions"
|
||||
.para
|
||||
The purpose of many loops is to perform an action
|
||||
on some or all of the objects in a collection.
|
||||
For such loops,
|
||||
it is often useful to separate the
|
||||
selection of the next object
|
||||
from the action performed on that object.
|
||||
CLU provides a control abstraction that permits
|
||||
a complete decomposition of the two activities.
|
||||
The for statement available in many programming languages
|
||||
provides a limited ability in this direction:
|
||||
it iterates over ranges of integers.
|
||||
The CLU for statement
|
||||
can iterate over collections of any
|
||||
type of object.
|
||||
The selection of the next object in the collection
|
||||
is done by a user-defined 2iterator*.
|
||||
The iterator
|
||||
produces the objects in the collection one at a time
|
||||
(the entire collection need not physically exist);
|
||||
each object is consumed by the for statement in turn.
|
||||
.nr rra0 current_figure
|
||||
.para
|
||||
Figure rra0 gives an example of a simple iterator
|
||||
called string_chars, which produces the characters in a string in
|
||||
the order in which they appear.
|
||||
.begin_figure "Use and definition of a simple iterator."
|
||||
.code
|
||||
count_numeric = proc (s: string) returns (int);
|
||||
count: int := 0;
|
||||
for c: char in string_chars (s) do
|
||||
if char_is_numeric (c)
|
||||
then count := count + 1;
|
||||
end;
|
||||
end;
|
||||
return count;
|
||||
end count_numeric;
|
||||
|
||||
string_chars = iter (s: string) yields (char);
|
||||
index: int := 1;
|
||||
limit: int := string$size (s);
|
||||
while index <= limit do
|
||||
yield string$fetch (s, index);
|
||||
index := index + 1;
|
||||
end;
|
||||
end string_chars;
|
||||
.ns
|
||||
.end_code
|
||||
.finish_figure
|
||||
This iterator uses string operations 2size@(s)*,
|
||||
which tells how many characters are in the string s,
|
||||
and 2fetch@(s,@n)*,
|
||||
which returns the n!th character in the string s
|
||||
(provided the integer n is greater than zero
|
||||
and does not exceed the size of the string).
|
||||
.foot
|
||||
A while loop is used in the implementation of
|
||||
string_chars so that the example would be based
|
||||
on familiar concepts. In actual practice, such a
|
||||
loop would be written using a for statement invoking
|
||||
a primitive iterator.
|
||||
.efoot
|
||||
.br
|
||||
.ne 5
|
||||
.para
|
||||
The general form of the CLU for statement is
|
||||
.code
|
||||
for declarations in iterator-invocation do
|
||||
body
|
||||
end;
|
||||
.end_code
|
||||
An example of the use of the for statement
|
||||
occurs in the count_numeric procedure
|
||||
(see Figure rra0),
|
||||
which contains a loop
|
||||
that counts the number of numeric characters in a string.
|
||||
Note that the details of how the characters are obtained
|
||||
from the string are entirely contained
|
||||
in the definition of the iterator.
|
||||
.para
|
||||
Iterators work as follows:
|
||||
A for statement initially invokes an iterator,
|
||||
passing it some arguments.
|
||||
Each time a yield statement is executed in the iterator,
|
||||
the objects yielded
|
||||
.foot
|
||||
Zero or more objects may be yielded,
|
||||
but the number and types of objects yielded each time by an iterator
|
||||
must agree with the number and types of variables in
|
||||
a for statement using the iterator.
|
||||
.efoot
|
||||
are assigned to the variables declared in the for statement
|
||||
(following the reserved word for)
|
||||
in corresponding order, and the body of the for
|
||||
statement is executed.
|
||||
Then the iterator is resumed at the statement
|
||||
following the yield statement,
|
||||
in the same environment as when the objects were yielded.
|
||||
When the iterator terminates, by either an implicit
|
||||
or explicit return, the invoking for statement
|
||||
terminates. The iteration may also be prematurely
|
||||
terminated by a return in the body of the
|
||||
for statement.
|
||||
.para
|
||||
For example, suppose that string_chars is invoked
|
||||
with the string ``a3''.
|
||||
The first character yielded is `a'.
|
||||
At this point within string_chars, index@=@1 and limit@=@2.
|
||||
Next the body of the for statement is performed.
|
||||
Since the character `a' is not numeric,
|
||||
count remains at 0.
|
||||
Next string_chars is resumed at the statement after the yield
|
||||
statement, and when resumed, index@=@1 and limit@=@2.
|
||||
Then index is assigned 2,
|
||||
and the character `3' is selected from the string and yielded.
|
||||
Since `3' is numeric, count becomes@1.
|
||||
Then string_chars is resumed,
|
||||
with index@=@2 and limit@=@2, and index is incremented,
|
||||
which causes the while loop to terminate.
|
||||
The implicit return terminates both the iterator and the
|
||||
for statement, with control resuming at the statement
|
||||
after the for statement,
|
||||
and count@=@1.
|
||||
.para
|
||||
While iterators are useful in general,
|
||||
they are especially valuable in conjunction with data abstractions
|
||||
that are collections of objects (such as sets, arrays, and
|
||||
sorted_bags).
|
||||
Iterators afford users of such abstractions access to all objects
|
||||
in the collection, without exposing irrelevant details.
|
||||
Several iterators may be included in a data abstraction.
|
||||
When the order of obtaining the objects is important,
|
||||
different iterators may provide different orders.
|
||||
.section "Implementation and Use of Sorted_bag"
|
||||
.para
|
||||
Now we can describe a minimal set of operations
|
||||
for sorted_bag.
|
||||
The operations are create, insert, size, and increasing.
|
||||
2Create*, insert, and size are procedural abstractions
|
||||
that, respectively,
|
||||
create a sorted_bag, insert an item into a sorted_bag,
|
||||
and give the number of items in a sorted_bag.
|
||||
2Increasing* is a control abstraction
|
||||
that produces the items in a sorted_bag in increasing order;
|
||||
each item produced is accompanied by
|
||||
an integer representing the number of times
|
||||
the item appears in the sorted_bag.
|
||||
Note that other operations might also
|
||||
be useful for sorted_bag,
|
||||
for example, an iterator yielding the items
|
||||
in decreasing order.
|
||||
In general, the definer of a data abstraction
|
||||
can provide as many operations as seems reasonable.
|
||||
.para
|
||||
In Figure current_figure, we give an implementation
|
||||
of the sorted_bag abstraction.
|
||||
.begin_figure "The sorted_bag cluster."
|
||||
.code
|
||||
sorted_bag = cluster [t: type] is create, insert, size, increasing
|
||||
where t has equal, lt: proctype (t, t) returns (bool);
|
||||
|
||||
rep = record [contents: tree[t], total: int];
|
||||
|
||||
create = proc () returns (cvt);
|
||||
return rep${contents: tree[t]$create (), total: 0};
|
||||
end create;
|
||||
|
||||
insert = proc (sb: cvt, v: t);
|
||||
sb.contents := tree[t]$insert (sb.contents, v);
|
||||
sb.total := sb.total + 1;
|
||||
end insert;
|
||||
|
||||
size = proc (sb: cvt) returns (int);
|
||||
return sb.total;
|
||||
end size;
|
||||
|
||||
increasing = iter (sb: cvt) yields (t, int);
|
||||
for item: t, count: int
|
||||
in tree[t]$increasing (sb.contents) do
|
||||
yield item, count;
|
||||
end;
|
||||
end increasing;
|
||||
|
||||
end sorted_bag;
|
||||
.ns
|
||||
.end_code
|
||||
.finish_figure
|
||||
It is implemented using a sorted binary tree,
|
||||
just as wordbag was implemented.
|
||||
Thus, a subsidiary abstraction is necessary.
|
||||
This abstraction, called tree, is a generalization
|
||||
of the wordtree abstraction (used in Section example),
|
||||
which has been parameterized to work for all ordered types.
|
||||
An implementation of tree is given in Figure current_figure.
|
||||
Notice that both the tree abstraction and the sorted_bag abstraction
|
||||
place the same constraints on their type parameters.
|
||||
.begin_page_figure "The tree cluster."
|
||||
.code
|
||||
tree = cluster [t: type] is create, insert, increasing
|
||||
where t has equal, lt: proctype (t, t) returns (bool);
|
||||
|
||||
node = record [m!value: t, count: int,
|
||||
(mark!m)lesser: tree[t], greater: tree[t]];
|
||||
rep = oneof [empty: null, non_empty: node];
|
||||
|
||||
create = proc () returns (cvt);
|
||||
return rep$make_empty (nil);
|
||||
end create;
|
||||
|
||||
insert = proc (x: cvt, v: t) returns (cvt);
|
||||
tagcase x
|
||||
tag empty:
|
||||
n: node := node${m!value: v, count: 1,
|
||||
(mark!m)lesser: tree[t]$create (),
|
||||
(mark!m)greater: tree[t]$create ()};
|
||||
return rep$make_non_empty (n);
|
||||
tag non_empty (n: node):
|
||||
if t$equal (v, n.value)
|
||||
then n.count := n.count + 1;
|
||||
elseif t$lt (v, n.value)
|
||||
then n.lesser := tree[t]$insert (n.lesser, v);
|
||||
else n.greater := tree[t]$insert (n.greater, v);
|
||||
end;
|
||||
return x;
|
||||
end;
|
||||
end insert;
|
||||
|
||||
increasing = iter (x: cvt) yields (t, int);
|
||||
tagcase x
|
||||
tag empty: ;
|
||||
tag non_empty (n: node):
|
||||
for item: t, count: int
|
||||
in tree[t]$increasing (n.lesser) do
|
||||
yield item, count;
|
||||
end;
|
||||
yield n.value, n.count;
|
||||
for item: t, count: int
|
||||
in tree[t]$increasing (n.greater) do
|
||||
yield item, count;
|
||||
end;
|
||||
end;
|
||||
end increasing;
|
||||
end tree;
|
||||
.ns
|
||||
.end_code
|
||||
.finish_figure
|
||||
.para
|
||||
An important feature of the 2sorted_bag*
|
||||
and 2tree* clusters
|
||||
is the way that the cluster parameter is used in places
|
||||
where the type string was used in wordbag and wordtree.
|
||||
This usage is especially evident in the implementation of tree.
|
||||
For example, tree has a representation that stores values of
|
||||
type t: the 2value* component of a node
|
||||
must be an object of type t.
|
||||
.para
|
||||
In the insert operation of tree,
|
||||
the lt and equal operations of type t are used.
|
||||
We have used the compound form, e.g. 2t$equal@(v,@n.value)*,
|
||||
to emphasize that the equal operation of t is being used.
|
||||
The short form, 2v@=@n.value*, could have been used instead.
|
||||
.para
|
||||
The increasing iterator of tree works as follows:
|
||||
First it yields all items in the current tree
|
||||
that are less than the item at the top node;
|
||||
the items are obtained by a recursive use of itself,
|
||||
passing the 2lesser* subtree as a parameter.
|
||||
Next it yields the contents of the top node,
|
||||
and then it yields all items in the current tree
|
||||
that are greater than the item at the top node
|
||||
(again by a recursive use of itself).
|
||||
In this way it performs a complete walk over the tree,
|
||||
yielding the values at all nodes, in increasing order.
|
||||
.nr rra1 current_figure
|
||||
.para
|
||||
Finally, we show in Figure rra1 how the original
|
||||
procedure count_words can be implemented in terms of sorted_bag.
|
||||
.begin_figure "The count_words procedure using iterators."
|
||||
.code
|
||||
count_words = proc (i: instream, o: outstream);
|
||||
|
||||
wordbag = sorted_bag[string];
|
||||
|
||||
% create an empty wordbag
|
||||
wb: wordbag := wordbag$create ();
|
||||
|
||||
% scan document, adding each word found to wb
|
||||
for word: string in words (i) do
|
||||
wordbag$insert (wb, word);
|
||||
end;
|
||||
|
||||
% print the wordbag
|
||||
total: int := wordbag$size (wb);
|
||||
for w: string, count: int in wordbag$increasing (wb) do
|
||||
print_word (w, count, total, o);
|
||||
end;
|
||||
end count_words;
|
||||
.ns
|
||||
.end_code
|
||||
.finish_figure
|
||||
.
|
||||
Note that the count_words procedure now uses 2sorted_bag*[string]
|
||||
instead of wordbag.
|
||||
2Sorted_bag*[string] is legitimate, since the type string
|
||||
provides both lt and equal operations.
|
||||
Note that two for statements are used in count_words.
|
||||
The second for statement prints the words
|
||||
in alphabetic order,
|
||||
using the increasing iterator of sorted_bag.
|
||||
.ne 4
|
||||
The first for statement inserts the words into the sorted_bag;
|
||||
it uses an iterator
|
||||
.code
|
||||
words = iter (i: instream) yields (string);
|
||||
dots
|
||||
end words;
|
||||
.widow 2
|
||||
.end_code
|
||||
The definition of words is left as an exercise for the reader.
|
||||
112
doc/clu/clup5.r
Normal file
112
doc/clu/clup5.r
Normal file
@@ -0,0 +1,112 @@
|
||||
.nd chapter 5-1
|
||||
.nd current_figure 11
|
||||
.nd wordbag 3
|
||||
.so clu/clupap.header
|
||||
.sr p 2p*
|
||||
.sr q 2q*
|
||||
.sr x 2x*
|
||||
.sr y 2y*
|
||||
.sr z 2z*
|
||||
.sr a 2a*
|
||||
.sr b 2b*
|
||||
.sr insert 2insert*
|
||||
.chapter "The CLU Library"
|
||||
.para
|
||||
So far, we have shown CLU modules as separate pieces of
|
||||
text, without explaining how they are bound together to form a
|
||||
program. This section describes the CLU library, which plays a
|
||||
central role in supporting inter-module references.
|
||||
.para
|
||||
The CLU library contains information about
|
||||
abstractions. The library supports incremental program
|
||||
development, one abstraction at a time, and, in addition,
|
||||
makes abstractions that are defined during the construction of
|
||||
one program available as a basis for subsequent program development.
|
||||
The information in the library permits the separate
|
||||
compilation of single modules, with complete type-checking
|
||||
of all external references (such as procedure
|
||||
invocations).
|
||||
.para
|
||||
The structure of the library derives from the fundamental
|
||||
distinction between abstractions and implementations.
|
||||
For each abstraction, there is a 2description
|
||||
unit* which contains all system-maintained information
|
||||
about that abstraction. Included in the description unit
|
||||
are zero or more modules that implement the abstraction.
|
||||
.foot
|
||||
Other information that may be stored in the library
|
||||
includes information about relationships among
|
||||
abstractions, as might be expressed in a module
|
||||
interconnection language [DK75,@Thomas].
|
||||
.efoot
|
||||
.para
|
||||
The most important information contained in a description
|
||||
unit is the abstraction's 2interface specification*, which
|
||||
is that information needed to type-check uses of the abstraction.
|
||||
For procedural and control abstractions,
|
||||
this information consists of the number and types of
|
||||
parameters, arguments, and output values, plus any constraints
|
||||
on type parameters (i.e., required operations, as described in
|
||||
Section 4). For data abstractions,
|
||||
it includes the number and types of parameters, constraints on
|
||||
type parameters, and the
|
||||
name and interface specification of each
|
||||
operation.
|
||||
.para
|
||||
An abstraction is entered in the library by
|
||||
submitting the interface specification;
|
||||
no implementations are required.
|
||||
In fact, a module can be compiled before any implementations
|
||||
have been provided for the abstractions that it uses;
|
||||
it is necessary only that interface specifications
|
||||
have been given for those abstractions.
|
||||
Ultimately, there can be many implementations
|
||||
of an abstraction;
|
||||
each implementation is required to satisfy the
|
||||
interface specification of the abstraction.
|
||||
Because all uses and implementations
|
||||
of an abstraction are checked against the interface
|
||||
specification, the actual selection
|
||||
of an implementation can be delayed
|
||||
until just before (or perhaps during) execution.
|
||||
We imagine a process of binding together modules
|
||||
into programs, prior to execution, at which time
|
||||
this selection would be made.
|
||||
.para
|
||||
An important detail of the CLU system is
|
||||
the method by which CLU modules refer to abstractions.
|
||||
To avoid problems of name conflicts that can arise in
|
||||
large systems, the names used by a module to refer to
|
||||
abstractions can be chosen to suit the programmer's
|
||||
convenience.
|
||||
When a module is submitted for
|
||||
compilation, its external references must be bound to
|
||||
description units so that type-checking can be
|
||||
performed. The binding is accomplished by constructing
|
||||
an 2association list*,
|
||||
mapping names to description units, which
|
||||
is passed to the compiler along with the source code when
|
||||
compiling the module.
|
||||
The mapping in the association list is stored by the compiler
|
||||
in the library as part of the module.
|
||||
A similar process is involved in entering interface
|
||||
specifications of abstractions, as these will include
|
||||
references to other (data) abstractions.
|
||||
.para
|
||||
When the compiler type-checks a module,
|
||||
it uses the association list to map the external
|
||||
names in the module to description units, and then uses
|
||||
the interface specifications in those description
|
||||
units to check that the abstractions are used correctly.
|
||||
The type-correctness of the module thus
|
||||
depends upon the binding of names to description units
|
||||
and the interface specifications in those description
|
||||
units, and could be invalidated if changes to the
|
||||
binding or the interface specifications were subsequently
|
||||
made. For this reason, the process of compilation
|
||||
permanently binds a module to the abstractions it
|
||||
uses, and the interface specification of an abstraction,
|
||||
'ne 2
|
||||
once defined, is not allowed to change.
|
||||
(Of course, a new description unit can be created
|
||||
to describe a modified abstraction.)
|
||||
77
doc/clu/clup6.r
Normal file
77
doc/clu/clup6.r
Normal file
@@ -0,0 +1,77 @@
|
||||
.nd chapter 6-1
|
||||
.nd current_figure 1
|
||||
.nd wordbag 3
|
||||
.so clu/clupap.header
|
||||
.sr insert 2insert*
|
||||
.
|
||||
.chapter "Implementation"
|
||||
.para
|
||||
This section briefly describes the current implementation of CLU
|
||||
and discusses its efficiency.
|
||||
.para
|
||||
The implementation is based on a decision to represent
|
||||
all CLU objects by 2object descriptors*,
|
||||
which are fixed-size values containing a type code and some
|
||||
type-dependent information.
|
||||
.foot
|
||||
Object descriptors are similar to capabilities [Lam71].
|
||||
.efoot
|
||||
In the case of mutable types, the type-dependent information
|
||||
is a pointer to a separately-allocated
|
||||
area containing the state information. For constant
|
||||
types, the information either directly contains
|
||||
the value (if the value can be encoded in the
|
||||
information field, such as for integers, characters,
|
||||
and booleans) or contains a pointer to separately-allocated
|
||||
space (as for strings).
|
||||
The type codes are used by the garbage collector
|
||||
to determine the physical representation of objects
|
||||
so that the accessible objects can be traced;
|
||||
they are also useful for supporting program debugging.
|
||||
.para
|
||||
The use of fixed-size object descriptors
|
||||
allows variables to be fixed-size cells. Assignment
|
||||
is efficient: the object descriptor resulting
|
||||
from the evaluation of the expression is simply
|
||||
copied into the variable. In addition, a single
|
||||
size for variables facilitates the separate compilation
|
||||
of modules and allows most of the code of a
|
||||
parameterized module to be shared among all instantiations
|
||||
of the module. The actual parameters are made available
|
||||
to this code by means of a small parameter-dependent
|
||||
section, which is initialized prior to execution.
|
||||
.para
|
||||
Procedure invocation is relatively efficient.
|
||||
A single program stack is used,
|
||||
and argument passing is as efficient as assignment.
|
||||
Iterators are a form of coroutine;
|
||||
however, their use is sufficiently constrained
|
||||
that they are implemented using just the program stack.
|
||||
Using an iterator is therefore only slightly more expensive
|
||||
than using a procedure.
|
||||
.para
|
||||
The data abstraction mechanism is not inherently
|
||||
expensive. No execution time type-checking is necessary.
|
||||
Furthermore, the type conversion implied by 1cvt*
|
||||
is merely a change in the view taken of an object's type,
|
||||
and does not require any computation.
|
||||
.para
|
||||
A number of optimization techniques can be
|
||||
applied to a collection of modules, if one is
|
||||
willing to give up the flexibility of separate
|
||||
compilation. The most effective such optimization is
|
||||
the inline substitution of procedure (and iterator) bodies
|
||||
for invocations [Sch76].
|
||||
The use of data abstractions tends to introduce
|
||||
extra levels of procedure invocations that perform little or no
|
||||
computation. As an example, consider the 2wordbag$insert*
|
||||
operation (Figure wordbag), which merely invokes the
|
||||
2wordtree$insert* operation and increments a counter.
|
||||
If data abstractions had not been used, these actions would most
|
||||
likely have been performed directly by the 2count_words*
|
||||
procedure. The 2wordbag$insert* operation is thus
|
||||
a good candidate for being compiled inline.
|
||||
Once inline substitution has been performed, the increase
|
||||
in context will enhance the effectiveness of
|
||||
conventional optimization techniques
|
||||
[All71,@All75,@Atk76].
|
||||
151
doc/clu/clup7.r
Normal file
151
doc/clu/clup7.r
Normal file
@@ -0,0 +1,151 @@
|
||||
.nd chapter 7-1
|
||||
.nd current_figure 5
|
||||
.nd wordbag 3
|
||||
.so clu/clupap.header
|
||||
.sr p 2p*
|
||||
.sr q 2q*
|
||||
.sr x 2x*
|
||||
.sr y 2y*
|
||||
.sr z 2z*
|
||||
.sr a 2a*
|
||||
.sr b 2b*
|
||||
.sr insert 2insert*
|
||||
.chapter "Discussion"
|
||||
.para
|
||||
Our intent in this paper has been to provide an
|
||||
informal introduction to the abstraction mechanisms in CLU.
|
||||
By means of programming examples, we have illustrated the
|
||||
use of data, procedural, and control abstractions, and have
|
||||
shown how CLU modules are used to implement these
|
||||
abstractions. We have not attempted to provide a complete
|
||||
description of CLU, but, in the course of explaining
|
||||
the examples, most features of the language have appeared.
|
||||
One important omission is the CLU exception handling mechanism
|
||||
(which does support abstractions); this mechanism
|
||||
is described in [LCS75].
|
||||
.para
|
||||
In addition to describing constructs
|
||||
that support abstraction, previous sections have
|
||||
covered a number of other topics. We have discussed the
|
||||
semantics of CLU. We have described the organization of the
|
||||
CLU library and discussed how it supports incremental
|
||||
program development and separate
|
||||
compilation and type-checking of modules.
|
||||
Also, we have described our current
|
||||
implementation and discussed its efficiency.
|
||||
.para
|
||||
In designing CLU, our goal was to simplify the task
|
||||
of constructing reliable software that is reasonably easy
|
||||
to understand, modify, and maintain. It seems appropriate,
|
||||
therefore, to conclude this paper with a discussion of how
|
||||
CLU contributes to this goal.
|
||||
.para
|
||||
The quality of any program depends upon the skill of
|
||||
the designer. In CLU programs,
|
||||
this skill is reflected in the choice of abstractions.
|
||||
In a good design, abstractions will be used
|
||||
to simplify the connections between modules and to
|
||||
encapsulate decisions that are likely to change [Par71].
|
||||
Data abstractions are particularly valuable for these purposes.
|
||||
For example, through the use of a data abstraction,
|
||||
modules that share a system data base
|
||||
rely only on its abstract behavior as
|
||||
defined by the data base operations. The connections
|
||||
among these modules are much simpler
|
||||
than would be possible if they shared knowledge
|
||||
of the format of the data base and the relationship
|
||||
among its parts. In addition, the data base abstraction
|
||||
can be reimplemented without affecting the code of the modules
|
||||
that use it.
|
||||
CLU encourages the use of data abstractions,
|
||||
and thus aids the programmer during program design.
|
||||
.para
|
||||
The benefits arising from the use of data
|
||||
abstractions are based on the constraint, inherent in CLU
|
||||
and enforced by the CLU compiler, that only the operations
|
||||
of the abstraction may access the representations of the objects.
|
||||
This constraint ensures that the distinction made in CLU
|
||||
between abstractions and implementations
|
||||
applies to data abstractions as well as to procedural
|
||||
and control abstractions.
|
||||
.para
|
||||
The distinction between abstractions and implementations
|
||||
eases program modification and maintenance.
|
||||
Once it has been determined that an abstraction must be
|
||||
reimplemented, CLU guarantees that the code of
|
||||
all modules using that
|
||||
abstraction will be unaffected by the change.
|
||||
The modules need not be reprogrammed or even recompiled;
|
||||
only the process of
|
||||
selecting the implementation of the abstraction must be
|
||||
redone.
|
||||
The problem of determining what modules must be
|
||||
changed is also simplified, because each module has a
|
||||
well-defined purpose, to implement an abstraction,
|
||||
and no other module can interfere with that purpose.
|
||||
.para
|
||||
Understanding and verification of CLU programs is
|
||||
made easier
|
||||
because the distinction between
|
||||
abstractions and implementations permits this task
|
||||
to be decomposed.
|
||||
One module at a time is studied to determine that it
|
||||
implements its abstraction. This study requires
|
||||
understanding the behavior of the abstractions
|
||||
it uses, but it is not necessary to understand the
|
||||
modules implementing those abstractions. Those
|
||||
modules can be studied separately.
|
||||
.para
|
||||
A promising way to establish the
|
||||
correctness of a program is by means of a mathematical
|
||||
proof. For practical reasons, proofs should be
|
||||
performed (or at least checked) by a verification
|
||||
system, since the process of constructing
|
||||
a proof is tedious and error-prone.
|
||||
Decomposition of the proof is essential for
|
||||
program proving, which is practical only for small
|
||||
programs (like CLU modules). Note that when the CLU
|
||||
compiler does type-checking, it is, in addition
|
||||
to enforcing the constraint that permits the proof
|
||||
to be decomposed, also performing a small part of the
|
||||
actual proof.
|
||||
.para
|
||||
We have included as declarations in CLU just
|
||||
the information that the compiler can check with
|
||||
reasonable efficiency.
|
||||
We believe that the other
|
||||
information required for proofs (specifications and
|
||||
assertions) should be expressed in a separate
|
||||
``specification'' language.
|
||||
The properties of such a language are being
|
||||
studied [Guttag,@Lis75,@Lis76,@Spitzen].
|
||||
We intend eventually to add formal specifications to the
|
||||
CLU system; the library is already organized to
|
||||
accommodate this addition. At that time various
|
||||
specification language processors could be added to
|
||||
the system.
|
||||
.para
|
||||
We believe that the constraints imposed by
|
||||
CLU are essential for practical as well as theoretical
|
||||
reasons. It is true that data abstractions
|
||||
can be used in any language by
|
||||
establishing programming conventions to protect the
|
||||
representations of objects. However, conventions are no
|
||||
substitute for enforced constraints. It is inevitable
|
||||
that the conventions will be violated -- and are likely
|
||||
to be violated just when they are needed most, in
|
||||
implementing, maintaining, and modifying large
|
||||
programs. It is precisely at this time, when the
|
||||
.ne 3
|
||||
programming task becomes very difficult, that a
|
||||
language like CLU will be most valuable and
|
||||
appreciated.
|
||||
.chapter "Acknowledgements"
|
||||
.para
|
||||
The authors gratefully acknowledge the contributions
|
||||
made by members of the CLU design group over the
|
||||
last three years. Several people have made
|
||||
helpful comments about this paper, including
|
||||
Toby Bloom, Dorothy Curtis, Mike Hammer,
|
||||
Eliot Moss, Jerry Saltzer, Bob Scheifler,
|
||||
and the referees.
|
||||
117
doc/clu/clupap.header
Normal file
117
doc/clu/clupap.header
Normal file
@@ -0,0 +1,117 @@
|
||||
.
|
||||
header file for clu paper
|
||||
.
|
||||
.nd narrow 0
|
||||
.nd csg_memo 0
|
||||
.nd started 0
|
||||
.if ~started
|
||||
.dv xgp
|
||||
.fo 0 fonts; 30vr kst
|
||||
.fo 1 fonts; 31vgb kst
|
||||
.fo 2 fonts; 30vri kst
|
||||
.fo 3 fonts; 37vrb kst
|
||||
.fo 4 fonts; 75vbee kst
|
||||
.fo 7 fonts; 18fg kst
|
||||
.tr @
|
||||
.nr fnfont 7
|
||||
.nr chapter_starts_page 0
|
||||
.nr reset_per_page 0
|
||||
.nr both_sides 1
|
||||
.sr list_left_margin 500m
|
||||
.sr list_right_margin 500m
|
||||
.if narrow
|
||||
.nr big_font 1
|
||||
.ls 1
|
||||
.ll 4.25i
|
||||
.pl 14i
|
||||
.sr list_left_margin 200m
|
||||
.sr list_right_margin 200m
|
||||
.ef csg_memo==0
|
||||
.ls 2
|
||||
.sr left_heading Abstraction Mechanisms in CLU
|
||||
.sr right_heading date
|
||||
.ef csg_memo>0
|
||||
.ls 1.5
|
||||
.sr left_heading CSG Memo csg_memo-1
|
||||
.sr right_heading Abstraction Mechanisms in CLU
|
||||
.en
|
||||
.sr figure_name Figure \
|
||||
.sr table_name Table \
|
||||
.nr immediate_figure 0
|
||||
.nr tty_table_of_contents 1
|
||||
.nr verbose 1
|
||||
.so r/r.macros
|
||||
.so clu/clukey.r
|
||||
.nr started 1
|
||||
.if narrow
|
||||
.eq begin_page_figure begin_figure
|
||||
.en
|
||||
.en
|
||||
.
|
||||
number registers for section references
|
||||
.
|
||||
.nr introduction 1
|
||||
.nr example 2
|
||||
.nr semantics 3
|
||||
.nr more_abstraction 4
|
||||
.nr library 5
|
||||
.nr implementation 6
|
||||
.nr efficiency 6
|
||||
.nr discussion 7
|
||||
.
|
||||
paragraph macro
|
||||
.
|
||||
.if ls<150
|
||||
.de para
|
||||
.sp
|
||||
.ne 3
|
||||
.ti 5
|
||||
.em
|
||||
.ef
|
||||
.de para
|
||||
.br
|
||||
.ne 3
|
||||
.ti 5
|
||||
.em
|
||||
.en
|
||||
.
|
||||
three dots
|
||||
.
|
||||
.sr dots 1...*
|
||||
.sr th 7th*
|
||||
.
|
||||
code brackets - use like table, end_table
|
||||
.
|
||||
.de code
|
||||
.table 2
|
||||
.if narrow
|
||||
.ta 8 11 14 17 20 23 26 29 32
|
||||
.en
|
||||
.em
|
||||
.
|
||||
.de end_code
|
||||
.rtabs
|
||||
.end_table
|
||||
.em
|
||||
.
|
||||
mark position
|
||||
.
|
||||
.de m
|
||||
.nr mark hpos
|
||||
.em
|
||||
.
|
||||
widow eliminator
|
||||
.
|
||||
.de widow <n>
|
||||
.lbegin
|
||||
.nv n \0
|
||||
.if n<1
|
||||
.nr n 1
|
||||
.end
|
||||
.vx n n!l 2m
|
||||
.if vtrap<n&vpos>0
|
||||
.nr n n-vtrap
|
||||
.ct footer_handler +n!m
|
||||
.end
|
||||
.end
|
||||
.em
|
||||
24
doc/clu/clupap.r
Normal file
24
doc/clu/clupap.r
Normal file
@@ -0,0 +1,24 @@
|
||||
CLU paper
|
||||
.
|
||||
set csg_memo to 0 for paper version
|
||||
set it to csg memo number for csg memo version
|
||||
.
|
||||
set narrow to 1 for map version
|
||||
and insert ;SIZE 14 at the beginning of the XGP file
|
||||
.
|
||||
.nr narrow 0
|
||||
.nr csg_memo 0
|
||||
.
|
||||
.so clu/clupap.header
|
||||
.so clu/clup0.r
|
||||
.so clu/clup1.r
|
||||
.so clu/clup2.r
|
||||
.so clu/clup3.r
|
||||
.so clu/clup4.r
|
||||
.so clu/clup5.r
|
||||
.so clu/clup6.r
|
||||
.so clu/clup7.r
|
||||
.if narrow
|
||||
.ns p
|
||||
.en
|
||||
.insert_refs
|
||||
Reference in New Issue
Block a user