From 74c3facf096cd25b1c6234a056b0d0f3d588cd72 Mon Sep 17 00:00:00 2001 From: Lars Brinkhoff Date: Thu, 26 Aug 2021 13:22:30 +0200 Subject: [PATCH] CSG memo 144: Abstraction Mechanisms in CLU. --- build/timestamps.txt | 11 + doc/clu/clukey.r | 27 ++ doc/clu/clup0.r | 293 +++++++++++++++++++++ doc/clu/clup1.r | 212 +++++++++++++++ doc/clu/clup2.r | 595 ++++++++++++++++++++++++++++++++++++++++++ doc/clu/clup3.r | 181 +++++++++++++ doc/clu/clup4.r | 494 +++++++++++++++++++++++++++++++++++ doc/clu/clup5.r | 112 ++++++++ doc/clu/clup6.r | 77 ++++++ doc/clu/clup7.r | 151 +++++++++++ doc/clu/clupap.header | 117 +++++++++ doc/clu/clupap.r | 24 ++ 12 files changed, 2294 insertions(+) create mode 100644 doc/clu/clukey.r create mode 100644 doc/clu/clup0.r create mode 100644 doc/clu/clup1.r create mode 100644 doc/clu/clup2.r create mode 100644 doc/clu/clup3.r create mode 100644 doc/clu/clup4.r create mode 100644 doc/clu/clup5.r create mode 100644 doc/clu/clup6.r create mode 100644 doc/clu/clup7.r create mode 100644 doc/clu/clupap.header create mode 100644 doc/clu/clupap.r diff --git a/build/timestamps.txt b/build/timestamps.txt index 29c0a32d..fcaab2d4 100644 --- a/build/timestamps.txt +++ b/build/timestamps.txt @@ -156,6 +156,17 @@ clib/-read-.-this- 198002261810.43 clib/tv.128 197908312338.58 clu/clu.order 197711161922.32 clu/action.refman 197806022022.04 +clu/clukey.r 197909041308.12 +clu/clup0.r 197711261712.18 +clu/clup1.r 197711261712.27 +clu/clup2.r 197711261714.24 +clu/clup3.r 197711261712.44 +clu/clup4.r 197711261716.29 +clu/clup5.r 197711261713.02 +clu/clup6.r 197711261713.08 +clu/clup7.r 197711261713.18 +clu/clupap.header 197711261651.18 +clu/clupap.r 197711261651.32 clu/clusym.r 197806271243.01 clu/exampl.refman 197805301747.35 clu/except.refman 197806061946.59 diff --git a/doc/clu/clukey.r b/doc/clu/clukey.r new file mode 100644 index 00000000..5ad79d67 --- /dev/null +++ b/doc/clu/clukey.r @@ -0,0 +1,27 @@ +. + string registers for CLU keywords +. 
+.de bold +.fr i 0 nargs-1 +.sr \\i 1\\i* +.en +.em +. +.bold any array +.bold begin bool break +.bold cand char cluster continue cor cvt +.bold do down +.bold else elseif end except exit +.bold false for force +.bold has +.bold if in int is iter itertype +.bold nil null +.bold oneof others own +.bold proc proctype +.bold real record rep resignal return returns +.bold sequence signal signals string struct +.bold tag tagcase then true type +.bold up +.bold variant +.bold when where while +.bold yield yields diff --git a/doc/clu/clup0.r b/doc/clu/clup0.r new file mode 100644 index 00000000..ffc6c492 --- /dev/null +++ b/doc/clu/clup0.r @@ -0,0 +1,293 @@ +.nd started 0 +.nr do_refs 0 +.if ~started +.nr do_refs 1 +.en +.so clu/clupap.header +.so r/ref3.rmac +.if csg_memo==0 +.ls 1 +.if narrow +.new_font 1 +.ef +.new_font 3 +.en +.nf c +.vp 2i +Abstraction Mechanisms in CLU +.if narrow +.new_font 0 +.ef +.new_font 1 +.en +.sp .5i +Barbara Liskov +Alan Snyder +Russell Atkinson +Craig Schaffert +.sp .3i +Laboratory for Computer Science +Massachusetts Institute of Technology +545 Technology Square +Cambridge, MA 02139 +.sp 2 +.if ~narrow +.vp 8.5i +.en +.nf l +.fi +.new_font 0 +This research was supported in part by the Advanced Research +Projects Agency of the Department of Defense, monitored by the +Office of Naval Research under contract N00014-75-C-0661, and +in part by the National Science Foundation under grant DCR74-21892. 
+.ls +.end +.if csg_memo>0 +.ls 1 +.nf c +.new_font 3 +Massachusetts Institute of Technology +Laboratory for Computer Science +.new_font 0 +(formerly Project MAC) +.sp 1.25i +Computation Structures Group Memo csg_memo-1 +.sp 1.25i +.new_font 4 +Abstraction Mechanisms in CLU +.new_font 1 +.sp +by +.sp +Barbara Liskov +Alan Snyder +Russell Atkinson +Craig Schaffert +.new_font 0 +.nf l +.vp 8.25i +.fi +.new_font 0 +This research was supported in part by the Advanced Research +Projects Agency of the Department of Defense, monitored by the +Office of Naval Research under contract N00014-75-C-0661, and +in part by the National Science Foundation under grant DCR74-21892. +.nf c +.sp .5i +January 1977 +.nf l +.fi +.ls +.end +. +. +.if narrow +.sp 2 +.new_font 1 +.ll 7i +.ef +.bp +.rs +.vp 3i +.new_font 3 +.en +ABSTRACT +.new_font 0 +.sp +.ns +.para +CLU is a new programming language designed to support +the use of abstractions in program construction. +Work in programming methodology has led to the realization +that three kinds of abstractions, +procedural, control, and especially data abstractions, +are useful in the programming process. +Of these, only the procedural abstraction +is supported well by conventional languages, +through the procedure or subroutine. +CLU provides, in addition to procedures, +novel linguistic mechanisms that +support the use of data and control abstractions. +.para +This paper provides an introduction to the abstraction mechanisms +in CLU. +By means of programming examples, we illustrate the utility of +the three kinds of abstractions in program construction +and show how CLU programs may be written to use +and implement abstractions. +We also discuss the CLU library, which permits +incremental program development with complete +type-checking performed at compile-time. +.sp +.fi l +Key words and phrases: programming languages, data types, +data abstractions, control abstractions, programming +methodology, separate compilation. 
+.sp +CR categories: 4.0, 4.12, 4.20, 4.22. +.br +.fi b +.if narrow +.ll +.en +. +.ref All71 +Allen, F. E. and Cocke, J. +A catalogue of optimizing transformations. +Rep. RC 3548, +IBM Thomas J. Watson Research Center, +Yorktown Heights, N.@Y., 1971. +.em +.ref All75 +Allen, F. E. +A program data flow analysis procedure. +Rep. RC 5278, +IBM Thomas J. Watson Research Center, +Yorktown Heights, N.@Y., 1975. +.em +.ref Atk76 +Atkinson, R. R. +Optimization techniques for a structured programming language. +S.M. Thesis, +Dept. of Electrical Engineering and Computer Science, +M.@I.@T., Cambridge, Mass., June 1976. +.em +.ref Dah70 +Dahl, O. J., Myhrhaug, B., and Nygaard, K. +The SIMULA 67 common base language. +Publication S-22, Norwegian Computing Center, Oslo, 1970. +.em +.ref DK75 +DeRemer, F. and Kron, H. +Programming-in-the-large versus programming-in-the-small. +2Proceedings of International Conference on Reliable Software*, +2SIGPLAN Notices 10*, 6 (June 1975), 114-121. +.em +.ref Dij72 +Dijkstra, E. W. +Notes on structured programming. +2Structured Programming, +A.P.I.C. Studies in Data Processing No. 8*, +Academic Press, New York 1972, 1-81. +.em +.ref Guttag +Guttag, J. V., Horowitz, E., and Musser, D. R. +Abstract data types and software validation. +Rep. ISI/RR-76-48, Information Sciences Institute, +University of Southern California, Marina del Rey, +Calif., August 1976. +.em +.ref Hoare72 +Hoare, C. A. R. +Proof of correctness of data representations. +2Acta Informatica*, 4 (1972), 271-281. +.em +.ref Knu73 +Knuth, D. +2The Art of Computer Programming*, vol. 3. +Addison Wesley, Reading, Mass., 1973. +.em +.ref LCS75 +2Laboratory for Computer Science Progress Report 1974-1975*, +Computation Structures Group. +Rep. PR-XII, +Laboratory for Computer Science, M.@I.@T., +to be published. +.em +.ref Lam71 +Lampson, B. W. +Protection. +Proc. Fifth Annual Princeton Conference on Information +Sciences and Systems, Princeton University, 1971, 437-443. 
+.em +.ref Lis74 +Liskov, B. H. and Zilles, S. N. +Programming with abstract data types. +Proc. ACM SIGPLAN Conference on Very High Level Languages, +2SIGPLAN Notices 9*, 4 (April 1974), 50-59. +.em +.ref Lis75 +Liskov, B. H. and Zilles, S. N. +Specification techniques for data abstractions. +2IEEE Trans. on Software Engineering*, 2SE-1*, +(1975), 7-19. +.em +.ref Lis76 +Liskov, B. H. and Berzins, V. +An appraisal of program specifications. +Computation Structures Group Memo 141, +Laboratory for Computer Science, +M.@I.@T., Cambridge, Mass., July 1976. +.em +.ref McC62 +McCarthy, J., et al. +2LISP 1.5 Programmer's Manual*, MIT Press, 1962. +.em +.ref Mor73 +Morris, J. H. +Protection in programming languages. +2Comm. ACM 16*, 1 (Jan 1973), 15-21. +.em + .ref Mor74 + Morris, J. H. + Toward more flexible type systems. + Proceedings of the Programming Symposium, Paris, April 9-11, 1974, + 2Lecture Notes in Computer Science 19*, Springer-Verlag, New York, + 377-384. + .. +.ref Par71 +Parnas, D. L. +Information distribution aspects of design methodology. +Proc. IFIP 1971. +.em +.ref Sch76 +Scheifler, R. W. +An analysis of inline substitution for the CLU programming language. +Computation Structures Group Memo 139, +Laboratory for Computer Science, +M.@I.@T., Cambridge, Mass., June 1976. +.em +.ref Spitzen +Spitzen, J. and Wegbreit, B. +The verification and synthesis of data structures. +2Acta Informatica*, 4 (1975), 127-144. +.em +.ref Standish +Standish, T. A. +2Data structures: an axiomatic approach*. +Rep. 2639, Bolt Beranek and Newman, Cambridge, +Mass., 1973. +.em +.ref Thomas +Thomas, J. W. +Module interconnection in programming systems supporting +abstraction. +Rep. CS-16, Computer Science Program, Brown University, +Providence, R.@I., 1976. +.em +.ref Wir71a +Wirth, N. +Program development by stepwise refinement. +2Comm. ACM 14*, 4 (1971), 221-227. +.em +.ref Wir71b +Wirth, N. +The programming language PASCAL. +2Acta Informatica*, 1 (1971), 35-63. 
+.em +.ref Wul84 +Wulf, W. A., London, R., and Shaw, M. +An introduction to the construction and verification +of Alphard programs. +2IEEE Transactions on Software Engineering SE-2*, +(1976), 253-264. +.em +.bp +.if do_refs +.insert_refs +.en +.if narrow +.rs +.sp 3i +.en diff --git a/doc/clu/clup1.r b/doc/clu/clup1.r new file mode 100644 index 00000000..199e5de6 --- /dev/null +++ b/doc/clu/clup1.r @@ -0,0 +1,212 @@ +.so clu/clupap.header +.chapter "Introduction" +.para +The motivation for the design of the CLU programming +language was to provide programmers with a tool that would +enhance their effectiveness in constructing programs of +high quality -- programs that are reliable and reasonably +easy to understand, modify, and maintain. +CLU aids programmers +by providing constructs that support +the use of abstractions in program design and implementation. +.para +The quality of software depends primarily on +the programming methodology in use. +The choice of programming language, however, can have a major impact on +the effectiveness of a methodology. +A methodology can be easy +or difficult to apply in a given language, depending on +how well the language constructs match the +structures that the methodology deems desirable. +The presence of constructs that give a concrete form +for the desired structures makes the methodology more understandable. +In addition, a programming language influences the way that +its users think about programming; +matching a language to a methodology increases the likelihood that +the methodology will be used. +.para +CLU has been designed to support a methodology +(similar to +[Dij72,@Wir71a]) +in which programs are developed by +means of problem decomposition based on the recognition +of abstractions. +A program is constructed in many +stages. +At each stage, the problem to be solved is +how to implement some abstraction (the initial problem +is to implement the abstract behavior required of the +entire program). 
+The implementation is developed by envisioning a number +of subsidiary abstractions (abstract objects and +operations) that are useful in the problem domain. +Once the behavior of the abstract objects and operations +has been defined, a program can be written to solve the +original problem; in this program, the abstract objects +and operations are used as primitives. +Now the original +problem has been solved, but new problems have arisen, +namely, how to implement the subsidiary abstractions. +Each of these abstractions is +considered in turn as a new problem; its implementation +may introduce further abstractions. +This process +terminates when all the abstractions introduced at various +stages have been implemented or are present in the +programming language in use. +.para +In this methodology, programs are developed +incrementally, one abstraction at a time. +Further, a distinction is made between an abstraction, +which is a kind of behavior, and a program, +or 2module*, which implements that behavior. +An abstraction isolates +use from implementation: an abstraction can be used +without knowledge of its implementation and implemented +without knowledge of its use. +These aspects of the methodology are supported by the +CLU 2library*, which maintains +information about abstractions +and the CLU modules that implement them. +The library permits separate compilation of +modules with complete type-checking at +compile-time. +.para +To make effective use of the +methodology, it is necessary to understand the kinds +of abstractions that are useful in constructing programs. +In studying this question, +we identified an important kind of abstraction, +the data abstraction, that +had been largely neglected in discussions of programming methodology. +.para +A data abstraction [Hoare72,@Lis74,@Standish] +is used to introduce a new +type of data object that is deemed useful +in the domain of the problem being solved. 
+At the level of use, the programmer is +concerned with the 2behavior* of these data objects, +what kinds of information can be stored in them and +obtained from them. +The programmer is 2not* concerned +with how the data objects are represented in storage, +nor with the algorithms used to store and access +information in them. +In fact, a data abstraction is +often introduced to delay such implementation +decisions until a later stage of design. +.para +The behavior of the data objects is expressed most +naturally in terms of a set of operations that are meaningful +for those objects. +This set will include operations +to create objects, to obtain information from them, +and possibly to modify them. +For example, +push and pop are among the meaningful operations for stacks, +while meaningful operations for integers include the usual +arithmetic operations. +Thus, a data abstraction consists of a +set of objects and a set of operations +characterizing the behavior of the +objects. +.para +If a data abstraction is to be +understandable at an abstract level, +the behavior of the data objects must be +2completely* characterized by the set of operations. +This property is ensured by making the operations the +2only direct means* of creating and manipulating the objects. +One effect of this restriction +is that, when defining an abstraction, +the programmer must be careful to include a +sufficient set of operations, since every action +he wishes to perform on the objects must be +realized in terms of this set. +.para +We have identified the following requirements that must be +satisfied by a language supporting data abstractions: +.ilist 3 +1. A linguistic construct is needed that permits +a data abstraction to be implemented as a unit. +The implementation involves selecting a representation +for the data objects and defining an algorithm for each +operation in terms of that representation. +.next +2. 
The language must limit access to the +representation to just the operations. This limitation +is necessary to ensure that the operations completely +characterize the behavior of the objects. +.end_list +CLU satisfies these requirements by providing a linguistic construct +called a 2cluster* for implementing data abstractions. +Data abstractions are integrated into the language +through the data type mechanism. +Access to the representation is +controlled by type-checking, which is done at +compile time. +.para +In addition to data abstractions, CLU +supports two other kinds of abstractions: +procedural abstractions and control abstractions. +A procedural abstraction performs a computation on a +set of input objects and produces a set of output objects; +examples of procedural abstractions are sorting an +array and computing a square root. +CLU supports procedural abstractions by means of procedures, +which are similar to procedures in other programming languages. +.para +A control abstraction defines a method +for sequencing arbitrary actions. +All languages provide built-in control abstractions; +examples are the if statement and the while statement. +In addition, however, +CLU allows user definitions of a simple kind of control abstraction. +The method provided is a generalization of the +repetition methods available in many programming +languages. +Frequently the programmer desires to +perform the same action for all the objects in a +collection, such as all +characters in a string or all items in a set. +CLU +provides a linguistic construct called an 2iterator* +for defining how the objects in the +collection are obtained. +The iterator is used in +conjunction with the for statement; the body +of the for statement describes the action to be +taken. +.para +The purpose of this paper is to illustrate +the utility of the three kinds of abstractions +in program construction, +and to provide an informal introduction to CLU. 
+We do not attempt a complete description of the language; +rather, we concentrate on the constructs that +support abstractions. +The presence of these +constructs constitutes the most important way in +which CLU differs from other languages. +The language closest to CLU is Alphard [Wul84], +which represents a concurrent design effort with goals similar to +our own. +The design of CLU has been influenced by +SIMULA 67 [Dah70], and to a lesser extent by +Pascal [Wir71b] and LISP [McC62]. +.para +In the next section we introduce CLU and, +by means of a programming example, +illustrate the use and implementation +of data abstractions. +Section semantics describes the basic semantics of CLU. +In Section more_abstraction, we discuss +control abstractions and more powerful kinds of +data abstractions. +We present the CLU library in Section library. +Section implementation briefly describes +the current implementation of CLU +and discusses efficiency considerations. +.ne 2 +Finally, we conclude by discussing +the quality of CLU programs. diff --git a/doc/clu/clup2.r b/doc/clu/clup2.r new file mode 100644 index 00000000..c2954170 --- /dev/null +++ b/doc/clu/clup2.r @@ -0,0 +1,595 @@ +.nd chapter 2-1 +.so clu/clupap.header +. + string registers for italic variable names +. +.sr i 2i* +.sr s 2s* +.sr o 2o* +.sr c 2c* +.sr n 2n* +.sr t 2t* +.sr r 2r* +.sr x 2x* +.sr tr 2tr* +.sr w 2w* +.sr wb 2wb* +.sr total 2total* +.sr contents 2contents* +.sr count_words 2count_words* +.sr next_word 2next_word* +.sr wordbag 2wordbag* +.sr wordtree 2wordtree* +.sr wordbags 2wordbags* +.sr wordtrees 2wordtrees* +.sr insert 2insert* +.sr create 2create* +.sr print 2print* +.sr instream 2instream* +.sr instreams 2instreams* +.sr outstream 2outstream* +.sr outstreams 2outstreams* +. +.chapter "An Example of Data Abstraction" +.para +This section introduces the basic data +abstraction mechanism of CLU, the cluster. 
+By means of an example, we intend to show how +abstractions occur naturally in program design, +and how they are used and implemented in CLU. +In particular, we show how a data abstraction +can be used as structured intermediate storage. +.para +Consider the following problem: +Given some document, we wish to compute, +for each distinct word in the document, +the number of times the word occurs +and its frequency of occurrence as a percentage of the total +number of words. +The document will be +represented as a sequence of characters. +A word is any non-empty sequence of +alphabetic characters. +Adjacent words are +separated by one or more non-alphabetic +characters such as spaces, punctuation, or newline +characters. +In recognizing distinct words, the +difference between upper and lower case letters should +be ignored. +.para +The output is also to be a sequence of characters, +divided into lines. +Successive lines should contain an alphabetical +list of all the distinct words in the document, +one word per line. +Accompanying each word should +be the total number of occurrences and the +.ne 5 +frequency of occurrence. For example: +.table +.ta 8 20 28 + a 2 3.509% + access 1 1.754% + and 2 3.509% + dots +.rtabs +.end_table +.para +Specifically, we are required to write the +procedure count_words, which takes two arguments: +an instream and an outstream. +The former is the +source of the document to be processed, and the latter +is the destination of the required output. +.ne 5 +The form of this procedure will be +.code + count_words = proc (i: instream, o: outstream); + dots + end count_words; +.end_code +Note that count_words does not return any results; +its only effects are modifications of i (reading the entire +document) and of o (printing the required statistics). +.para +2Instream* and outstream are data abstractions. +An instream i contains a sequence of characters. +Of the primitive +operations on instreams, only two will be of interest to us. 
+2Empty@(i)* returns true if there are no characters available +in i, and returns false otherwise. +2Next@(i)* removes the first character from the sequence +and returns it. +Invoking the next operation on an empty instream is an +error. +.foot +The CLU error handling mechanism is discussed in [LCS75]. +.efoot +An outstream also contains a sequence of characters. +The interesting operation on outstreams is +2put_string@(s,@o)*, +which appends the string s to the existing sequence of characters +in o. +.para +Now consider how we might implement count_words. +We begin by deciding how to handle words. +We could define a new abstract data type 2word*. +However, we choose instead to use strings (a primitive +CLU type), with the restriction that only strings of +lower-case alphabetic characters will be used. +.foot +Sometimes it is difficult to decide whether to introduce +a new data abstraction or to use an existing abstraction. +Our decision to use strings to represent words was made +partly to shorten the presentation. +.efoot +.para +Next, we investigate how to scan the document. +Reading a word requires knowledge of the +exact way in which words occur in the input stream. +We choose to isolate this information in a procedural abstraction, +called next_word, +which takes in the instream i and returns the next word +(converted to lower case characters) in the document. +If there are no more words, +next_word must communicate this fact to count_words. +A simple way to indicate that there are no +more words is by returning an ``end of document'' word, +one that is distinct from any other word. +A reasonable choice for the ``end of document'' word is +the empty string. +.para +It is clear that in count_words we must scan the +entire document before we can print our results, and +therefore, we need some receptacle +to retain information about words between these two +actions (scanning and printing). 
+Recording the +information gained in the scan and organizing it +for easy printing will probably be fairly complex. +Therefore, we will defer such considerations until later +by introducing a data abstraction wordbag with the +appropriate properties. +In particular, wordbag provides +three operations: create, which creates an empty wordbag; +insert, which adds a word to the wordbag; and print, which +prints the desired statistical information about the words +in the wordbag. +.foot +The print operation is not the ideal choice, but a better +solution requires the use of control abstractions. +This solution is presented in Section more_abstraction. +.efoot +.nr count_words current_figure +.para +The implementation of count_words is shown in +Figure count_words. +.begin_figure "The count_words procedure." +.code +count_words = proc (i: instream, o: outstream); + + % create an empty wordbag + wb: wordbag := wordbag$create (); + + % scan document, adding each word found to wb + w: string := next_word (i); + while w ~= "" do + wordbag$insert (wb, w); + w := next_word (i); + end; + + % print the wordbag + wordbag$print (wb, o); + + end count_words; +.ns +.end_code +.finish_figure +The ``%'' character starts a comment, +which continues to the end of the line. +The ``~'' character stands for boolean negation. +The notation 2variable:@type* is used +in formal argument lists and declarations +to specify the types of variables; +a declaration may be combined with an assignment +specifying the initial value of the variable. +Boldface is used for reserved words, including the +names of primitive CLU types. + CLU does not permit + redefinition of the primitive types; however, + primitive types are used in the same way as abstract + types. +.para +The count_words procedure declares four variables: +i, o, wb, and w. +The first two denote the instream and +outstream that are passed as arguments to count_words. 
+The third, wb, denotes the wordbag used to hold +the words read so far, +and the fourth, w, the word +currently being processed. +.para +Operations of a data abstraction are named by +a compound form that specifies both the type and +the operation name. Three examples of operation calls +appear in count_words: 2wordbag$create@()*, +2wordbag$insert@(wb,@w)* +and 2wordbag$print@(wb,@o)*. +The CLU system provides a mechanism that avoids conflicts +between names of abstractions; this mechanism is discussed in +Section library. +However, operations of two different data abstractions may have +the same name; +the compound form serves to resolve this ambiguity. +Although the ambiguity could in most cases be resolved by context, +we have found in using CLU that the compound +form enhances the readability of programs. +.nr next_word current_figure +.para +The implementation of next_word is shown in +Figure next_word. +.begin_figure "The next_word procedure." +.code +next_word = proc (i: instream) returns (string); + + c: char := 1' '*; + + % scan for first alphabetic character + while ~alpha (c) do + if instream$empty (i) + then return ""; + end; + c := instream$next (i); + end; + + % accumulate characters in word + w: string := ""; + while alpha (c) do + w := string$append (w, lower_case (c)); + if instream$empty (i) + then return w; + end; + c := instream$next (i); + end; + + return w; % the non-alphabetic character c is lost + + end next_word; +.ns +.end_code +.finish_figure +The 2string$append* operation creates a new string +by appending a character to the characters in the +string argument +(it does 2not* modify the string argument). +Note the use of the instream operations +2next* and 2empty*. +Note also that two additional procedures have been used: +2alpha@(c)*, +which tests whether a character is alphabetic or not, +and 2lower_case@(c)*, +which returns the lower case version of a character. +The implementations of these procedures are not shown in the paper. 
+.para +Now we must implement the type wordbag. +.ne 5 +The cluster will have the form +.code + wordbag = cluster is create, insert, print; + dots + end wordbag; +.end_code +This form expresses the idea that the data abstraction is a set +of operations as well as a set of objects. +The cluster must +provide a representation for objects of the type wordbag and +an implementation for each of the operations. +We are free to choose from the possible representations the +one best suited to our use of the wordbag cluster. +.para +The representation that we choose should allow +reasonably efficient storage of words and easy printing, +in alphabetic order, of the words and associated statistics. +For efficiency in computing the statistics, maintaining +a count of the total number of words in the document +would be helpful. +Since the total number of words in the document is probably +much larger than the number of distinct words, the +representation of a wordbag should contain only one ``item'' for +each distinct word (along with a multiplicity count), rather +than one ``item'' for each occurrence. +This choice of representation requires that, at +each insertion, we check whether the new word is already +present in the wordbag. +We would like a representation that +allows the search for a matching ``item'' and the insertion of a +not-previously-present ``item'' to be efficient. +A binary tree representation [Knu73] fits our requirements nicely. +.para +Thus the main part of the wordbag representation will +consist of a binary tree. +The binary tree is another data abstraction, +wordtree. 
The data abstraction wordtree +provides operations very similar to those of wordbag: +2create@()* returns an empty wordtree; +2insert@(tr,@w)* returns a wordtree containing all the +words in the wordtree tr plus the additional word w +(the wordtree tr may be modified in the process); +and 2print@(tr,@n,@o)* prints the contents of the +wordtree tr in alphabetic order on outstream o, along with the +number of occurrences and the frequency (based on a total of +n words). +.nr wordbag current_figure +.para +The implementation of wordbag is given in Figure wordbag. +.begin_figure "The wordbag cluster." +.code +wordbag = cluster is + create, % create an empty bag + insert, % insert an element + print; % print contents of bag + + rep = record [contents: wordtree, total: int]; + +create = proc () returns (cvt); + return rep${contents: wordtree$create (), total: 0}; + end create; + +insert = proc (x: cvt, v: string); + x.contents := wordtree$insert (x.contents, v); + x.total := x.total + 1; + end insert; + +print = proc (x: cvt, o: outstream); + wordtree$print (x.contents, x.total, o); + end print; + +end wordbag; +.ns +.end_code +.finish_figure +Following the header, we find the definition of the +.ne 3 +representation selected for wordbag objects: +.code + rep = record [contents: wordtree, total: int]; +.end_code +The reserved type identifier rep indicates that the type +specification to the right of the equal sign is the representing +type for the cluster. +We have defined the representation of a wordbag object to +consist of two pieces: a wordtree, +as explained above, and an integer, which records the total +number of words in the wordbag. +.para +A CLU record is an object with one or more named +components. +For each component name, there is an operation to select +and an operation to set the corresponding component. +The operation 2get_n@(r)* returns the n component +of the record r (this operation is usually +abbreviated 2r.n*). 
+The operation 2put_n@(r,@x)* makes x the n component +of the record r (this operation is usually +abbreviated 2r.n@*:=2@x*, +by analogy with the assignment statement). +A new record is created by an expression of the form +type${name1: value1, dots}. +.para +There are two different +types associated with any cluster: the abstract +type being defined (wordbag in this case) and the +representation type (the record). +Outside of the cluster, +type-checking will ensure that a wordbag object will always be +treated as such. +In particular, the ability to convert a wordbag object into its +representation is not provided (unless one of the +wordbag operations does so explicitly). +.para +Inside the cluster, however, it is necessary to view +a wordbag object as being of the representation type, +because the implementations of the +operations are defined in terms of the representation. +This change of viewpoint is signalled by having the +reserved word cvt appear as the type of an +argument (as in the insert and print operations). +1Cvt* may also appear as a return type +(as in the create operation); +here it indicates that a returned object +will be changed into an object of abstract type. +Whether cvt appears as the type of an +argument or as a return type, +it stipulates a ``conversion'' of viewpoint +between the external abstract type and the internal representation type. +1Cvt* can be used only within a cluster, +and conversion can be done only between the single abstract +type being defined and the (single) representation type. +.foot +1Cvt* corresponds to Morris' seal and unseal [Mor73], +except that 1cvt* represents a change in viewpoint only; +no computation is required. +.efoot +.para +The procedures in wordbag are very simple. 
+2Create* builds a new instance of the rep by use of the +.ne 3 +record constructor +.code + rep${contents: wordtree$create (), total: 0} +.end_code +Here total is initialized to 0, and contents to the +empty wordtree (by calling the create operation of wordtree). +This rep object is converted to a wordbag object as it +is being returned. +2Insert* and print are implemented directly +in terms of wordtree operations. +.nr wordtree current_figure +.para +The implementation of wordtree is shown in Figure wordtree. +In the wordtree representation, each node +contains a word and the number of times that word has been +inserted into the wordbag, as well as two subtrees. +.begin_page_figure "The wordtree cluster." +.code +wordtree = cluster is + create, % create empty contents + insert, % add item to contents + print; % print contents + + node = record [m!value: string, count: int, +(mark!m)lesser: wordtree, greater: wordtree]; + rep = oneof [empty: null, non_empty: node]; + +create = proc () returns (cvt); + return rep$make_empty (nil); + end create; + +insert = proc (x: cvt, v: string) returns (cvt); + tagcase x + tag empty: + n: node := node${m!value: v, count: 1, +(mark!m)lesser: wordtree$create (), +(mark!m)greater: wordtree$create ()}; + return rep$make_non_empty (n); + tag non_empty (n: node): + if v = n.value + then n.count := n.count + 1; + elseif v < n.value + then n.lesser := wordtree$insert (n.lesser, v); + else n.greater := wordtree$insert (n.greater, v); + end; + return x; + end; + end insert; + +print = proc (x: cvt, total: int, o: outstream); + tagcase x + tag empty: ; + tag non_empty (n: node): + wordtree$print (n.lesser, total, o); + print_word (n.value, n.count, total, o); + wordtree$print (n.greater, total, o); + end; + end print; + +end wordtree; +.ns +.end_code +.finish_figure +For any +particular node, the words in the ``lesser'' subtree must +alphabetically precede the word in the node, and the words +in the ``greater'' subtree must follow the word 
in the node. +.ne 4 +This information is described by +.code + node = record [m!value: string, count: int, +(mark!m)lesser: wordtree, greater: wordtree]; +.end_code +which defines ``node'' to be an +abbreviation for the information following +the equal sign. +(The reserved word rep is used similarly, +as an abbreviation for the representation type.) +.para +Now consider the representation of wordtrees. +A non-empty wordtree can be represented by its top node. +An empty wordtree, however, contains no information. +The ideal type to represent an empty wordtree +is the CLU type null, +which has a single data object nil. +So the representation of a wordtree should +be either a node or nil. +.ne 3 +This representation is expressed by +.code + rep = oneof [empty: null, non_empty: node]; +.end_code +.para +Just as the record is the basic CLU +mechanism to form an object +that is a collection of other objects, +the oneof is the basic CLU mechanism to form an object +that is ``one of'' a set of alternatives. +Oneof is CLU's method of forming a +discriminated union, and is somewhat similar to +a variant component of a record in Pascal [Wir71b]. +.para +An object of the type oneof@[s1:@T1 dots sn:@Tn] +can be thought of as a pair. +The ``tag'' component is an +identifier from the set {s1 dots sn}. +The ``value'' +component is an object of the type corresponding to the +tag. +That is, if the tag component is si then the +value is some object of type Ti. +.para +Objects of type oneof@[s1:@T1 dots sn:@Tn] +are created by the operations 2make_si@(x)*, each of +which takes an object x of type Ti +and returns the pair <si,@x>. +Because the type of the value component of a oneof object is not +known at compile-time, allowing direct access +to the value component +could result in a run-time type error (e.g., assigning an object +to a variable of the wrong type).
To eliminate this possibility, +.ne 7 +we require the use of a special tagcase statement to decompose +a oneof object: +.code + tagcase e + tag s1 (id1: T1): @@@m!statements dots + dots + tag sn (idn: Tn):(mark!m)statements dots + end; +.end_code +This statement evaluates the expression 2e* +to obtain an object of type +oneof@[s1:@T1@dots@sn:@Tn]. +If the tag is si, +then the value is assigned to the new variable +idi and the statements following the ith alternative +are executed. +The variable idi is local to those statements. +If, for some reason, we do not need the value, +we can omit the parenthesized variable declaration. +.para +The reader should now know enough to understand +Figure wordtree. +Note, in the create operation, the use +of the construction operation 2make_empty* +of the representation type of wordtree +(the discriminated union oneof@[empty:@null,@non_empty:@node]) +to create the empty wordtree. +The tagcase statement is used in both insert and print. +Note that if insert is given an empty wordtree, it creates a +new top node for the returned value, +but if insert is given a non-empty wordtree, +it modifies the given wordtree and returns it. +.foot +It is necessary for insert to return a value in addition to +having a side-effect because, in the case of an empty wordtree +argument, side-effects are not possible. Side-effects are not +possible because of the representation chosen for the empty +wordtree and because of the CLU parameter passing mechanism +(see Section semantics). +.efoot +The insert operation depends on the dynamic +allocation of space for newly-created records (see +Section semantics). +.para +The print operation uses the obvious recursive descent. +It makes use of procedure +2print_word@(w,@c,@t,@o)*, which generates a single line of +output on 2o*, consisting of the word 2w*, +the count 2c*, and the frequency of occurrence +derived from 2c* and 2t*. +The implementation of 2print_word* has been omitted.
+.para +We have now completed +our first discussion of the count_words procedure. +We return to this problem in Section more_abstraction, +where we present a superior solution. diff --git a/doc/clu/clup3.r b/doc/clu/clup3.r new file mode 100644 index 00000000..f7874a3c --- /dev/null +++ b/doc/clu/clup3.r @@ -0,0 +1,181 @@ +.nd chapter 3-1 +.nd current_figure 5 +.nd wordbag 3 +.so clu/clupap.header +.sr m 2m* +.sr p 2p* +.sr q 2q* +.sr x 2x* +.sr y 2y* +.sr z 2z* +.sr a 2a* +.sr b 2b* +.sr insert 2insert* +.sr increment 2increment* +.chapter "Semantics" +.para +All languages present their users with some model of computation. +This section describes those aspects of CLU semantics that differ +from the common ALGOL-like model. +In particular, we discuss +CLU's notions of objects and variables, +and the definitions of assignment and argument passing that +follow from these notions. +We also discuss type correctness. +.section "Objects and Variables" +.para +The basic elements of CLU semantics are +2objects* and 2variables*. +Objects are the data entities that are created and manipulated +by CLU programs. +Variables are just the names used in a +program to refer to objects. +.para +In CLU, each object has a particular 2type*, +which characterizes its behavior. +A type defines a set of operations +that create and manipulate objects of that type. +An object +may be created and manipulated only via the operations of its type. +.para +An object may 2refer* to objects. +For example, +a record object refers to the objects that are the components +of the record. +This notion is one of logical, not physical, containment. +In particular, it is possible for two distinct record objects to +refer to (or 2share*) the same component object. +In the case of a cyclic structure, it is even possible for an object +to ``contain'' itself. +Thus, it is possible to have recursive data +structure definitions and shared data objects without explicit +reference types. 
+The 2wordtree* type described in the previous +section is an example of a recursively-defined data structure. +(This notion of object is similar to that in LISP.) +.para +CLU objects exist independently of procedure activations. +Space for objects is allocated from a dynamic storage area +as the result of invoking +constructor operations of certain primitive CLU types. +For example, +the record constructor is used in the implementation of 2wordbag* +(Figure wordbag) to acquire space for new 2wordbag* objects. +In theory, all objects continue to exist forever. +In practice, +the space used by an object may be reclaimed when that object is +no longer accessible to any CLU program. +.foot +An object is accessible if it is denoted by a variable of an active +procedure or is a component of an accessible object. +.efoot +.para +An object may exhibit time-varying behavior. +Such an object, called a 2mutable* object, +has a state which may be modified by certain operations +without changing the identity of the object. +Records are examples of mutable objects. +The record update operations (2put_s (r,@v)*, +written as 2r*.2s*@:=@2v* +in the examples) change the state of record objects and +therefore affect the behavior of subsequent applications of +the select operations (2get_s (r)*, written as 2r*.2s*). +The 2wordbag* and 2wordtree* types are additional examples +of types with mutable objects. +.para +If a mutable object m is shared by two other objects x and y, +then a modification to m made via x will be visible when m is +examined via y. +Communication through +shared mutable objects is most beneficial in the context +of procedure invocation, described below. +.para +Objects that do not exhibit time-varying behavior are called +2immutable* objects, or 2constants*. +Examples of constants are integers, booleans, +characters, and strings. +The value of a constant object can not be modified. 
+For example, +new strings may be computed from old ones, +but existing strings do not change. +Similarly, +none of the integer operations +modify the integers passed to them as arguments. +.para +Variables are names used in CLU programs to 2denote* +particular objects at execution time. +Unlike variables in many common programming languages, +which 2are* objects that 2contain* values, +CLU variables are simply names +that the programmer uses to refer to objects. +As such, it is possible for two variables to denote +(or 2share*) the same object. +CLU variables are much like those in LISP, +and are similar to pointer variables in other languages. +However, CLU variables are 2not* objects; +they cannot be denoted by other variables or referred to by objects. +Thus, variables are completely private to the procedure +in which they are declared, +and cannot be accessed or modified by any other procedure. +.section "Assignment and Procedure Invocation" +.para +The basic actions in CLU are 2assignment* and +2procedure invocation*. +The assignment primitive 2x*@:=@2E*, where x is a variable +and 2E* is an expression, causes x to denote +the object resulting from the evaluation of 2E*. +For example, +if 2E* is a simple variable y, then the assignment x@:=@y +causes x to denote the object denoted by y. +The object +is 2not* copied; after the assignment is performed, it will be +2shared* by x and y. +Assignment does not affect +the state of any object. +(Recall that 2r*.2s*@:=@2v* is not a true assignment, +but an abbreviation for 2put_s@(r,@v)*.) +.para +Procedure invocation involves passing argument objects +from the caller to the called procedure and returning result +objects from the procedure to the caller. +The formal arguments +of a procedure are considered to be local variables of the procedure, +and are initialized, by assignment, to the objects resulting from the +evaluation of the argument expressions. 
Thus, argument +objects are shared between the caller and the called procedure. +A procedure may modify mutable argument objects (e.g., records), +but of course it cannot modify immutable ones (e.g., integers). +A procedure has no access to the variables of its caller. +.para +Procedure invocations may be +used directly as statements; those +that return objects may also be used as expressions. +Arbitrary recursive procedures are permitted. +.ne 5 +.section "Type Correctness" +.para +Every variable in a CLU module must be declared; +the declaration specifies the type of object +that the variable may denote. +All assignments to a variable must satisfy +the variable's declaration. +Because argument passing is defined +in terms of assignment, the types of actual +argument objects must be consistent with the declarations of the +corresponding formal arguments. +.para +These restrictions, plus the restriction that only the code +in a cluster may use cvt to convert between the abstract +and representation types, ensure that the behavior of an object +is indeed characterized completely by the operations of its type. +For example, the type restrictions ensure that +the only modification possible to a record object that represents +a 2wordbag* (Figure wordbag) is the modification performed by +the insert operation. +.para +Type-checking is performed on a module-by-module basis +at compile-time (it could also be done at run-time). +This checking can catch all type errors -- even those involving +inter-module references -- because the CLU library maintains the +necessary type information for all modules +(see Section 5). diff --git a/doc/clu/clup4.r b/doc/clu/clup4.r new file mode 100644 index 00000000..fd508428 --- /dev/null +++ b/doc/clu/clup4.r @@ -0,0 +1,494 @@ +.nd chapter 4-1 +.nd current_figure 7 +.so clu/clupap.header +.
+.sr words 2words* +.sr wordbag 2wordbag* +.sr sorted_bag 2sorted_bag* +.sr sorted_bags 2sorted_bags* +.sr wordtree 2wordtree* +.sr tree 2tree* +.sr node 2node* +.sr r 2r* +.sr x 2x* +.sr t 2t* +.sr count_words 2count_words* +.sr count_numeric 2count_numeric* +.sr lt 2lt* +.sr equal 2equal* +.sr print 2print* +.sr string_chars 2string_chars* +.sr create 2create* +.sr insert 2insert* +.sr size 2size* +.sr increasing 2increasing* +.sr s 2s* +.sr n 2n* +.sr index 2index* +.sr limit 2limit* +.sr count 2count* +.sr next_word 2next_word* +.sr elements 2elements* +.sr reverse_elements 2reverse_elements* +. +. +.chapter "More Abstraction Mechanisms" +.para +In this section we continue our discussion of +abstraction mechanisms in CLU. +A generalization of the 2wordbag* abstraction, +called 2sorted_bag*, +is presented as an illustration of parameterized clusters, +which are a means for implementing +more generally applicable data abstractions. +The presentation of 2sorted_bag* +is also used to motivate the introduction of a control +abstraction called an 2iterator*, +which is a mechanism for incrementally generating +the elements of a collection of objects. +Finally, we show an implementation of the sorted_bag +abstraction and illustrate how sorted_bag +can be used in implementing count_words. +.section "Properties of the Sorted_bag Abstraction" +.para +In the count_words procedure given earlier, +a data abstraction called wordbag was used. +A wordbag object is a collection of strings, +each with an associated count. +Strings are inserted into a wordbag object one at a time. +Strings in a wordbag object may be printed in alphabetical order, +each with a count of the number of times it was inserted. +.para +Although wordbag has properties that are specific to the usage +in count_words, +it also has properties in common with a more general abstraction, +sorted_bag. 
+A bag is similar to a set +(it is sometimes called a multi-set) +except that an item can appear in a bag many times. +For example, if the integer 1 is inserted in the set {1,2}, +the result is the set {1,2}, +but if 1 is inserted in the bag {1,2}, +the result is the bag {1,1,2}. +A sorted_bag is a bag that affords access +to the items it contains +according to an ordering relation on the items. +.para +The concept of a sorted_bag is meaningful not only for strings +but for many types of items. +Therefore, we would like to parameterize the sorted_bag abstraction, +the parameter being the type of item to be collected +in the sorted_bag objects. +.para +Most programming languages provide built-in parameterized +data abstractions. +For example, the concept of an array is a parameterized +data abstraction. +.ne 3 +An example of a use of arrays in Pascal is +.code + 1array* 1..n 1of* 1integer* +.end_code +These arrays have two parameters, +one specifying the array bounds (1..n) +and one specifying the type of element in the array (integer). +In CLU we provide mechanisms allowing user-defined +data abstractions (like sorted_bag) to be parameterized. +.para +In the sorted_bag abstraction, +not all types of items make sense. +Only types that define a total ordering on their objects +are meaningful, +since the sorted_bag abstraction depends on the presence +of this ordering. +In addition, information about the ordering must be +expressed in a way that is useful for programming. +A natural way to express this information +is by means of operations of the item type. +Therefore, we require that the item type provide +less than and equal operations +(called lt and equal). 
+.ne 5 +This constraint is expressed in the header for sorted_bag: +.code + sorted_bag = cluster [t: type] is create, insert, dots + where t has + lt, equal: proctype (t, t) returns (bool); +.end_code +The item type t is a 2formal parameter* of the sorted_bag +cluster; whenever the sorted_bag abstraction is used, +.ne 3 +the item type must be specified as an 2actual parameter*, e.g., +.code + sorted_bag[string] +.end_code +.para +The information about required operations +informs the programmer about legitimate uses of sorted_bag. +The compiler will check each use of sorted_bag to ensure +that the item type provides the required operations. +The where clause specifies exactly the information +that the compiler can check. +Of course, more is assumed about the item type 2t* +than the presence of +operations with appropriate names and functionalities: +these operations must also define a total ordering on the items. +Although we expect formal and complete specifications +for data abstractions to be included in the CLU library eventually, +we do not include in the CLU language declarations +that the compiler cannot check. +This point is discussed further in Section discussion. +.para +Now that we have decided to define a +sorted_bag abstraction that works for many item types, +we must decide what operations this abstraction provides. +When an abstraction (like wordbag) +is written for a very specific purpose, +it is reasonable to have +some specialized operations. +For a more general abstraction, +the operations should be more generally useful. +.para +The 2print* operation is a case in point. +Printing is only one possible use of the information contained +in a 2sorted_bag*. +It was the only use in the case of 2wordbag*, +so it was reasonable to have a 2print* operation. 
+However, if 2sorted_bags* are to be generally useful, +there should be some way for the user to obtain +the elements of the 2sorted_bag*; the user can then +perform some action on the elements (for example, print them). +.para +What we would like is an operation on sorted_bags +that makes all of the elements available to the caller +in increasing order. +One possible approach is to map +the elements of a sorted_bag +into a sequence object, +a solution potentially requiring a large amount of space. +A more efficient method is provided by CLU and is discussed below. +This solution computes the sequence +one element at a time, thus saving space. +If only part of the sequence is used +(as in a search for some element), +then execution time can be saved as well. +.section "Control Abstractions" +.para +The purpose of many loops is to perform an action +on some or all of the objects in a collection. +For such loops, +it is often useful to separate the +selection of the next object +from the action performed on that object. +CLU provides a control abstraction that permits +a complete decomposition of the two activities. +The for statement available in many programming languages +provides a limited ability in this direction: +it iterates over ranges of integers. +The CLU for statement +can iterate over collections of any +type of object. +The selection of the next object in the collection +is done by a user-defined 2iterator*. +The iterator +produces the objects in the collection one at a time +(the entire collection need not physically exist); +each object is consumed by the for statement in turn. +.nr rra0 current_figure +.para +Figure rra0 gives an example of a simple iterator +called string_chars, which produces the characters in a string in +the order in which they appear. +.begin_figure "Use and definition of a simple iterator." 
+.code +count_numeric = proc (s: string) returns (int); + count: int := 0; + for c: char in string_chars (s) do + if char_is_numeric (c) + then count := count + 1; + end; + end; + return count; + end count_numeric; + +string_chars = iter (s: string) yields (char); + index: int := 1; + limit: int := string$size (s); + while index <= limit do + yield string$fetch (s, index); + index := index + 1; + end; + end string_chars; +.ns +.end_code +.finish_figure +This iterator uses string operations 2size@(s)*, +which tells how many characters are in the string s, +and 2fetch@(s,@n)*, +which returns the n!th character in the string s +(provided the integer n is greater than zero +and does not exceed the size of the string). +.foot +A while loop is used in the implementation of +string_chars so that the example would be based +on familiar concepts. In actual practice, such a +loop would be written using a for statement invoking +a primitive iterator. +.efoot +.br +.ne 5 +.para +The general form of the CLU for statement is +.code + for declarations in iterator-invocation do + body + end; +.end_code +An example of the use of the for statement +occurs in the count_numeric procedure +(see Figure rra0), +which contains a loop +that counts the number of numeric characters in a string. +Note that the details of how the characters are obtained +from the string are entirely contained +in the definition of the iterator. +.para +Iterators work as follows: +A for statement initially invokes an iterator, +passing it some arguments. +Each time a yield statement is executed in the iterator, +the objects yielded +.foot +Zero or more objects may be yielded, +but the number and types of objects yielded each time by an iterator +must agree with the number and types of variables in +a for statement using the iterator. +.efoot +are assigned to the variables declared in the for statement +(following the reserved word for) +in corresponding order, and the body of the for +statement is executed. 
+Then the iterator is resumed at the statement +following the yield statement, +in the same environment as when the objects were yielded. +When the iterator terminates, by either an implicit +or explicit return, then the invoking for statement +terminates. The iteration may also be prematurely +terminated by a return in the body of the +for statement. +.para +For example, suppose that string_chars is invoked +with the string ``a3''. +The first character yielded is `a'. +At this point within string_chars, index@=@1 and limit@=@2. +Next the body of the for statement is performed. +Since the character `a' is not numeric, +count remains at 0. +Next string_chars is resumed at the statement after the yield +statement, and when resumed, index@=@1 and limit@=@2. +Then index is assigned 2, +and the character `3' is selected from the string and yielded. +Since `3' is numeric, count becomes@1. +Then string_chars is resumed, +with index@=@2 and limit@=@2, and index is incremented, +which causes the while loop to terminate. +The implicit return terminates both the iterator and the +for statement, with control resuming at the statement +after the for statement, +and count@=@1. +.para +While iterators are useful in general, +they are especially valuable in conjunction with data abstractions +that are collections of objects (such as sets, arrays, and +sorted_bags). +Iterators afford users of such abstractions access to all objects +in the collection, without exposing irrelevant details. +Several iterators may be included in a data abstraction. +When the order of obtaining the objects is important, +different iterators may provide different orders. +.section "Implementation and Use of Sorted_bag" +.para +Now we can describe a minimal set of operations +for sorted_bag. +The operations are create, insert, size, and increasing. 
+2Create*, insert, and size are procedural abstractions +that, respectively, +create a sorted_bag, insert an item into a sorted_bag, +and give the number of items in a sorted_bag. +2Increasing* is a control abstraction +that produces the items in a sorted_bag in increasing order; +each item produced is accompanied by +an integer representing the number of times +the item appears in the sorted_bag. +Note that other operations might also +be useful for sorted_bag, +for example, an iterator yielding the items +in decreasing order. +In general, the definer of a data abstraction +can provide as many operations as seems reasonable. +.para +In Figure current_figure, we give an implementation +of the sorted_bag abstraction. +.begin_figure "The sorted_bag cluster." +.code +sorted_bag = cluster [t: type] is create, insert, size, increasing + where t has equal, lt: proctype (t, t) returns (bool); + + rep = record [contents: tree[t], total: int]; + +create = proc () returns (cvt); + return rep${contents: tree[t]$create (), total: 0}; + end create; + +insert = proc (sb: cvt, v: t); + sb.contents := tree[t]$insert (sb.contents, v); + sb.total := sb.total + 1; + end insert; + +size = proc (sb: cvt) returns (int); + return sb.total; + end size; + +increasing = iter (sb: cvt) yields (t, int); + for item: t, count: int + in tree[t]$increasing (sb.contents) do + yield item, count; + end; + end increasing; + +end sorted_bag; +.ns +.end_code +.finish_figure +It is implemented using a sorted binary tree, +just as wordbag was implemented. +Thus, a subsidiary abstraction is necessary. +This abstraction, called tree, is a generalization +of the wordtree abstraction (used in Section example), +which has been parameterized to work for all ordered types. +An implementation of tree is given in Figure current_figure. +Notice that both the tree abstraction and the sorted_bag abstraction +place the same constraints on their type parameters. +.begin_page_figure "The tree cluster." 
+.code +tree = cluster [t: type] is create, insert, increasing + where t has equal, lt: proctype (t, t) returns (bool); + + node = record [m!value: t, count: int, +(mark!m)lesser: tree[t], greater: tree[t]]; + rep = oneof [empty: null, non_empty: node]; + +create = proc () returns (cvt); + return rep$make_empty (nil); + end create; + +insert = proc (x: cvt, v: t) returns (cvt); + tagcase x + tag empty: + n: node := node${m!value: v, count: 1, +(mark!m)lesser: tree[t]$create (), +(mark!m)greater: tree[t]$create ()}; + return rep$make_non_empty (n); + tag non_empty (n: node): + if t$equal (v, n.value) + then n.count := n.count + 1; + elseif t$lt (v, n.value) + then n.lesser := tree[t]$insert (n.lesser, v); + else n.greater := tree[t]$insert (n.greater, v); + end; + return x; + end; + end insert; + +increasing = iter (x: cvt) yields (t, int); + tagcase x + tag empty: ; + tag non_empty (n: node): + for item: t, count: int + in tree[t]$increasing (n.lesser) do + yield item, count; + end; + yield n.value, n.count; + for item: t, count: int + in tree[t]$increasing (n.greater) do + yield item, count; + end; + end; + end increasing; +end tree; +.ns +.end_code +.finish_figure +.para +An important feature of the 2sorted_bag* +and 2tree* clusters +is the way that the cluster parameter is used in places +where the type string was used in wordbag and wordtree. +This usage is especially evident in the implementation of tree. +For example, tree has a representation that stores values of +type t: the 2value* component of a node +must be an object of type t. +.para +In the insert operation of tree, +the lt and equal operations of type t are used. +We have used the compound form, e.g. 2t$equal@(v,@n.value)*, +to emphasize that the equal operation of t is being used. +The short form, 2v@=@n.value*, could have been used instead. 
+.para +The increasing iterator of tree works as follows: +First it yields all items in the current tree +that are less than the item at the top node; +the items are obtained by a recursive use of itself, +passing the 2lesser* subtree as a parameter. +Next it yields the contents of the top node, +and then it yields all items in the current tree +that are greater than the item at the top node +(again by a recursive use of itself). +In this way it performs a complete walk over the tree, +yielding the values at all nodes, in increasing order. +.nr rra1 current_figure +.para +Finally, we show in Figure rra1 how the original +procedure count_words can be implemented in terms of sorted_bag. +.begin_figure "The count_words procedure using iterators." +.code +count_words = proc (i: instream, o: outstream); + + wordbag = sorted_bag[string]; + + % create an empty wordbag + wb: wordbag := wordbag$create (); + + % scan document, adding each word found to wb + for word: string in words (i) do + wordbag$insert (wb, word); + end; + + % print the wordbag + total: int := wordbag$size (wb); + for w: string, count: int in wordbag$increasing (wb) do + print_word (w, count, total, o); + end; + end count_words; +.ns +.end_code +.finish_figure +. +Note that the count_words procedure now uses 2sorted_bag*[string] +instead of wordbag. +2Sorted_bag*[string] is legitimate, since the type string +provides both lt and equal operations. +Note that two for statements are used in count_words. +The second for statement prints the words +in alphabetic order, +using the increasing iterator of sorted_bag. +.ne 4 +The first for statement inserts the words into the sorted_bag; +it uses an iterator +.code + words = iter (i: instream) yields (string); + dots + end words; +.widow 2 +.end_code +The definition of words is left as an exercise for the reader. 
diff --git a/doc/clu/clup5.r b/doc/clu/clup5.r new file mode 100644 index 00000000..4a11e3b3 --- /dev/null +++ b/doc/clu/clup5.r @@ -0,0 +1,112 @@ +.nd chapter 5-1 +.nd current_figure 11 +.nd wordbag 3 +.so clu/clupap.header +.sr p 2p* +.sr q 2q* +.sr x 2x* +.sr y 2y* +.sr z 2z* +.sr a 2a* +.sr b 2b* +.sr insert 2insert* +.chapter "The CLU Library" +.para +So far, we have shown CLU modules as separate pieces of +text, without explaining how they are bound together to form a +program. This section describes the CLU library, which plays a +central role in supporting inter-module references. +.para +The CLU library contains information about +abstractions. The library supports incremental program +development, one abstraction at a time, and, in addition, +makes abstractions that are defined during the construction of +one program available as a basis for subsequent program development. +The information in the library permits the separate +compilation of single modules, with complete type-checking +of all external references (such as procedure +invocations). +.para +The structure of the library derives from the fundamental +distinction between abstractions and implementations. +For each abstraction, there is a 2description +unit* which contains all system-maintained information +about that abstraction. Included in the description unit +are zero or more modules that implement the abstraction. +.foot +Other information that may be stored in the library +includes information about relationships among +abstractions, as might be expressed in a module +interconnection language [DK75,@Thomas]. +.efoot +.para +The most important information contained in a description +unit is the abstraction's 2interface specification*, which +is that information needed to type-check uses of the abstraction. 
+For procedural and control abstractions, +this information consists of the number and types of +parameters, arguments, and output values, plus any constraints +on type parameters (i.e., required operations, as described in +Section 4). For data abstractions, +it includes the number and types of parameters, constraints on +type parameters, and the +name and interface specification of each +operation. +.para +An abstraction is entered in the library by +submitting the interface specification; +no implementations are required. +In fact, a module can be compiled before any implementations +have been provided for the abstractions that it uses; +it is necessary only that interface specifications +have been given for those abstractions. +Ultimately, there can be many implementations +of an abstraction; +each implementation is required to satisfy the +interface specification of the abstraction. +Because all uses and implementations +of an abstraction are checked against the interface +specification, the actual selection +of an implementation can be delayed +until just before (or perhaps during) execution. +We imagine a process of binding together modules +into programs, prior to execution, at which time +this selection would be made. +.para +An important detail of the CLU system is +the method by which CLU modules refer to abstractions. +To avoid problems of name conflicts that can arise in +large systems, the names used by a module to refer to +abstractions can be chosen to suit the programmer's +convenience. +When a module is submitted for +compilation, its external references must be bound to +description units so that type-checking can be +performed. The binding is accomplished by constructing +an 2association list*, +mapping names to description units, which +is passed to the compiler along with the source code when +compiling the module. +The mapping in the association list is stored by the compiler +in the library as part of the module. 
+A similar process is involved in entering interface +specifications of abstractions, as these will include +references to other (data) abstractions. +.para +When the compiler type-checks a module, +it uses the association list to map the external +names in the module to description units, and then uses +the interface specifications in those description +units to check that the abstractions are used correctly. +The type-correctness of the module thus +depends upon the binding of names to description units +and the interface specifications in those description +units, and could be invalidated if changes to the +binding or the interface specifications were subsequently +made. For this reason, the process of compilation +permanently binds a module to the abstractions it +uses, and the interface description of an abstraction, +'ne 2 +once defined, is not allowed to change. +(Of course, a new description unit can be created +to describe a modified abstraction.) diff --git a/doc/clu/clup6.r b/doc/clu/clup6.r new file mode 100644 index 00000000..db7a32af --- /dev/null +++ b/doc/clu/clup6.r @@ -0,0 +1,77 @@ +.nd chapter 6-1 +.nd current_figure 1 +.nd wordbag 3 +.so clu/clupap.header +.sr insert 2insert* +. +.chapter "Implementation" +.para +This section briefly describes the current implementation of CLU +and discusses its efficiency. +.para +The implementation is based on a decision to represent +all CLU objects by 2object descriptors*, +which are fixed-size values containing a type code and some +type-dependent information. +.foot +Object descriptors are similar to capabilities [Lam71]. +.efoot +In the case of mutable types, the type-dependent information +is a pointer to a separately-allocated +area containing the state information. 
For constant +types, the information either directly contains +the value (if the value can be encoded in the +information field, such as for integers, characters, +and booleans) or contains a pointer to separately-allocated +space (as for strings). +The type codes are used by the garbage collector +to determine the physical representation of objects +so that the accessible objects can be traced; +they are also useful for supporting program debugging. +.para +The use of fixed-size object descriptors +allows variables to be fixed-size cells. Assignment +is efficient: the object descriptor resulting +from the evaluation of the expression is simply +copied into the variable. In addition, a single +size for variables facilitates the separate compilation +of modules and allows most of the code of a +parameterized module to be shared among all instantiations +of the module. The actual parameters are made available +to this code by means of a small parameter-dependent +section, which is initialized prior to execution. +.para +Procedure invocation is relatively efficient. +A single program stack is used, +and argument passing is as efficient as assignment. +Iterators are a form of coroutine; +however, their use is sufficiently constrained +that they are implemented using just the program stack. +Using an iterator is therefore only slightly more expensive +than using a procedure. +.para +The data abstraction mechanism is not inherently +expensive. No execution time type-checking is necessary. +Furthermore, the type conversion implied by 1cvt* +is merely a change in the view taken of an object's type, +and does not require any computation. +.para +A number of optimization techniques can be +applied to a collection of modules, if one is +willing to give up the flexibility of separate +compilation. The most effective such optimization is +the inline substitution of procedure (and iterator) bodies +for invocations [Sch76]. 
+The use of data abstractions tends to introduce +extra levels of procedure invocations that perform little or no +computation. As an example, consider the 2wordbag$insert* +operation (Figure wordbag), which merely invokes the +2wordtree$insert* operation and increments a counter. +If data abstractions had not been used, these actions would most +likely have been performed directly by the 2count_words* +procedure. The 2wordbag$insert* operation is thus +a good candidate for being compiled inline. +Once inline substitution has been performed, the increase +in context will enhance the effectiveness of +conventional optimization techniques +[All71,@All75,@Atk76]. diff --git a/doc/clu/clup7.r b/doc/clu/clup7.r new file mode 100644 index 00000000..74d1bb3f --- /dev/null +++ b/doc/clu/clup7.r @@ -0,0 +1,151 @@ +.nd chapter 7-1 +.nd current_figure 5 +.nd wordbag 3 +.so clu/clupap.header +.sr p 2p* +.sr q 2q* +.sr x 2x* +.sr y 2y* +.sr z 2z* +.sr a 2a* +.sr b 2b* +.sr insert 2insert* +.chapter "Discussion" +.para +Our intent in this paper has been to provide an +informal introduction to the abstraction mechanisms in CLU. +By means of programming examples, we have illustrated the +use of data, procedural, and control abstractions, and have +shown how CLU modules are used to implement these +abstractions. We have not attempted to provide a complete +description of CLU, but, in the course of explaining +the examples, most features of the language have appeared. +One important omission is the CLU exception handling mechanism +(which does support abstractions); this mechanism +is described in [LCS75]. +.para +In addition to describing constructs +that support abstraction, previous sections have +covered a number of other topics. We have discussed the +semantics of CLU. We have described the organization of the +CLU library and discussed how it supports incremental +program development and separate +compilation and type-checking of modules. 
+Also, we have described our current +implementation and discussed its efficiency. +.para +In designing CLU, our goal was to simplify the task +of constructing reliable software that is reasonably easy +to understand, modify, and maintain. It seems appropriate, +therefore, to conclude this paper with a discussion of how +CLU contributes to this goal. +.para +The quality of any program depends upon the skill of +the designer. In CLU programs, +this skill is reflected in the choice of abstractions. +In a good design, abstractions will be used +to simplify the connections between modules and to +encapsulate decisions that are likely to change [Par71]. +Data abstractions are particularly valuable for these purposes. +For example, through the use of a data abstraction, +modules that share a system data base +rely only on its abstract behavior as +defined by the data base operations. The connections +among these modules are much simpler +than would be possible if they shared knowledge +of the format of the data base and the relationship +among its parts. In addition, the data base abstraction +can be reimplemented without affecting the code of the modules +that use it. +CLU encourages the use of data abstractions, +and thus aids the programmer during program design. +.para +The benefits arising from the use of data +abstractions are based on the constraint, inherent in CLU +and enforced by the CLU compiler, that only the operations +of the abstraction may access the representations of the objects. +This constraint ensures that the distinction made in CLU +between abstractions and implementations +applies to data abstractions as well as to procedural +and control abstractions. +.para +The distinction between abstractions and implementations +eases program modification and maintenance. +Once it has been determined that an abstraction must be +reimplemented, CLU guarantees that the code of +all modules using that +abstraction will be unaffected by the change. 
+The modules need not be reprogrammed or even recompiled; +only the process of +selecting the implementation of the abstraction must be +redone. +The problem of determining what modules must be +changed is also simplified, because each module has a +well-defined purpose, to implement an abstraction, +and no other module can interfere with that purpose. +.para +Understanding and verification of CLU programs is +made easier +because the distinction between +abstractions and implementations permits this task +to be decomposed. +One module at a time is studied to determine that it +implements its abstraction. This study requires +understanding the behavior of the abstractions +it uses, but it is not necessary to understand the +modules implementing those abstractions. Those +modules can be studied separately. +.para +A promising way to establish the +correctness of a program is by means of a mathematical +proof. For practical reasons, proofs should be +performed (or at least checked) by a verification +system, since the process of constructing +a proof is tedious and error-prone. +Decomposition of the proof is essential for +program proving, which is practical only for small +programs (like CLU modules). Note that when the CLU +compiler does type-checking, it is, in addition +to enforcing the constraint that permits the proof +to be decomposed, also performing a small part of the +actual proof. +.para +We have included as declarations in CLU just +the information that the compiler can check with +reasonable efficiency. +We believe that the other +information required for proofs (specifications and +assertions) should be expressed in a separate +``specification'' language. +The properties of such a language are being +studied [Guttag, Lis75, Lis76, Spitzen]. +We intend eventually to add formal specifications to the +CLU system; the library is already organized to +accommodate this addition. 
At that time various +specification language processors could be added to +the system. +.para +We believe that the constraints imposed by +CLU are essential for practical as well as theoretical +reasons. It is true that data abstractions +can be used in any language by +establishing programming conventions to protect the +representations of objects. However, conventions are no +substitute for enforced constraints. It is inevitable +that the conventions will be violated -- and are likely +to be violated just when they are needed most, in +implementing, maintaining, and modifying large +programs. It is precisely at this time, when the +.ne 3 +programming task becomes very difficult, that a +language like CLU will be most valuable and +appreciated. +.chapter "Acknowledgements" +.para +The authors gratefully acknowledge the contributions +made by members of the CLU design group over the +last three years. Several people have made +helpful comments about this paper, including +Toby Bloom, Dorothy Curtis, Mike Hammer, +Eliot Moss, Jerry Saltzer, Bob Scheifler, +and the referees. diff --git a/doc/clu/clupap.header b/doc/clu/clupap.header new file mode 100644 index 00000000..a21ffdce --- /dev/null +++ b/doc/clu/clupap.header @@ -0,0 +1,117 @@ +. + header file for clu paper +. 
+.nd narrow 0 +.nd csg_memo 0 +.nd started 0 +.if ~started +.dv xgp +.fo 0 fonts; 30vr kst +.fo 1 fonts; 31vgb kst +.fo 2 fonts; 30vri kst +.fo 3 fonts; 37vrb kst +.fo 4 fonts; 75vbee kst +.fo 7 fonts; 18fg kst +.tr @ +.nr fnfont 7 +.nr chapter_starts_page 0 +.nr reset_per_page 0 +.nr both_sides 1 +.sr list_left_margin 500m +.sr list_right_margin 500m +.if narrow +.nr big_font 1 +.ls 1 +.ll 4.25i +.pl 14i +.sr list_left_margin 200m +.sr list_right_margin 200m +.ef csg_memo==0 +.ls 2 +.sr left_heading Abstraction Mechanisms in CLU +.sr right_heading date +.ef csg_memo>0 +.ls 1.5 +.sr left_heading CSG Memo csg_memo-1 +.sr right_heading Abstraction Mechanisms in CLU +.en +.sr figure_name Figure \ +.sr table_name Table \ +.nr immediate_figure 0 +.nr tty_table_of_contents 1 +.nr verbose 1 +.so r/r.macros +.so clu/clukey.r +.nr started 1 +.if narrow +.eq begin_page_figure begin_figure +.en +.en +. + number register for section references +. +.nr introduction 1 +.nr example 2 +.nr semantics 3 +.nr more_abstraction 4 +.nr library 5 +.nr implementation 6 +.nr efficiency 6 +.nr discussion 7 +. + paragraph macro +. +.if ls<150 +.de para +.sp +.ne 3 +.ti 5 +.em +.ef +.de para +.br +.ne 3 +.ti 5 +.em +.en +. + three dots +. +.sr dots 1...* +.sr th 7th* +. + code brackets - use like table, end_table +. +.de code +.table 2 +.if narrow +.ta 8 11 14 17 20 23 26 29 32 +.en +.em +. +.de end_code +.rtabs +.end_table +.em +. + mark position +. +.de m +.nr mark hpos +.em +. + widow eliminator +. +.de widow +.lbegin +.nv n \0 +.if n<1 +.nr n 1 +.end +.vx n n!l 2m +.if vtrap0 +.nr n n-vtrap +.ct footer_handler +n!m +.end +.end +.em diff --git a/doc/clu/clupap.r b/doc/clu/clupap.r new file mode 100644 index 00000000..f88c8259 --- /dev/null +++ b/doc/clu/clupap.r @@ -0,0 +1,24 @@ + CLU paper +. + set csg_memo to 0 for paper version + set it to csg memo number for csg memo version +. + set narrow to 1 for map version + and insert ;SIZE 14 at the beginning of the XGP file +. 
+.nr narrow 0 +.nr csg_memo 0 +. +.so clu/clupap.header +.so clu/clup0.r +.so clu/clup1.r +.so clu/clup2.r +.so clu/clup3.r +.so clu/clup4.r +.so clu/clup5.r +.so clu/clup6.r +.so clu/clup7.r +.if narrow +.ns p +.en +.insert_refs