www

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README

commit ac805e30e9e8a5e9f2dd279efbfb647cc4edc959
parent 9fe7f03a46fbeb6c568e0c9717d937da0b997acb
Author: Georges Dupéron <georges.duperon@gmail.com>
Date:   Thu,  8 Jun 2017 23:38:31 +0200

Wrote the overview of the future chapters and sections.

Diffstat:
Mscribblings/abbreviations.rkt | 14+++++++++++---
Mscribblings/bibliography.bib | 24++++++++++++++++++++++++
Mscribblings/introduction.scrbl | 338++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---
Mscribblings/state-of-the-art.scrbl | 17+++++++++++------
Mscribblings/util.rkt | 9++++++---
5 files changed, 378 insertions(+), 24 deletions(-)

diff --git a/scribblings/abbreviations.rkt b/scribblings/abbreviations.rkt @@ -1,6 +1,7 @@ #lang at-exp racket (provide typedracket Typedracket csharp CAML CLOS NIT CPP DeBruijn HOAS PHOAS - monocecil dotnet DLL nanopass nanopass-c-f) + monocecil dotnet DLL nanopass nanopass-c-f haskell haskell98 Hackett + turnstile Turnstile cur Cur) (require scribble/base scribble/core @@ -34,4 +35,11 @@ (define PHOAS "PHOAS") (define monocecil @tt{"Mono.Cecil"}) (define dotnet ".NET") -(define DLL "DLL") -\ No newline at end of file +(define DLL "DLL") +(define haskell "haskell") +(define haskell98 "Haskell 98") +(define Hackett "Hackett") +(define turnstile "Turnstile") +(define Turnstile "Turnstile") +(define cur "Cur") +(define Cur "Cur") +\ No newline at end of file diff --git a/scribblings/bibliography.bib b/scribblings/bibliography.bib @@ -1,3 +1,14 @@ +@article{ganz2001macros-as-multi-stage-computations, + title={Macros as multi-stage computations: type-safe, generative, binding macros in MacroML}, + author={Ganz, Steven E and Sabry, Amr and Taha, Walid}, + journal={ACM SIGPLAN Notices}, + volume={36}, + number={10}, + pages={74--85}, + year={2001}, + publisher={ACM} +} + % One factor that influences the cost of changes is the length of time it takes % to complete the edit-compile-link-run-test cycle. Under conventional % operating systems, this cycle is usually on the orde r of a few minutes. In @@ -42,6 +53,19 @@ pages = {135--152}, } +@inproceedings{torlak_rosette_symbolic_vm, + author = {Torlak, Emina and Bodik, Rastislav}, + title = {A Lightweight Symbolic Virtual Machine for Solver-aided Host Languages}, + booktitle = {Proceedings of the 35th ACM SIGPLAN Conference on Programming Language Design and Implementation}, + series = {PLDI '14}, + year = {2014}, + isbn = {978-1-4503-2784-8}, + pages = {530--541}, + numpages = {12}, + doi = {10.1145/2594291.2594340}, + publisher = {ACM}, +} + @inproceedings{flatt_scheme_classes_2006, title = {Scheme with classes, mixins, and traits}, url = {http://link.springer.com/chapter/10.1007/11924661_17}, diff --git a/scribblings/introduction.scrbl b/scribblings/introduction.scrbl @@ -151,7 +151,7 @@ @asection{ @atitle{Graphs} - @htodo{full stop in the middle of sentence} + @htodo{full stop in the middle of sentence} Nontrivial programs are inherently graphs: they may contain mutually recursive functions (which directly refer to each other, and therefore will form a cycle in a representation of the program), circular and (possibly @@ -362,8 +362,136 @@ @asection{ @atitle{Verification} + @todo{Needs a transition from the previous section, or this should be moved + elsewhere.} + The implementation presented in this thesis cannot be immediately extended to - support end-to-end } + support end-to-end formal verification of the compiler being written. However, + it contributes to pave the way for writing formally verified compilers: + Firstly, the smaller passes are easier to verify. Secondly, the use of + intermediate representations which closely match the input and output data can + be used, given a formal semantic of each @tech{IR}, to assert that a + transformation pass is systematically preserving the semantics of the input. + Thirdly, the use of a typed language instead of the currently ``untyped'' + @|nanopass| framework means that a lot of properties can be ensured by relying + on the type system. @|Typedracket|'s type checker is not formally verified + itself and would have to be trusted (alternatively, the adventurous researcher + could attempt to derive well-typedness proofs automatically by hijacking the + type checker to generate traces of the @htodo{type checking} steps involved, + or manually, only using the type checker as a helper tool to detect and weed + out issues during development. Fourthly, the explicit specification of the + dependencies of passes on their input via row types is a form of frame + specification@htodo{reference}, and can significantly ease the verification + effort, as the engineer can rely on the fact that irrelevant parts were not + modified in the output. These @htodo{frame properties}@htodo{or rather frame + specifications?} are statically enforced by our extension of @|typedracket|'s + type system, which we formalise in chapter @todo{??}. This relies on trusting + @|typedracket| itself once again, and on the correctness of the implementation + of our translation from the extended type system to @|typedracket|'s core type + system. Fifthly we provide means to express graph transformations as such + instead of working with an encoding of graphs as abstract syntax trees (or + directed acyclic graphs), with explicit backward and transversal references. + We are hopeful that eliminating this mismatch will be beneficial to the formal + verification of the transformation passes. + + These advantages would be directly available to engineers attempting a formal + proof of their compiler, while trusting the correctness of @|typedracket|, as + well as that of our framework. The implementation of our framework is not + hardened, and @|typedracket| itself suffers from a small number of known + sources of unsoundness@note{See + @url["https://github.com/racket/typed-racket/issues"].}, however. In order to + do an end-to-end verification of a compiler, it would be necessary to port our + approach to a language better suited to formal verification. Alternatively, + Racket could in principle be extended to help formalisation efforts. Two + approaches come to mind: the first consists in proving that the macros + correctly implement the abstraction which they attempt to model; the second + would be to have the macros inject annotations and hints indicating properties + that must be proven, in the same way that type annotations are currently + inserted. These hints could then be used by the prover to generate proof + obligations, which could then be solved manually or automatically. + + @htodo{atitle{…}} + + Mixing macros and language features which help obtaining static guarantees is + a trending topic in the Racket ecosystem and in other communities. + + @|Typedracket| is still in active development, and several other projects + were presented recently. + + @|Hackett|@note{@url["https://github.com/lexi-lambda/hackett"]}, mainly + developed by by @lastname{King}, is a recent effort to bring a + @|haskell98|-like type system to Racket. + + @|Hackett| is built on the techniques developed by @lastname{Chang}, + @lastname{ Knauth} and @lastname{Greenman} in + @~cite["chang2017type-systems-as-macros"], which lead to the @|turnstile| + library@note{@url["https://bitbucket.org/stchang/macrotypes.git"]}. + @|Turnstile| is a helper library which facilitates the creation of typed + languages in Racket. Macros are amended with typing rules, which are used to + thread type information through uses of macros, definition forms and other + special forms. Type checking is therefore performed during macro-expansion, + and does not rely on an external type checker which would work on the expanded + code. As a result, new languages built with Racket and @|turnstile| are not + limited to a pre-existing type system, but can rather devise their own from + the ground up. This approach brings a lot of flexibility, the drawback being + that more trust is put in the language designer. + + The work presented in this thesis@htodo{, which started before @|turnstile| + was publicly announced,} aims to follow a different path than that followed + by @|turnstile|: we chose to implement our extended type system as an + abstraction over the existing type system of @|typedracket|. This means that + we do not rely so much on the correctness of our typing rules: instead of + verifying ourselves the well-typedness of compilers written using our + framework, we inject type annotations in the expanded code, which are then + verified by @|typedracket|. Therefore, we are confident that type errors will + be caught by @|typedracket|, safe in the knowledge that the code obtained + after macro-expansion is type-safe@note{We actually use on a few occasions + @racket[unsafe-cast]. Such a blunt instrument is however only used in cases + where @|typedracket| already has the required type information, but the type + checker fails to deduce the equivalence between two formulations of the same + type, or does not acknowledge that the term being checked has the expected + type. These issues are being worked on by the developers of @|typedracket|, + however, and we hope to be able to remove these uses of @racket[unsafe-cast] + in later versions.}. This increased serenity comes at the cost of + flexibility, as @|typedracket|'s type system was not able to express the type + constructs that we wanted to add. We therefore had to resort to a few hacks to + translate our types into constructs that could be expressed using + @|typedracket|. + + The approach of building more complex type constructs atop small trusted + kernel has been pursued by + @|cur|@note{@url["https://github.com/wilbowma/cur"]}, developed by @lastname{ + Bowman}. @|Cur| is a dependently typed language which permits theorem proving + and verified programming. It is based on a small kernel (the Curnel), which + does not contain language features which can be expressed by macro + transformations. Most notably, the prover's tactics are defined using + metaprogramming tools, and are not part of the core language. + + Another tool worth mentioning is + Rosette@note{@url["https://github.com/emina/rosette"]}@todo{reference}. + Rosette, mainly developed by @lastname{Torlak}, is a solver-aided language: it + tightly integrates an SMT solver with some Racket constructs, so that powerful + queries can be performed and answered, for example ``what input values to the + function f will generate outputs which satisfy the predicate p?''. It can also + generate simple functions which satisfy given conditions. These features allow + it to be used both as a helper tool during development, for engineers coming + from various domains, and as a verifier, as the solver can be used to assert + that a function will never give an erroneous output, given a set of + constraints on its input. + + @htodo{Check that I'm not saying anything wrong about Turnstile here.} The + idea of expressing the type of macro transformations at some level (e.g. by + indicating the type of the resulting expression in terms of the type of the + input terms, as is allowed by @|turnstile|) is not new: in 2001, Ganz, Sabry + and Taha already presented in 2001 + MacroML@~cite["ganz2001macros-as-multi-stage-computations"], an experimental + language which allows type checking programs before macro-expansion. However, + it seems that there is interest, both in the Racket community and elsewhere, + for languages with powerful metaprogramming facilities, coupled with an + expressive type system. We are hopeful that this will lead to innovations + concerning the formal verification of programs making heavy use of complex + macros. + } @;{ @@ -374,16 +502,20 @@ analysing data scattered across the program. We build upon the achievements of the @|nanopass-c-f| project, - which is presented in more detail in section XYZ. Simply put, @|nanopass| helps - the programmer define a myriad of compiler passes, each doing a very small - amount of code (and therefore easy to test and maintain), and each with a - different input and output type. + which is presented in more detail in section XYZ. Simply put, @|nanopass| + helps the programmer define a myriad of compiler passes, each doing a very + small amount of code (and therefore easy to test and maintain), and each with + a different input and output type. } } @asection{ @atitle{Summary} + + @epigraph[#:width "8cm" + @elem{Unknown}]{ + Once upon a time…} @asection{ @atitle{Extensible type system} We implemented a different type system on top of @|typedracket|, using macros. Macros have been used not only @@ -396,11 +528,11 @@ logic programming (Racket Datalog@note{@url{http://docs.racket-lang.org/datalog/}} and Racklog@note{@url{http://docs.racket-lang.org/racklog/}}, - Rosette@~cite["torlak_growing_rosette_2013"]). In the past, - @|typedracket|@~cite["tobin-hochstadt_design_2008"] has proved that a type - system can be successfully fitted onto an existing ``untyped'' language, using - macros. We implemented the @racketmodname[type-expander] library atop - @|typedracket|, without altering @|typedracket| itself. Our + Rosette@~cite["torlak_growing_rosette_2013" "torlak_rosette_symbolic_vm"]). In + the past, @|typedracket|@~cite["tobin-hochstadt_design_2008"] has proved that + a type system can be successfully fitted onto an existing ``untyped'' + language, using macros. We implemented the @racketmodname[type-expander] + library atop @|typedracket|, without altering @|typedracket| itself. Our @racketmodname[type-expander] library allows macros to be used in contexts where a type is expected. @@ -479,5 +611,187 @@ bleargh). } -} + @asection{ + @atitle{Overview} + + The rest of this document is structured as follows: + + @htodo{auto chapter and section numbers} + @(let ([related-chap + @seclink["State_of_the_art" + #:doc '(lib "phc-thesis/scribblings/phc-thesis.scrbl") + ]{Chapter 2}] + [related-type-expander + @seclink["related-type-expander" + #:doc '(lib "phc-thesis/scribblings/phc-thesis.scrbl") + ]{Section 2.1}] + [related-adt + @seclink["related-adt" + #:doc '(lib "phc-thesis/scribblings/phc-thesis.scrbl") + ]{Section 2.2}] + [related-nanopass + @seclink["related-nanopass" + #:doc '(lib "phc-thesis/scribblings/phc-thesis.scrbl") + ]{section 2.3}] + [related-cycles + @seclink["related-cycles" + #:doc '(lib "phc-thesis/scribblings/phc-thesis.scrbl") + ]{section 2.4}] + [initial-examples-chap + @seclink["initial-examples-chap" + #:doc '(lib "phc-thesis/scribblings/phc-thesis.scrbl") + ]{Chapter 3}] + [tr-chap + @seclink["tr-chap" + #:doc '(lib "phc-thesis/scribblings/phc-thesis.scrbl") + ]{Section 4.1}] + [type-expander-chap + @seclink["type-expander-chap" + #:doc '(lib "phc-thesis/scribblings/phc-thesis.scrbl") + ]{Section 4.2}] + [adt-chap + @seclink["adt-chap" + #:doc '(lib "phc-thesis/scribblings/phc-thesis.scrbl") + ]{Section 4.3}] + [typed-nanotrees-chap + @seclink["typed-nanotrees-chap" + #:doc '(lib "phc-thesis/scribblings/phc-thesis.scrbl") + ]{Section 5.1}] + [typed-nanodags-chap + @seclink["typed-nanodags-chap" + #:doc '(lib "phc-thesis/scribblings/phc-thesis.scrbl") + ]{Section 5.2}] + [typed-nanographs-chap + @seclink["typed-nanographs-chap" + #:doc '(lib "phc-thesis/scribblings/phc-thesis.scrbl") + ]{Section 5.3}] + [structural-invariants-chap + @seclink["structural-invariants-chap" + #:doc '(lib "phc-thesis/scribblings/phc-thesis.scrbl") + ]{Section 5.4}] + [future-extensions-chap + @seclink["future-extensions-chap" + #:doc '(lib "phc-thesis/scribblings/phc-thesis.scrbl") + ]{Section 5.5}] + [results-chap + @seclink["results-chap" + #:doc '(lib "phc-thesis/scribblings/phc-thesis.scrbl") + ]{Chapter 6}]) + @list{ + @itemlist[ + @item{@|related-chap| presents related work. It discusses approaches to + extensible type systems in @|related-type-expander|. @|related-adt| + considers how structures, variants and polymorphism may exhibit different + properties in different languages, and makes the case for bounded row + polymorphism as a well-suited tool for building compilers. + The @|nanopass-c-f| is presented in @|related-nanopass|, and techniques + used to encode and manipulate graphs are studied in @|related-cycles|.} + @item{In @|initial-examples-chap|, we give some example uses of the compiler + framework described in this thesis. We indicate the pitfalls, bugs + and boilerplate avoided thanks to its use. + @todo{Move this above the related work?}} + @item{@|tr-chap| gives an overview of the features of @|typedracket|'s type + system. It then formalises the features relevant to our work, by giving + the subtyping rules, as well as the builder and accessor primitives + for values of these types.} + @item{@|type-expander-chap| explains how we implemented support for type-level + macros as an extension to @|typedracket|, without modifying the core + implementation. We detail the reduction rules which allow translating + a type making use of expanders into a plain type.} + @item{@|adt-chap| discusses the flavour of algebraic datatypes which we chose + to implement atop @|typedracket|, as well as its extension with row types. + We explain in detail our goals and constraints, and present two + implementations — a first attempt, which unfortunately required verbose + annotations for some ``row operations'', and a second solution, which + greatly reduces the number of annotations required, by using a different + implementation strategy. We give formal semantics for these extensions, + give the reduction rules which translate them back to @|typedracket|'s type + system, and show that this reduction preserves the original semantics. We + then finally define a layer of syntactic sugar which allows convenient + use of these new type primitives.} + @item{@|typed-nanotrees-chap| builds upon these algebraic datatypes and row + types to define a typed version of the @|nanopass-c-f| which operates on + abstract syntax trees. We explain the notions of + @htodo{tech}@tech{tree nodes}, @htodo{tech}@tech{mapping functions} and + @htodo{tech}@tech{cata-morphisms}, show how these interact with row typing, + and give an overview of the implementation. We then extend this with + @tech{detached mappings}: this feature allows the user to map a function + over all nodes of a given type in a graph, regardless of the structure of + the graph outisde of the relevant parts which are manipulated by the + mapping function. This allows test data to not only omit irrelevant + branches in the abstract syntax tree by omitting the field pointing to + these branches, but also irrelevant parts located above the interesting + nodes. In other words, this feature allows cutting and removing the top of + the abstract syntax tree, and glue together the resulting forest. + We also formally describe the result of applying a set of detached or + regular mapping functions to an input tree.} + @item{@|typed-nanodags-chap| extends this typed version of @|nanopass| to + handle directed acyclic graphs. We start by considering concerns such as + equality of nodes (for which the previous chapter assumed a predicate + existed, without actually implementing it), and hash consing. This allows + us to prepare the ground for the extension presented in the next chapter, + namely graphs.} + @item{We can then introduce support for cycles in @|typed-nanographs-chap|: + instead of describing abstract syntax tree transformations, it then becomes + possible to describe graphs transformations. To this end, we introduce new + kinds of nodes: @tech{placeholders}, which are used during the construction + of the graph, @tech{with-indices} nodes, which encode references to + neighbouring nodes as indices into arrays containing all nodes of a given + type, for the current graph, and @tech{with-promises} nodes, which hide + away this implementation detail by lazily resolving all references, using + a uniform API. This allows all fields of a given node to be accessed in the + same way, while still allowing logical cycles built atop a purely immutable + representation. + + We give an overview of how our implementation handles cycles, using + worklists which gather and return @tech{placeholders} when the mapping + functions perform recursive calls, and subsequently turn the results into + first into @tech{with-indices} nodes, then into @tech{with-promises} ones. + We then update our notion of equality and hash consing to account for + cycles, and update the formal semantics too.} + @item{@;{The fourth feature…} + Extra safety can be obtained by introducing some structural invariants + which constrain the shape of the graph. For example, it is possible to + ensure the well-scopedness of variable references. Another desirable + property would be the absence of cycles, either to better model the + @tech{IR}, knowing that cyclic references are not allowed at some point by + the target language, or to detect early on changes in the @tech{IR} which + may break code assuming the absence of cycles. A third option would be to + ensure that ``parent'' pointers are correct, and that the node containing + them is indeed referenced by the parent (i.e., ensure the @emph{presence} + of well-formed cycles). @|structural-invariants-chap| presents an extension + of our graph manipulation framework, which allows the specification of + structural invariants. These can in some cases be checked statically, + in other cases it may be necessary to follow a macro-enforced discipline, + and as a last resort, a dynamic check may be performed. + + @htodo{+ singleton / all nodes of a given type referenced by a central + point with bounded length / only one node with a given value for a given + property.} + + We further explain how we use phantom types to reflect at the type level + which invariants were checked on an instance of a graph. The types used to + represent that an instance satisfies an invariant are chosen so that + instances with stronger invariants are subtypes of instances with weaker + invariants.} + @item{Finally, in @|future-extensions-chap| we succinctly present some + extensions which could be added to the framework presented in the previous + chapters. We discuss how it would be possible to garbage-collect unused + parts of the graph when only a reference to an internal node is kept, and + the root is logically unreachable. Another useful feature would be the + ability to define general-purpose graph algorithm (depth-first traversal, + topological sort, graph colouring, and so on), operating on a subset of + the graph's fields. This would allow to perform these common operations + while considering only a subgraph of the one being manipulated. Finally, + we mention the possibility to implement an α-equivalence comparison + operator.@;{α-normal form}} + @item{In @|results-chap|, we present more examples and revisit the initial + examples illustrating our goals in the light of the previous chapters. + + We ported the most complete compiler written with @|nanopass| (a Scheme + compiler) to our framework; we summarise our experience and compare our + approach with @|nanopass|, by indicating the changes required, in + particular how many additional type annotations were necessary.} + @item{Finally, we conclude and list future work directions. + @htodo{(turnstile, improvements to Racket itself, etc.)}}]})}} diff --git a/scribblings/state-of-the-art.scrbl b/scribblings/state-of-the-art.scrbl @@ -9,7 +9,7 @@ #:version (version-text)]{State of the art} @asection{ - @atitle{Extending the type system via macros (type-expander)} + @atitle[#:tag "prelated-type-expander"]{Extending the type system via macros} Our work explores one interesting use of macros: their use to extend a programming language's type system. @@ -69,7 +69,7 @@ libraries@~cite["tobin-hochstadt_languages_as_libraries_2011"]} @asection{ - @atitle{Algebraic datatypes for compilers (phc-adt)} + @atitle[#:tag "related-adt"]{Algebraic datatypes for compilers} The @tt{phc-adt} library implements algebraic datatypes (variants and structures) which are adapted to compiler-writing. @@ -92,6 +92,8 @@ new fields, regardless of optional fields. We implement the latter.@htodo{ Probably the former too.}}] + @todo{Cite the variations on variants paper (for Haskell)} + @asection{ @atitle{The case for bounded row polymorphism} @todo{Explain the ``expression problem''.} @@ -329,7 +331,7 @@ @; TODO: fix the footnote here! See also post on Quora@superscript{\ref{quora-url-footnote}}, which links to @~cite["cardelli1985understanding"], and to a blog post by Sam - Tobin-Hochstadt@note{@url|{https://medium.com/@samth/on-typed-untyped-and-uni-typed-languages-8a3b4bedf68c}|} + Tobin-Hochstadt@note{@url["https://medium.com/@samth/on-typed-untyped-and-uni-typed-languages-8a3b4bedf68c"]} The blog post by Sam Tobin-Hochstadt explains how @typedracket tries to explore and understand how programmers think about programs written in so-called ``untyped'' languages (namely that the programmers still @@ -417,12 +419,15 @@ #:style (style #f (list (short-title "Writing compilers using many small passes"))) - ]{Writing compilers using many small passes (a.k.a following the Nanopass - Compiler Framework philosophy)} + #:tag "related-nanopass" + ]{Writing compilers using many small passes @elem[ + #:style (style #f '(aux))]{(a.k.a following the Nanopass Compiler + Framework philosophy)}} } @asection{ - @atitle{Cycles in intermediate representations of programs} + @atitle[#:tag "related-cycles"]{Cycles in intermediate representations of + programs} @todo{There already were a few references in my proposal for JFLA.} @todo{Look for articles about graph rewriting systems.} diff --git a/scribblings/util.rkt b/scribblings/util.rkt @@ -25,7 +25,8 @@ part-style-update epigraph usetech - hr) + hr + lastname) (require racket/stxparam racket/splicing @@ -428,4 +429,6 @@ EOTEX (string->bytes/utf-8 #<<EOTEX \def\hrStyle#1{\noindent{\centerline{\rule[0.5ex]{0.5\linewidth}{0.5pt}}}} EOTEX - )))))) -\ No newline at end of file + )))))) + +(define lastname list) +\ No newline at end of file