commit 7fce7b06b453082e9680326f9604c3e39e888086
parent d91139284da5687534f793486ef7bee1ade48ddd
Author: Georges Dupéron <georges.duperon@gmail.com>
Date: Fri, 2 Jun 2017 16:16:23 +0200
Started writing the introduction
Diffstat:
4 files changed, 186 insertions(+), 2 deletions(-)
diff --git a/scribblings/bibliography.bib b/scribblings/bibliography.bib
@@ -1,3 +1,22 @@
+% One factor that influences the cost of changes is the length of time it takes
+% to complete the edit-compile-link-run-test cycle. Under conventional
+% operating systems, this cycle is usually on the order of a few minutes. In
+% Smalltalk, it is a few seconds
+@article{smalltalk-programmer-efficiency-cycle,
+ author = {Sandberg, D. W.},
+ title = {Smalltalk and Exploratory Programming},
+ journal = {SIGPLAN Notices},
+ issue_date = {Oct. 1988},
+ volume = {23},
+ number = {10},
+ month = oct,
+ year = {1988},
+ issn = {0362-1340},
+ pages = {85--92},
+ numpages = {8},
+ doi = {10.1145/51607.51614},
+ publisher = {ACM},
+}
@article{bobrow_common_1988,
title = {Common lisp object system specification},
diff --git a/scribblings/introduction.scrbl b/scribblings/introduction.scrbl
@@ -0,0 +1,80 @@
+#lang scribble/manual
+
+@require["util.rkt"]
+@(use-mathjax)
+
+@title[#:style (with-html5 manual-doc-style)
+ #:version (version-text)]{Introduction}
+
+@asection{
+ @atitle{The challenges of writing compilers}
+
+ @epigraph[#:width "8cm"
+ @elem{@italic{That Mitchell & Webb Look, Series 3} — BBC Two}]{
+ Brain surgery? — It’s not exactly rocket science, is it?}
+
+ Compilers are an essential part of today's software systems. Compilers
+ translate high-level languages with complex semantics into lower-level
+ languages. A compiler will parse the program, transform it in various ways,
+ perform some more or less advanced static checks, and optimise the input
+ program before producing an output in the desired target language. A compiler
+ must be correct, extensible and fast: correct because programmers are
+ concerned with logical errors in their own code, and should not fear that the
+ compiler introduces erroneous behaviour on its own; extensible because the
+ language is likely to evolve over time; and fast because the programmer's
+ edit-build-test cycle should be as short as possible@todo{@~cite{
+ smalltalk-programmer-efficiency-cycle}}.
+
+ Given their broad role, the complexity of the transformations involved, and
+ the stringent requirements, writing compilers is a difficult task.
+
+ The overall structure of a compiler will usually include a lexer and parser,
+ which turn the program's source into an in-memory representation. This
+ initial representation will often be translated into an @deftech[#:key "IR"]{
+ intermediate representation} (IR) better suited to the subsequent steps. At
+ some early point, the program will be analysed for syntactical or semantic
+ inconsistencies (ranging from missing parentheses to duplicate definitions of
+ the same variable), and may also undergo a more thorough static analysis. The
+ translation can then include an optimisation phase, based on
+ locally-recognisable patterns or on the results of the program-wide analysis
+ performed separately. Finally, code in the target language or for the target
+ architecture is generated.
+
+ Some pitfalls await the compiler-writer: it is easy to reuse a single
+ intermediate representation excessively; and there is a high risk associated with
+ the writing of large, monolithic passes, which are hard to test, debug, and
+ extend. We will discuss these pitfalls in more detail in the following
+ paragraphs. Both issues are prone to manifestations of some form or another of
+ the ``god object'' anti-pattern@note{The ``god object'' anti-pattern describes
+ object-oriented classes which @emph{do} too much or @emph{know} too much. The
+ size of these classes tends to grow out of control, and there is usually a
+ tight coupling between the methods of the object, which in turn means that
+ performing small changes may require understanding the interactions between
+ random parts of a very large file, in order to avoid breaking existing
+ functionality.}.
+
+
+ The static analysis, optimisation and code generation phases could in
+ principle work on the same intermediate representation. Several issues arise
+ from this situation, however. First, new information gained by the static
+ analysis may be added to the existing representation via mutation, or the
+ optimiser could directly alter the @tech{IR}. This means that the @tech{IR}
+ will initially contain holes (e.g. represented by @racketid[null] values),
+ which will get filled in gradually. Manipulating these parts is then extremely
+ risky, as it is easy to accidentally attempt to retrieve a value before it was
+ actually computed. Using the same @tech{IR} throughout the compiler also makes
+ it difficult for later passes to assume that some constructions have been
+ eliminated by previous simplification passes. One has to rely on the order of
+ execution of the passes in order to know what the data structure contains,
+ instead of having this information indicated by the @tech{IR}'s type.
+
+ @;{
+ The static analysis, optimisation and code generation phases will often work
+ on that intermediate representation.
+
+ These transformations are often non-trivial and may require aggregating and
+ analysing data scattered across the program.
+
+ triggering anti-patterns like ``god object''
+ }
+}
+\ No newline at end of file
diff --git a/scribblings/phc-thesis.scrbl b/scribblings/phc-thesis.scrbl
@@ -35,6 +35,7 @@
@(table-of-contents)
+@include-asection{introduction.scrbl}
@include-asection{state-of-the-art.scrbl}
@;@(generate-bibliography-section)
diff --git a/scribblings/util.rkt b/scribblings/util.rkt
@@ -22,7 +22,8 @@
include-section*
include-asection
struct-update
- part-style-update)
+ part-style-update
+ epigraph)
(require racket/stxparam
racket/splicing
@@ -38,7 +39,9 @@
scribble-math
phc-toolkit/untyped/meta-struct
"abbreviations.rkt"
- (for-syntax syntax/parse))
+ (for-syntax syntax/parse)
+ scribble/html-properties
+ scribble/latex-properties)
(use-mathjax)
@@ -271,3 +274,82 @@
(if precision
(list (apply ~cite rest) ", " precision)
(apply ~cite rest)))
+
+(define epigraph-css ; CSS (as bytes) for HTML epigraphs: right-aligned table; italic quote cell with a rule under it, then the author cell
+ #"
+.epigraphStyle p:last-child {
+ padding-bottom: 0.2em;
+ margin-bottom: 0;
+}
+
+.epigraphAuthorStyle p:first-child {
+ padding-top: 0;
+ margin-top: 0;
+}
+
+.epigraphOuter {
+ text-align: right;
+ display: table;
+ margin-right: 0;
+ padding-right: 0;
+ margin-left: auto;
+}
+
+.epigraphStyle {
+ display: table-cell;
+ border-bottom: thin solid gray;
+ font-style: italic;
+}
+
+.epigraphAuthorStyle {
+ display: table-cell;
+ padding-top: 0.5em;
+}
+
+.epigraphOuter > .SIntrapara {
+ margin: 0;
+ display: table-row;
+}
+
+.epigraphOuter * {
+ margin-right: 0;
+ padding-right: 0;
+ margin-left: 0;
+ padding-left: 0;
+}
+")
+
+(define epigraph-tex ; LaTeX definitions (as bytes): epigraphStyle stores its body; epigraphAuthorStyle then emits \epigraph{\emph{quote}}{author}
+ (string->bytes/utf-8 ; the here-string below is a string; encode it to UTF-8 bytes
+ #<<EOTEX
+\usepackage{epigraph}
+\usepackage{environ}
+\def\lastepigraph{}
+\def\lastepigraphauthor{}
+\newenvironment{epigraphOuter}{}{}
+\def\setepigraphwidth#1{\setlength{\epigraphwidth}{#1}}
+\NewEnviron{epigraphStyle}{\global\let\lastepigraph\BODY}
+\NewEnviron{epigraphAuthorStyle}{%
+ \global\let\lastepigraphauthor\BODY%
+ \epigraph{\emph{\lastepigraph}}{\lastepigraphauthor}%
+}
+EOTEX
+ ))
+
+(define epigraph-additions ; style properties carrying epigraph-css and epigraph-tex; attached to the quote and author styles in epigraph
+ (list (css-addition epigraph-css)
+ (tex-addition epigraph-tex)))
+
+(define (epigraph #:width [width "6cm"] author . rest) ; builds an epigraph block: rest is the quoted content, author the attribution
+ (nested #:style (style "epigraphOuter"
+ '() ; no style properties: the max-width attribute below is disabled with #;
+ #;(list
+ (attributes
+ `([style . ,(format "max-width: ~a;" width)]))))
+ (cond-element
+ [latex (elem #:style (style "setepigraphwidth" '()) width)] ; width is only used here; presumably rendered via \setepigraphwidth from epigraph-tex (confirm)
+ [else (elem)]) ; other renderers get an empty element
+ (apply nested #:style (style "epigraphStyle" epigraph-additions) ; quote block comes first
+ rest)
+ (nested #:style (style "epigraphAuthorStyle" epigraph-additions) ; author block follows the quote
+ author)))
+\ No newline at end of file