@ -0,0 +1,15 @@ | |||||
/.site | |||||
/.store | |||||
/.stack-work | |||||
/.vscode | |||||
/node_modules | |||||
/uni | |||||
/portfolio.md | |||||
/fonts | |||||
/css/fonts | |||||
/.mailmap | |||||
.katex_cache |
@ -0,0 +1,27 @@ | |||||
name: blag | |||||
version: 0.1.0.0 | |||||
build-type: Simple | |||||
cabal-version: >= 1.10 | |||||
executable site | |||||
main-is: site.hs | |||||
build-depends: base | |||||
, text | |||||
, hsass | |||||
, aeson | |||||
, hakyll | |||||
, pandoc | |||||
, binary | |||||
, process | |||||
, deepseq | |||||
, hashable | |||||
, directory | |||||
, containers | |||||
, bytestring | |||||
, uri-encode | |||||
, hakyll-sass | |||||
, skylighting | |||||
, pandoc-types | |||||
, unordered-containers | |||||
ghc-options: -threaded | |||||
default-language: Haskell2010 |
@ -0,0 +1,98 @@ | |||||
// Shared palette for Agda/code highlighting (One Dark-ish).
// NOTE(review): these $code-* variables duplicate the palette in vars.scss,
// except $code-bg: vars.scss uses the dark #282C34 while this file uses a
// near-white grey. With $code-fg left at the light #ABB2BF, this gives
// light-on-light text — confirm which background is actually intended.
$code-bg: hsl(230,1%,98%);
$code-fg: #ABB2BF;
$code-red: #D65122;
$code-red-br: #AE3B36;
$code-green: #88B966;
$code-yellow: #DEB468;
$code-orange: #C58853;
$code-blue: #519DEB;
$code-pink: #C678DD;
$code-cyan: #48A8B5;
$code-white: #ABB2BF;
$code-grey: #7F848E;
@font-face { | |||||
font-family: 'Iosevka'; | |||||
font-display: swap; | |||||
font-weight: 400; | |||||
font-stretch: normal; | |||||
font-style: normal; | |||||
src: url('/static/woff2/iosevk-abbie-regular.woff2') format('woff2'), url('/static/ttf/iosevk-abbie-regular.ttf') format('truetype'); | |||||
} | |||||
body { | |||||
margin: 0; | |||||
background-color:$code-bg; | |||||
color:#ABB2BF; | |||||
} | |||||
html { | |||||
background-color: $code-bg; | |||||
color:#ABB2BF; | |||||
} | |||||
pre.Agda { | |||||
margin: 0; | |||||
padding: 1em; | |||||
background-color: $code-bg; | |||||
color: $code-fg; | |||||
} | |||||
@keyframes highlight { | |||||
0% { | |||||
background-color: #F5DEB3; | |||||
} | |||||
100% { | |||||
background-color: $code-bg; | |||||
} | |||||
} | |||||
/* Agda highlighting. Plain declarations on .Agda itself come first — Dart
   Sass deprecates declarations that follow nested rules (their position in
   the emitted CSS changes in future versions), so hoisting them is both the
   fix and behavior-identical today. The nested selectors map Agda's aspect
   classes onto the shared $code-* palette. */
.Agda {
  /* NOTE(review): hard-coded dark background disagrees with $code-bg
     (near-white in this file) used by .Background below — confirm which
     background Agda blocks are meant to have. */
  background-color: #282C34;
  font-family: 'Iosevka', 'Fantasque Sans Mono', 'Roboto Mono', monospace;
  font-weight: 400;
  font-size: 16pt;

  /* Aspects. */
  .Comment       { color: $code-grey; }
  .Background    { background-color: $code-bg; }
  .Markup        { color: $code-fg; }
  .Keyword       { color: $code-orange; }
  .String        { color: $code-red; }
  .Number        { color: $code-pink; }
  .Symbol        { color: $code-fg; }
  .PrimitiveType { color: $code-blue; }
  .Pragma        { color: $code-fg; }

  /* NameKinds. */
  .Bound                  { color: $code-fg; }
  .Generalizable          { color: $code-fg; }
  .InductiveConstructor   { color: $code-green; }
  .CoinductiveConstructor { color: $code-green; }
  .Datatype               { color: $code-blue; }
  .Field                  { color: #F570B7; }
  .Function               { color: $code-blue; }
  .Module                 { color: $code-pink; }
  .Postulate              { color: $code-blue; }
  .Primitive              { color: $code-blue; }
  .Record                 { color: $code-blue; }

  /* OtherAspects: problem markers painted over the code. */
  .UnsolvedMeta         { color: $code-fg; background: yellow }
  .UnsolvedConstraint   { color: $code-fg; background: yellow }
  .TerminationProblem   { color: $code-fg; background: #FFA07A }
  .IncompletePattern    { color: $code-fg; background: #F5DEB3 }
  .Error                { color: red; text-decoration: underline }
  .TypeChecks           { color: $code-fg; background: #ADD8E6 }
  .Deadcode             { color: $code-fg; background: #808080 }
  .ShadowingInTelescope { color: $code-fg; background: #808080 }

  /* Standard attributes. */
  a { text-decoration: none }
  a[href]:hover {
    text-decoration: 2px #B4EEB4 underline dotted;
  }
  a[href]:target {
    /* @keyframes highlight flashes wheat -> $code-bg on anchor jump. */
    animation: highlight 2.5s;
  }
}
@ -0,0 +1,528 @@ | |||||
@import "vars.scss"; | |||||
@mixin center-that-bitch { | |||||
display: flex; | |||||
flex-direction: column; | |||||
align-items: center; | |||||
} | |||||
html { | |||||
min-height: 100%; | |||||
height: 100%; | |||||
max-width: 100%; | |||||
margin: 0; | |||||
overflow-x: clip; | |||||
} | |||||
body { | |||||
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, 'Open Sans', 'Helvetica Neue', sans-serif; | |||||
width: 100%; | |||||
@include center-that-bitch; | |||||
margin: 0; | |||||
color: $bluegray-900; | |||||
font-size: $font-size; | |||||
overflow-x: clip; | |||||
} | |||||
body > header { | |||||
background-color: $purple-600; | |||||
display: flex; | |||||
height: $nav-height; | |||||
width: 100%; | |||||
z-index: 999; | |||||
position: fixed; | |||||
justify-content: space-between; | |||||
align-items: center; | |||||
padding-left: 1em; | |||||
padding-right: 1em; | |||||
border-bottom: 3px solid $purple-700; | |||||
box-sizing: border-box; | |||||
div, nav > a { | |||||
height: $nav-height; | |||||
padding-left: 0.3em; | |||||
padding-right: 0.3em; | |||||
display: flex; | |||||
align-items: center; | |||||
} | |||||
div:hover, nav > a:hover { | |||||
background-color: $purple-500; | |||||
transition: background-color 0.3s ease-in-out; | |||||
} | |||||
a { | |||||
color: white; | |||||
font-size: $font-size * 1.2; | |||||
text-decoration: none; | |||||
} | |||||
nav { | |||||
display: flex; | |||||
align-items: center; | |||||
gap: 0.5em; | |||||
} | |||||
} | |||||
@mixin left-bordered-block($color) { | |||||
padding-left: 1em; | |||||
padding-top: 0.2em; | |||||
padding-bottom: 0.2em; | |||||
border-left: 5px dashed $color; | |||||
} | |||||
@mixin material { | |||||
padding: 1em; | |||||
margin-top: 1em; | |||||
margin-bottom: 1em; | |||||
box-shadow: 2px 2px 6px black; | |||||
border-radius: 10px; | |||||
} | |||||
main { | |||||
max-width: 100ch; | |||||
width: 100%; | |||||
margin: 0px auto 0px auto; | |||||
flex: 1 0 auto; | |||||
padding: 2ch; | |||||
padding-top: $nav-height; | |||||
box-sizing: border-box; | |||||
div#title h2 { | |||||
display: none; | |||||
} | |||||
div#post-toc-container { | |||||
aside#toc { | |||||
display: none; | |||||
} | |||||
article { | |||||
grid-column: 2; | |||||
width: 100%; | |||||
line-height: 1.5; | |||||
} | |||||
} | |||||
div#post-info { | |||||
font-style: italic; | |||||
line-height: 1.2; | |||||
@include left-bordered-block($bluegray-500); | |||||
} | |||||
} | |||||
div.warning { | |||||
@include material; | |||||
background-color: $red-200; | |||||
} | |||||
figure.wraparound { | |||||
float: right; | |||||
width: auto; | |||||
margin-left: 2em; | |||||
} | |||||
figure { | |||||
overflow-x: auto; | |||||
overflow-y: clip; | |||||
width: 100%; | |||||
margin: auto; | |||||
@include center-that-bitch; | |||||
figcaption { | |||||
margin-top: 0.3em; | |||||
display: inline-block; | |||||
text-align: center; | |||||
} | |||||
p { | |||||
margin: 0; | |||||
} | |||||
} | |||||
ol, ul { | |||||
padding-left: 1.2em; | |||||
li { | |||||
margin-top: 5px; | |||||
margin-bottom: 5px; | |||||
p { | |||||
margin-top: 5px; | |||||
margin-bottom: 5px; | |||||
} | |||||
} | |||||
} | |||||
.katex-display { | |||||
> span.katex { | |||||
white-space: normal; | |||||
} | |||||
} | |||||
div.mathpar { | |||||
display: flex; | |||||
flex-flow: row wrap; | |||||
justify-content: center; | |||||
align-items: center; | |||||
gap: 1em; | |||||
> figure { | |||||
width: auto; | |||||
max-width: 33%; | |||||
} | |||||
} | |||||
div.columns { | |||||
blockquote, details.blockquote { | |||||
padding-right: 1em; | |||||
padding-left: 1em; | |||||
padding-top: 0.2em; | |||||
padding-bottom: 0.2em; | |||||
border: 0; | |||||
} | |||||
} | |||||
code, pre, .sourceCode { | |||||
font-size: $font-size; | |||||
font-family: 'Iosevka', 'Fantasque Sans Mono', Consolas, "Andale Mono WT", "Andale Mono", "Lucida Console", "Lucida Sans Typewriter", "DejaVu Sans Mono", "Bitstream Vera Sans Mono", "Liberation Mono", "Nimbus Mono L", Monaco, "Courier New", Courier, monospace; | |||||
font-weight: 500; | |||||
} | |||||
div.sourceCode, pre { | |||||
background-color: $code-bg; | |||||
color: $code-fg; | |||||
flex-grow: 0; | |||||
@include material; | |||||
overflow-x: auto; | |||||
line-height: 1.2; | |||||
code { | |||||
display: block; | |||||
} | |||||
> pre { | |||||
padding: unset; | |||||
margin-top: unset; | |||||
margin-bottom: unset; | |||||
box-shadow: unset; | |||||
margin: 0; | |||||
overflow-y: clip; | |||||
} | |||||
} | |||||
p > code { | |||||
white-space: nowrap; | |||||
} | |||||
blockquote, details.blockquote { | |||||
@include material; | |||||
background-color: $purple-100; | |||||
margin-left: 0; | |||||
margin-right: 0; | |||||
h2 { | |||||
margin-top: 0; | |||||
} | |||||
} | |||||
table { | |||||
width: 70%; | |||||
margin: auto; | |||||
border-collapse: collapse; | |||||
td, th { | |||||
text-align: center; | |||||
padding: 0px 1em 0px 1em; | |||||
border: 2px solid $purple-400; | |||||
} | |||||
} | |||||
ul#post-list { | |||||
list-style-type: none; | |||||
display: flex; | |||||
flex-direction: column; | |||||
.post-list-item { | |||||
@include left-bordered-block($yellow-500); | |||||
@include material; | |||||
margin: 0; | |||||
background-color: $yellow-50; | |||||
.post-list-header { | |||||
margin-top: 0.2em; | |||||
display: flex; | |||||
justify-content: space-between; | |||||
line-height: 14pt; | |||||
font-style: italic; | |||||
font-size: 10pt; | |||||
a { | |||||
font-size: 14pt; | |||||
font-style: normal; | |||||
color: $bluegray-800; | |||||
} | |||||
} | |||||
} | |||||
} | |||||
div.contact-list { | |||||
display: flex; | |||||
justify-content: space-evenly; | |||||
align-items: stretch; | |||||
gap: 3em; | |||||
div.contact-card { | |||||
background-color: $purple-200; | |||||
@include material; | |||||
width: 33%; | |||||
max-width: 33%; | |||||
flex-grow: 1; | |||||
p { | |||||
margin: 0; | |||||
} | |||||
div.contact-header { | |||||
// I really hate Pandoc sometimes | |||||
display: flex; | |||||
align-items: center; | |||||
gap: 1em; | |||||
margin-bottom: 10px; | |||||
img { | |||||
height: 48px; | |||||
clip-path: url(#squircle); | |||||
} | |||||
span.username { | |||||
font-size: 16pt; | |||||
} | |||||
} | |||||
} | |||||
} | |||||
@media only screen and (max-width: 450px) { | |||||
body > header { | |||||
div#logo { | |||||
width: 100%; | |||||
display: flex; | |||||
flex-direction: row; | |||||
justify-content: center; | |||||
} | |||||
nav { | |||||
display: none; | |||||
} | |||||
} | |||||
} | |||||
@media only screen and (min-width: 1500px) { | |||||
.narrow-only { | |||||
display: none !important; | |||||
} | |||||
main { | |||||
max-width: 100%; | |||||
> div#title { | |||||
font-size: 15pt; | |||||
h1, h2 { | |||||
margin: 0; | |||||
} | |||||
h2 { | |||||
font-style: italic; | |||||
font-weight: normal; | |||||
display: block; | |||||
z-index: 1; | |||||
} | |||||
margin-top: 0.5em; | |||||
margin-bottom: 1em; | |||||
@include center-that-bitch; | |||||
} | |||||
div#post-toc-container { | |||||
display: grid; | |||||
grid-template-columns: 0.5fr 2fr 0.5fr; | |||||
gap: 1em; | |||||
aside#toc { | |||||
display: block !important; | |||||
h3 { @include center-that-bitch; } | |||||
div#toc-container { | |||||
overflow-x: hidden; | |||||
width: 100%; | |||||
position: sticky; | |||||
top: 2em; | |||||
overflow-y: auto; | |||||
max-height: 90vh; | |||||
bottom: 2em; | |||||
ul { | |||||
border-left: 2px solid $bluegray-400; | |||||
list-style-type: none; | |||||
padding-left: 1em; | |||||
a { | |||||
text-decoration: none; | |||||
} | |||||
a:hover { | |||||
text-decoration: underline; | |||||
} | |||||
} | |||||
} | |||||
} | |||||
article {
  max-width: 100ch;
  // The `margin: auto` shorthand resets all four margins, so the
  // `margin-top: -100px` that used to precede it was dead code — removed.
  margin: auto;
}
} | |||||
div.columns { | |||||
display: grid; | |||||
grid-template-columns: 1fr 1fr; | |||||
gap: 1em; | |||||
} | |||||
} | |||||
} | |||||
#index { | |||||
padding-top: 4em; | |||||
a.ico-left { | |||||
img { | |||||
clip-path: url(#squircle); | |||||
width: 96px; | |||||
height: 96px; | |||||
} | |||||
float: left; | |||||
margin-right: 1em; | |||||
width: 96px; | |||||
height: 96px; | |||||
} | |||||
a.ico-right { | |||||
img { | |||||
clip-path: url(#squircle); | |||||
width: 96px; | |||||
height: 96px; | |||||
} | |||||
float: right; | |||||
margin-left: 1em; | |||||
width: 96px; | |||||
height: 96px; | |||||
} | |||||
div#social { | |||||
display: flex; | |||||
flex-direction: row; | |||||
justify-content: center; | |||||
flex-wrap: wrap; | |||||
width: 100%; | |||||
gap: 8px; | |||||
img { | |||||
width: 48px; | |||||
height: 48px; | |||||
clip-path: url(#squircle); | |||||
transition: width 0.25s, height 0.25s; | |||||
&:hover { | |||||
width: 54px; | |||||
height: 54px; | |||||
} | |||||
} | |||||
a { | |||||
filter: drop-shadow(2px 2px 3px rgba(50, 50, 0, 0.5)); | |||||
height: 54px; | |||||
} | |||||
} | |||||
display: flex; | |||||
flex-direction: column; | |||||
} | |||||
@media only screen and (min-width: 1500px) { | |||||
#index { | |||||
display: grid; | |||||
grid-template-columns: 0.20fr 0.75fr 0.20fr 1fr 0.20fr; | |||||
} | |||||
} | |||||
details { | |||||
margin-top: 1em; | |||||
margin-bottom: 1em; | |||||
} | |||||
// Styles for code | |||||
code.kw, span.kw { color: $code-pink; } /* Keyword */ | |||||
code.dt, span.dt { color: $code-blue; } /* DataType */ | |||||
code.dv, span.dv { color: $code-orange; } /* DecVal */ | |||||
code.bn, span.bn { color: $code-orange; } /* BaseN */ | |||||
code.fl, span.fl { color: $code-orange; } /* Float */ | |||||
code.ch, span.ch { color: $code-green; } /* Char */ | |||||
code.st, span.st { color: $code-green; } /* String */ | |||||
code.co, span.co { color: $code-grey; } /* Comment */ | |||||
code.ot, span.ot { color: $code-green; } /* Other */ | |||||
code.al, span.al { color: #ff0000; } /* Alert */ | |||||
code.fu, span.fu { color: $code-fg; } /* Function */ | |||||
code.er, span.er { color: #ff0000; } /* Error */ | |||||
code.wa, span.wa { color: #60a0b0; } /* Warning */ | |||||
code.cn, span.cn { color: $code-orange; } /* Constant */ | |||||
code.sc, span.sc { color: $code-yellow; } /* SpecialChar */ | |||||
code.vs, span.vs { color: $code-blue; } /* VerbatimString */ | |||||
code.ss, span.ss { color: $code-green; } /* SpecialString */ | |||||
code.va, span.va { color: $code-fg; } /* Variable */ | |||||
code.cf, span.cf { color: $code-pink; } /* ControlFlow */ | |||||
code.op, span.op { color: $code-green; } /* Operator */ | |||||
code.pp, span.pp { color: $code-orange; } /* Preprocessor */ | |||||
code.at, span.at { color: $code-green; } /* Attribute */ | |||||
code.do, span.do { color: $code-red; } /* Documentation */ | |||||
code.an, span.an { color: $code-red; } /* Annotation */ | |||||
code.cv, span.cv { color: $code-red; } /* CommentVar */ |
@ -0,0 +1,419 @@ | |||||
@font-face { | |||||
font-family: 'Iosevka'; | |||||
font-display: swap; | |||||
font-weight: 400; | |||||
font-stretch: normal; | |||||
font-style: normal; | |||||
src: url('/static/woff2/iosevk-abbie-regular.woff2') format('woff2'), url('/static/ttf/iosevk-abbie-regular.ttf') format('truetype'); | |||||
} | |||||
@font-face { | |||||
font-family: 'Iosevka'; | |||||
font-display: swap; | |||||
font-weight: 400; | |||||
font-stretch: expanded; | |||||
font-style: normal; | |||||
src: url('/static/woff2/iosevk-abbie-extended.woff2') format('woff2'), url('/static/ttf/iosevk-abbie-extended.ttf') format('truetype'); | |||||
} | |||||
@font-face { | |||||
font-family: 'Iosevka'; | |||||
font-display: swap; | |||||
font-weight: 400; | |||||
font-stretch: normal; | |||||
font-style: oblique; | |||||
src: url('/static/woff2/iosevk-abbie-oblique.woff2') format('woff2'), url('/static/ttf/iosevk-abbie-oblique.ttf') format('truetype'); | |||||
} | |||||
@font-face { | |||||
font-family: 'Iosevka Oblique'; | |||||
font-display: swap; | |||||
font-weight: 400; | |||||
font-stretch: normal; | |||||
src: url('/static/woff2/iosevk-abbie-oblique.woff2') format('woff2'), url('/static/ttf/iosevk-abbie-oblique.ttf') format('truetype'); | |||||
} | |||||
@font-face { | |||||
font-family: 'Iosevka'; | |||||
font-display: swap; | |||||
font-weight: 400; | |||||
font-stretch: expanded; | |||||
font-style: oblique; | |||||
src: url('/static/woff2/iosevk-abbie-extendedoblique.woff2') format('woff2'), url('/static/ttf/iosevk-abbie-extendedoblique.ttf') format('truetype'); | |||||
} | |||||
@font-face { | |||||
font-family: 'Iosevka Oblique'; | |||||
font-display: swap; | |||||
font-weight: 400; | |||||
font-stretch: expanded; | |||||
src: url('/static/woff2/iosevk-abbie-extendedoblique.woff2') format('woff2'), url('/static/ttf/iosevk-abbie-extendedoblique.ttf') format('truetype'); | |||||
} | |||||
@font-face { | |||||
font-family: 'Iosevka'; | |||||
font-display: swap; | |||||
font-weight: 400; | |||||
font-stretch: normal; | |||||
font-style: italic; | |||||
src: url('/static/woff2/iosevk-abbie-italic.woff2') format('woff2'), url('/static/ttf/iosevk-abbie-italic.ttf') format('truetype'); | |||||
} | |||||
@font-face { | |||||
font-family: 'Iosevka'; | |||||
font-display: swap; | |||||
font-weight: 400; | |||||
font-stretch: expanded; | |||||
font-style: italic; | |||||
src: url('/static/woff2/iosevk-abbie-extendeditalic.woff2') format('woff2'), url('/static/ttf/iosevk-abbie-extendeditalic.ttf') format('truetype'); | |||||
} | |||||
@font-face { | |||||
font-family: 'Iosevka'; | |||||
font-display: swap; | |||||
font-weight: 500; | |||||
font-stretch: normal; | |||||
font-style: normal; | |||||
src: url('/static/woff2/iosevk-abbie-medium.woff2') format('woff2'), url('/static/ttf/iosevk-abbie-medium.ttf') format('truetype'); | |||||
} | |||||
@font-face { | |||||
font-family: 'Iosevka'; | |||||
font-display: swap; | |||||
font-weight: 500; | |||||
font-stretch: expanded; | |||||
font-style: normal; | |||||
src: url('/static/woff2/iosevk-abbie-extendedmedium.woff2') format('woff2'), url('/static/ttf/iosevk-abbie-extendedmedium.ttf') format('truetype'); | |||||
} | |||||
@font-face { | |||||
font-family: 'Iosevka'; | |||||
font-display: swap; | |||||
font-weight: 500; | |||||
font-stretch: normal; | |||||
font-style: oblique; | |||||
src: url('/static/woff2/iosevk-abbie-mediumoblique.woff2') format('woff2'), url('/static/ttf/iosevk-abbie-mediumoblique.ttf') format('truetype'); | |||||
} | |||||
@font-face { | |||||
font-family: 'Iosevka Oblique'; | |||||
font-display: swap; | |||||
font-weight: 500; | |||||
font-stretch: normal; | |||||
src: url('/static/woff2/iosevk-abbie-mediumoblique.woff2') format('woff2'), url('/static/ttf/iosevk-abbie-mediumoblique.ttf') format('truetype'); | |||||
} | |||||
@font-face { | |||||
font-family: 'Iosevka'; | |||||
font-display: swap; | |||||
font-weight: 500; | |||||
font-stretch: expanded; | |||||
font-style: oblique; | |||||
src: url('/static/woff2/iosevk-abbie-extendedmediumoblique.woff2') format('woff2'), url('/static/ttf/iosevk-abbie-extendedmediumoblique.ttf') format('truetype'); | |||||
} | |||||
@font-face { | |||||
font-family: 'Iosevka Oblique'; | |||||
font-display: swap; | |||||
font-weight: 500; | |||||
font-stretch: expanded; | |||||
src: url('/static/woff2/iosevk-abbie-extendedmediumoblique.woff2') format('woff2'), url('/static/ttf/iosevk-abbie-extendedmediumoblique.ttf') format('truetype'); | |||||
} | |||||
@font-face { | |||||
font-family: 'Iosevka'; | |||||
font-display: swap; | |||||
font-weight: 500; | |||||
font-stretch: normal; | |||||
font-style: italic; | |||||
src: url('/static/woff2/iosevk-abbie-mediumitalic.woff2') format('woff2'), url('/static/ttf/iosevk-abbie-mediumitalic.ttf') format('truetype'); | |||||
} | |||||
@font-face { | |||||
font-family: 'Iosevka'; | |||||
font-display: swap; | |||||
font-weight: 500; | |||||
font-stretch: expanded; | |||||
font-style: italic; | |||||
src: url('/static/woff2/iosevk-abbie-extendedmediumitalic.woff2') format('woff2'), url('/static/ttf/iosevk-abbie-extendedmediumitalic.ttf') format('truetype'); | |||||
} | |||||
@font-face { | |||||
font-family: 'Iosevka'; | |||||
font-display: swap; | |||||
font-weight: 600; | |||||
font-stretch: normal; | |||||
font-style: normal; | |||||
src: url('/static/woff2/iosevk-abbie-semibold.woff2') format('woff2'), url('/static/ttf/iosevk-abbie-semibold.ttf') format('truetype'); | |||||
} | |||||
@font-face { | |||||
font-family: 'Iosevka'; | |||||
font-display: swap; | |||||
font-weight: 600; | |||||
font-stretch: expanded; | |||||
font-style: normal; | |||||
src: url('/static/woff2/iosevk-abbie-extendedsemibold.woff2') format('woff2'), url('/static/ttf/iosevk-abbie-extendedsemibold.ttf') format('truetype'); | |||||
} | |||||
@font-face { | |||||
font-family: 'Iosevka'; | |||||
font-display: swap; | |||||
font-weight: 600; | |||||
font-stretch: normal; | |||||
font-style: oblique; | |||||
src: url('/static/woff2/iosevk-abbie-semiboldoblique.woff2') format('woff2'), url('/static/ttf/iosevk-abbie-semiboldoblique.ttf') format('truetype'); | |||||
} | |||||
@font-face { | |||||
font-family: 'Iosevka Oblique'; | |||||
font-display: swap; | |||||
font-weight: 600; | |||||
font-stretch: normal; | |||||
src: url('/static/woff2/iosevk-abbie-semiboldoblique.woff2') format('woff2'), url('/static/ttf/iosevk-abbie-semiboldoblique.ttf') format('truetype'); | |||||
} | |||||
@font-face { | |||||
font-family: 'Iosevka'; | |||||
font-display: swap; | |||||
font-weight: 600; | |||||
font-stretch: expanded; | |||||
font-style: oblique; | |||||
src: url('/static/woff2/iosevk-abbie-extendedsemiboldoblique.woff2') format('woff2'), url('/static/ttf/iosevk-abbie-extendedsemiboldoblique.ttf') format('truetype'); | |||||
} | |||||
@font-face { | |||||
font-family: 'Iosevka Oblique'; | |||||
font-display: swap; | |||||
font-weight: 600; | |||||
font-stretch: expanded; | |||||
src: url('/static/woff2/iosevk-abbie-extendedsemiboldoblique.woff2') format('woff2'), url('/static/ttf/iosevk-abbie-extendedsemiboldoblique.ttf') format('truetype'); | |||||
} | |||||
@font-face { | |||||
font-family: 'Iosevka'; | |||||
font-display: swap; | |||||
font-weight: 600; | |||||
font-stretch: normal; | |||||
font-style: italic; | |||||
src: url('/static/woff2/iosevk-abbie-semibolditalic.woff2') format('woff2'), url('/static/ttf/iosevk-abbie-semibolditalic.ttf') format('truetype'); | |||||
} | |||||
@font-face { | |||||
font-family: 'Iosevka'; | |||||
font-display: swap; | |||||
font-weight: 600; | |||||
font-stretch: expanded; | |||||
font-style: italic; | |||||
src: url('/static/woff2/iosevk-abbie-extendedsemibolditalic.woff2') format('woff2'), url('/static/ttf/iosevk-abbie-extendedsemibolditalic.ttf') format('truetype'); | |||||
} | |||||
@font-face { | |||||
font-family: 'Iosevka'; | |||||
font-display: swap; | |||||
font-weight: 700; | |||||
font-stretch: normal; | |||||
font-style: normal; | |||||
src: url('/static/woff2/iosevk-abbie-bold.woff2') format('woff2'), url('/static/ttf/iosevk-abbie-bold.ttf') format('truetype'); | |||||
} | |||||
@font-face { | |||||
font-family: 'Iosevka'; | |||||
font-display: swap; | |||||
font-weight: 700; | |||||
font-stretch: expanded; | |||||
font-style: normal; | |||||
src: url('/static/woff2/iosevk-abbie-extendedbold.woff2') format('woff2'), url('/static/ttf/iosevk-abbie-extendedbold.ttf') format('truetype'); | |||||
} | |||||
@font-face { | |||||
font-family: 'Iosevka'; | |||||
font-display: swap; | |||||
font-weight: 700; | |||||
font-stretch: normal; | |||||
font-style: oblique; | |||||
src: url('/static/woff2/iosevk-abbie-boldoblique.woff2') format('woff2'), url('/static/ttf/iosevk-abbie-boldoblique.ttf') format('truetype'); | |||||
} | |||||
@font-face { | |||||
font-family: 'Iosevka Oblique'; | |||||
font-display: swap; | |||||
font-weight: 700; | |||||
font-stretch: normal; | |||||
src: url('/static/woff2/iosevk-abbie-boldoblique.woff2') format('woff2'), url('/static/ttf/iosevk-abbie-boldoblique.ttf') format('truetype'); | |||||
} | |||||
@font-face { | |||||
font-family: 'Iosevka'; | |||||
font-display: swap; | |||||
font-weight: 700; | |||||
font-stretch: expanded; | |||||
font-style: oblique; | |||||
src: url('/static/woff2/iosevk-abbie-extendedboldoblique.woff2') format('woff2'), url('/static/ttf/iosevk-abbie-extendedboldoblique.ttf') format('truetype'); | |||||
} | |||||
@font-face { | |||||
font-family: 'Iosevka Oblique'; | |||||
font-display: swap; | |||||
font-weight: 700; | |||||
font-stretch: expanded; | |||||
src: url('/static/woff2/iosevk-abbie-extendedboldoblique.woff2') format('woff2'), url('/static/ttf/iosevk-abbie-extendedboldoblique.ttf') format('truetype'); | |||||
} | |||||
@font-face { | |||||
font-family: 'Iosevka'; | |||||
font-display: swap; | |||||
font-weight: 700; | |||||
font-stretch: normal; | |||||
font-style: italic; | |||||
src: url('/static/woff2/iosevk-abbie-bolditalic.woff2') format('woff2'), url('/static/ttf/iosevk-abbie-bolditalic.ttf') format('truetype'); | |||||
} | |||||
@font-face { | |||||
font-family: 'Iosevka'; | |||||
font-display: swap; | |||||
font-weight: 700; | |||||
font-stretch: expanded; | |||||
font-style: italic; | |||||
src: url('/static/woff2/iosevk-abbie-extendedbolditalic.woff2') format('woff2'), url('/static/ttf/iosevk-abbie-extendedbolditalic.ttf') format('truetype'); | |||||
} | |||||
@font-face { | |||||
font-family: 'Iosevka'; | |||||
font-display: swap; | |||||
font-weight: 800; | |||||
font-stretch: normal; | |||||
font-style: normal; | |||||
src: url('/static/woff2/iosevk-abbie-extrabold.woff2') format('woff2'), url('/static/ttf/iosevk-abbie-extrabold.ttf') format('truetype'); | |||||
} | |||||
@font-face { | |||||
font-family: 'Iosevka'; | |||||
font-display: swap; | |||||
font-weight: 800; | |||||
font-stretch: expanded; | |||||
font-style: normal; | |||||
src: url('/static/woff2/iosevk-abbie-extendedextrabold.woff2') format('woff2'), url('/static/ttf/iosevk-abbie-extendedextrabold.ttf') format('truetype'); | |||||
} | |||||
@font-face { | |||||
font-family: 'Iosevka'; | |||||
font-display: swap; | |||||
font-weight: 800; | |||||
font-stretch: normal; | |||||
font-style: oblique; | |||||
src: url('/static/woff2/iosevk-abbie-extraboldoblique.woff2') format('woff2'), url('/static/ttf/iosevk-abbie-extraboldoblique.ttf') format('truetype'); | |||||
} | |||||
@font-face { | |||||
font-family: 'Iosevka Oblique'; | |||||
font-display: swap; | |||||
font-weight: 800; | |||||
font-stretch: normal; | |||||
src: url('/static/woff2/iosevk-abbie-extraboldoblique.woff2') format('woff2'), url('/static/ttf/iosevk-abbie-extraboldoblique.ttf') format('truetype'); | |||||
} | |||||
@font-face { | |||||
font-family: 'Iosevka'; | |||||
font-display: swap; | |||||
font-weight: 800; | |||||
font-stretch: expanded; | |||||
font-style: oblique; | |||||
src: url('/static/woff2/iosevk-abbie-extendedextraboldoblique.woff2') format('woff2'), url('/static/ttf/iosevk-abbie-extendedextraboldoblique.ttf') format('truetype'); | |||||
} | |||||
@font-face { | |||||
font-family: 'Iosevka Oblique'; | |||||
font-display: swap; | |||||
font-weight: 800; | |||||
font-stretch: expanded; | |||||
src: url('/static/woff2/iosevk-abbie-extendedextraboldoblique.woff2') format('woff2'), url('/static/ttf/iosevk-abbie-extendedextraboldoblique.ttf') format('truetype'); | |||||
} | |||||
@font-face { | |||||
font-family: 'Iosevka'; | |||||
font-display: swap; | |||||
font-weight: 800; | |||||
font-stretch: normal; | |||||
font-style: italic; | |||||
src: url('/static/woff2/iosevk-abbie-extrabolditalic.woff2') format('woff2'), url('/static/ttf/iosevk-abbie-extrabolditalic.ttf') format('truetype'); | |||||
} | |||||
@font-face { | |||||
font-family: 'Iosevka'; | |||||
font-display: swap; | |||||
font-weight: 800; | |||||
font-stretch: expanded; | |||||
font-style: italic; | |||||
src: url('/static/woff2/iosevk-abbie-extendedextrabolditalic.woff2') format('woff2'), url('/static/ttf/iosevk-abbie-extendedextrabolditalic.ttf') format('truetype'); | |||||
} | |||||
@font-face { | |||||
font-family: 'Iosevka'; | |||||
font-display: swap; | |||||
font-weight: 900; | |||||
font-stretch: normal; | |||||
font-style: normal; | |||||
src: url('/static/woff2/iosevk-abbie-heavy.woff2') format('woff2'), url('/static/ttf/iosevk-abbie-heavy.ttf') format('truetype'); | |||||
} | |||||
@font-face { | |||||
font-family: 'Iosevka'; | |||||
font-display: swap; | |||||
font-weight: 900; | |||||
font-stretch: expanded; | |||||
font-style: normal; | |||||
src: url('/static/woff2/iosevk-abbie-extendedheavy.woff2') format('woff2'), url('/static/ttf/iosevk-abbie-extendedheavy.ttf') format('truetype'); | |||||
} | |||||
@font-face { | |||||
font-family: 'Iosevka'; | |||||
font-display: swap; | |||||
font-weight: 900; | |||||
font-stretch: normal; | |||||
font-style: oblique; | |||||
src: url('/static/woff2/iosevk-abbie-heavyoblique.woff2') format('woff2'), url('/static/ttf/iosevk-abbie-heavyoblique.ttf') format('truetype'); | |||||
} | |||||
@font-face { | |||||
font-family: 'Iosevka Oblique'; | |||||
font-display: swap; | |||||
font-weight: 900; | |||||
font-stretch: normal; | |||||
src: url('/static/woff2/iosevk-abbie-heavyoblique.woff2') format('woff2'), url('/static/ttf/iosevk-abbie-heavyoblique.ttf') format('truetype'); | |||||
} | |||||
@font-face { | |||||
font-family: 'Iosevka'; | |||||
font-display: swap; | |||||
font-weight: 900; | |||||
font-stretch: expanded; | |||||
font-style: oblique; | |||||
src: url('/static/woff2/iosevk-abbie-extendedheavyoblique.woff2') format('woff2'), url('/static/ttf/iosevk-abbie-extendedheavyoblique.ttf') format('truetype'); | |||||
} | |||||
@font-face { | |||||
font-family: 'Iosevka Oblique'; | |||||
font-display: swap; | |||||
font-weight: 900; | |||||
font-stretch: expanded; | |||||
src: url('/static/woff2/iosevk-abbie-extendedheavyoblique.woff2') format('woff2'), url('/static/ttf/iosevk-abbie-extendedheavyoblique.ttf') format('truetype'); | |||||
} | |||||
@font-face { | |||||
font-family: 'Iosevka'; | |||||
font-display: swap; | |||||
font-weight: 900; | |||||
font-stretch: normal; | |||||
font-style: italic; | |||||
src: url('/static/woff2/iosevk-abbie-heavyitalic.woff2') format('woff2'), url('/static/ttf/iosevk-abbie-heavyitalic.ttf') format('truetype'); | |||||
} | |||||
@font-face { | |||||
font-family: 'Iosevka'; | |||||
font-display: swap; | |||||
font-weight: 900; | |||||
font-stretch: expanded; | |||||
font-style: italic; | |||||
src: url('/static/woff2/iosevk-abbie-extendedheavyitalic.woff2') format('woff2'), url('/static/ttf/iosevk-abbie-extendedheavyitalic.ttf') format('truetype'); | |||||
} |
@ -0,0 +1,62 @@ | |||||
// Shared Sass variables: Tailwind-style color palettes (50 = lightest
// ... 900 = darkest), layout constants, and the code-block colors.
$purple-50: #faf5ff; | |||||
$purple-100: #f3e8ff; | |||||
$purple-200: #e9d5ff; | |||||
$purple-300: #d8b4fe; | |||||
$purple-400: #c084fc; | |||||
$purple-500: #a855f7; | |||||
$purple-600: #9333ea; | |||||
$purple-700: #7e22ce; | |||||
$purple-800: #6b21a8; | |||||
$purple-900: #581c87; | |||||
$yellow-50: #fefce8; | |||||
$yellow-100: #fef9c3; | |||||
$yellow-200: #fef08a; | |||||
$yellow-300: #fde047; | |||||
$yellow-400: #facc15; | |||||
$yellow-500: #eab308; | |||||
$yellow-600: #ca8a04; | |||||
$yellow-700: #a16207; | |||||
$yellow-800: #854d0e; | |||||
$yellow-900: #713f12; | |||||
$bluegray-50: #f8fafc; | |||||
$bluegray-100: #f1f5f9; | |||||
$bluegray-200: #e2e8f0; | |||||
$bluegray-300: #cbd5e1; | |||||
$bluegray-400: #94a3b8; | |||||
$bluegray-500: #64748b; | |||||
$bluegray-600: #475569; | |||||
$bluegray-700: #334155; | |||||
$bluegray-800: #1e293b; | |||||
$bluegray-900: #0f172a; | |||||
$red-50: #fef2f2; | |||||
$red-100: #fee2e2; | |||||
$red-200: #fecaca; | |||||
$red-300: #fca5a5; | |||||
$red-400: #f87171; | |||||
$red-500: #ef4444; | |||||
$red-600: #dc2626; | |||||
$red-700: #b91c1c; | |||||
$red-800: #991b1b; | |||||
$red-900: #7f1d1d; | |||||
// Layout constants.
$nav-height: 48px; | |||||
$font-size: 14pt; | |||||
// Syntax-highlighting colors for code blocks (dark background).
// NOTE(review): $code-bg here is dark (#282C34) while another stylesheet in
// this changeset sets $code-bg to a light hsl(230,1%,98%) — presumably the
// dark/light variants; confirm they are kept in sync deliberately.
$code-bg: #282C34; | |||||
$code-fg: #ABB2BF; | |||||
$code-red: #D65122; | |||||
$code-red-br: #AE3B36; | |||||
$code-green: #88B966; | |||||
$code-yellow: #DEB468; | |||||
$code-orange: #C58853; | |||||
$code-blue: #519DEB; | |||||
$code-pink: #C678DD; | |||||
$code-cyan: #48A8B5; | |||||
$code-white: #ABB2BF; | |||||
$code-grey: #7F848E; | |||||
// End of shared theme variables. |
@ -0,0 +1,27 @@ | |||||
# Build environment for the blog: the Haskell site generator plus the
# external tools its build pipeline invokes.
{ pkgs ? import <nixpkgs> { }, stdenv ? pkgs.stdenv }: | |||||
let | |||||
# The site generator, built directly from the cabal file in this directory.
site = pkgs.haskellPackages.callCabal2nix "blag-site" ./. { }; | |||||
# A trimmed-down TeX Live with only the collections/packages the
# TikZ figures require.
our-texlive = with pkgs; texlive.combine { | |||||
inherit (texlive) | |||||
collection-basic | |||||
collection-latex | |||||
xcolor | |||||
preview | |||||
pgf tikz-cd | |||||
mathpazo | |||||
varwidth xkeyval standalone | |||||
jknapltx; | |||||
}; | |||||
in | |||||
stdenv.mkDerivation { | |||||
name = "blag"; | |||||
src = ./.; | |||||
# Build-time tools: poppler for PDF conversion, the rubber LaTeX driver,
# KaTeX for math rendering, the TeX distribution, and the generator itself.
buildInputs = with pkgs; [ | |||||
poppler_utils | |||||
rubber | |||||
nodePackages.katex | |||||
our-texlive | |||||
site | |||||
]; | |||||
} |
@ -0,0 +1,15 @@ | |||||
% Stack diagram: five cells (shift / foo / bar / reset / baz). The dashed red
% box encloses the cells from "shift" down to "reset"; the arrow on the left
% indicates stack growth direction.
\begin{scope}[node distance=0.75cm]
\node (Stk0) [draw, shape=rectangle, minimum width=1.5cm, minimum height=0.75cm, anchor=center]
at (-2.75, 0) {shift};
\node (Stk1) [draw, shape=rectangle, minimum width=1.5cm, minimum height=0.75cm, anchor=center]
at (-2.75, -0.75) {foo};
\node (Stk2) [draw, shape=rectangle, minimum width=1.5cm, minimum height=0.75cm, anchor=center]
at (-2.75, -1.5) {bar};
\node (Stk3) [draw, shape=rectangle, minimum width=1.5cm, minimum height=0.75cm, anchor=center]
at (-2.75, -2.25) {reset};
% FIX: this node was also named (Stk3), silently shadowing the "reset" cell
% above; renamed to (Stk4), matching the naming in the sibling stack figures.
\node (Stk4) [draw, shape=rectangle, minimum width=1.5cm, minimum height=0.75cm, anchor=center]
at (-2.75, -3) {baz};
\draw [red, very thick, dashed] (-3.6, -2.625) -- (-1.89, -2.625) -- (-1.89, 0.375) -- (-3.6, 0.375) -- cycle;
\draw [arrows={Latex}-] (-4, 0.375) -- (-4, -3.375);
\end{scope}
@ -0,0 +1,2 @@ | |||||
\draw[->,thick] (0,0)--(1,0) node[midway,below]{\large{i}}; | |||||
\draw[->,thick] (0,0)--(0,1) node[midway,left]{\large{j}}; |
@ -0,0 +1,3 @@ | |||||
\node[draw,circle,label=below:{$\mathrm{base}$},fill,outer sep=0.1cm, inner sep=0pt, minimum size=0.1cm] (a0) at (0, -1) {}; | |||||
\draw[->] (0, 0) circle (1cm); | |||||
\node[] (loop) at (0, 0) {$\mathrm{loop}\ i$}; |
@ -0,0 +1,11 @@ | |||||
\node[] (i0j0) at (-1, -1) {x}; | |||||
\node[] (i1j0) at (1, -1) {y}; | |||||
\node[] (i0j1) at (-1, 1) {x}; | |||||
\node[] (i1j1) at (1, 1) {z}; | |||||
\node (in) at (0, 0) {}; | |||||
\draw[->] (i0j0) -- (i0j1) node [midway] {$a$}; | |||||
\draw[->] (i0j0) -- (i1j0) node [midway, below] {$p(i)$}; | |||||
\draw[->,dashed] (i0j1) -- (i1j1) node [midway] {}; | |||||
\draw[->] (i1j0) -- (i1j1) node [midway, right] {$q(j)$}; |
@ -0,0 +1,4 @@ | |||||
\node[draw,circle,label=left:{$i0$},fill,outer sep=0.1cm, inner sep=0pt, minimum size=0.1cm] (i0) at (-1, 0) {}; | |||||
\node[draw,circle,label=right:{$i1$},fill,outer sep=0.1cm, inner sep=0pt, minimum size=0.1cm] (i1) at (1, 0) {}; | |||||
\draw[->] (i0) -- (i1) node [midway] {$\lambda i. i$}; |
@ -0,0 +1,11 @@ | |||||
\node[] (i0j0) at (-1, -1) {a}; | |||||
\node[] (i1j0) at (1, -1) {a}; | |||||
\node[] (i0j1) at (-1, 1) {a}; | |||||
\node[] (i1j1) at (1, 1) {b}; | |||||
\node (in) at (0, 0) {$\lambda i j. p (i \land j)$}; | |||||
\draw[->] (i0j0) -- (i0j1) node [midway] {$\lambda j. p\ i0$}; | |||||
\draw[->] (i0j0) -- (i1j0) node [midway, below] {$\lambda i. p\ i0$}; | |||||
\draw[->] (i0j1) -- (i1j1) node [midway] {$p$}; | |||||
\draw[->] (i1j0) -- (i1j1) node [midway, right] {$p$}; |
@ -0,0 +1,11 @@ | |||||
\node[] (i0j0) at (-1, -1) {a}; | |||||
\node[] (i1j0) at (1, -1) {b}; | |||||
\node[] (i0j1) at (-1, 1) {b}; | |||||
\node[] (i1j1) at (1, 1) {b}; | |||||
\node (in) at (0, 0) {$\lambda i j. p (i \lor j)$}; | |||||
\draw[->] (i0j0) -- (i0j1) node [midway] {$p$}; | |||||
\draw[->] (i0j0) -- (i1j0) node [midway, below] {$p$}; | |||||
\draw[->] (i0j1) -- (i1j1) node [midway] {$\lambda i. p\ i1$}; | |||||
\draw[->] (i1j0) -- (i1j1) node [midway, right] {$\lambda j. p\ i1$}; |
@ -0,0 +1,4 @@ | |||||
% A path p from endpoint a to endpoint b, drawn as an arrow.
\node[draw,circle,label=left:{$a$},fill,outer sep=0.1cm, inner sep=0pt, minimum size=0.1cm] (i0) at (-1, 0) {};
\node[draw,circle,label=right:{$b$},fill,outer sep=0.1cm, inner sep=0pt, minimum size=0.1cm] (i1) at (1, 0) {};
% FIX: "->" is not a TikZ path operation (the arrow tip is already requested
% by the [->] option); join the nodes with "--" as the sibling figures do.
\draw[->] (i0) -- (i1) node [midway] {$\lambda i. p(i)$};
@ -0,0 +1,4 @@ | |||||
% The inverse path, from b back to a, labelled by reversal of p.
\node[draw,circle,label=left:{$b$},fill,outer sep=0.1cm, inner sep=0pt, minimum size=0.1cm] (i0) at (-1, 0) {};
\node[draw,circle,label=right:{$a$},fill,outer sep=0.1cm, inner sep=0pt, minimum size=0.1cm] (i1) at (1, 0) {};
% FIX: "->" is not a TikZ path operation (the tip is already set by [<-]);
% join the nodes with "--" as the sibling figures do.
\draw[<-] (i0) -- (i1) node [midway] {$\lambda i. p(\neg i)$};
@ -0,0 +1,4 @@ | |||||
% The constant path at a, drawn between two copies of the endpoint a.
\node[draw,circle,label=left:{$a$},fill,outer sep=0.1cm, inner sep=0pt, minimum size=0.1cm] (i0) at (-1, 0) {};
\node[draw,circle,label=right:{$a$},fill,outer sep=0.1cm, inner sep=0pt, minimum size=0.1cm] (i1) at (1, 0) {};
% FIX: "->" is not a TikZ path operation (the tip is already set by [->]);
% join the nodes with "--" as the sibling figures do.
\draw[->] (i0) -- (i1) node [midway] {$\lambda i. a$};
@ -0,0 +1,6 @@ | |||||
\node[] (i0j1) at (-1, 1) {A}; | |||||
\node[] (i1j1) at (1, 1) {C}; | |||||
\node[] (i0j0) at (-1, -1) {B}; | |||||
\draw[<-] (i0j0) -- (i0j1) node [midway] {$f$}; | |||||
\draw[->] (i0j1) -- (i1j1) node [midway] {$g$}; |
@ -0,0 +1,9 @@ | |||||
\node[] (i1j0) at (1, -1) {P}; | |||||
\node[] (i1j1) at (1, 1) {C}; | |||||
\node[] (i0j0) at (-1, -1) {B}; | |||||
\node[] (i0j1) at (-1, 1) {A}; | |||||
\draw[<-] (i0j0) -- (i0j1) node [midway] {$f$}; | |||||
\draw[->] (i0j0) -- (i1j0) node [midway, below] {$i_1$}; | |||||
\draw[->] (i0j1) -- (i1j1) node [midway] {$g$}; | |||||
\draw[<-] (i1j0) -- (i1j1) node [midway, right] {$i_2$}; |
@ -0,0 +1 @@ | |||||
\node[draw,circle,label=below:{$a0 : A(i0)$},fill,outer sep=0.1cm, inner sep=0pt, minimum size=0.1cm] (a0) at (-1, 0) {}; |
@ -0,0 +1,9 @@ | |||||
\node[] (i0j1) at (-1, 1) {A}; | |||||
\node[] (i1j1) at (1, 1) {B}; | |||||
\node[] (i0j0) at (-1, -1) {B}; | |||||
\node[] (i1j0) at (1, -1) {B}; | |||||
\draw[<-] (i0j0) -- (i0j1) node [midway] {$\mathrm{equiv}$} node [midway, above, rotate=-90] {$\sim$}; | |||||
\draw[->] (i0j0) -- (i1j0) node [midway, below] {$B$}; | |||||
\draw[->,dashed] (i0j1) -- (i1j1) node [midway] {}; | |||||
\draw[<-] (i1j0) -- (i1j1) node [midway, right] {$\mathrm{id}_B$} node [midway, above, rotate=90] {$\sim$}; |
@ -0,0 +1,30 @@ | |||||
\node (a) at (-2.5, 2.5) {a}; | |||||
\node (b) at (2.5, 2.5) {b}; | |||||
\node (c) at (-2.5, -2.5) {c}; | |||||
\node (d) at (2.5, -2.5) {d}; | |||||
\node (w) at (-1, 1) {w}; | |||||
\node (x) at (1, 1) {x}; | |||||
\node (y) at (-1, -1) {y}; | |||||
\node (z) at (1, -1) {z}; | |||||
\draw[->] (a) -- node[midway] {f} (b); | |||||
\draw[->] (b) -- node[midway,right] {q} (d); | |||||
\draw[->] (a) -- node[midway,left] {p} (c); | |||||
\draw[->] (c) -- node[midway,below] {g} (d); | |||||
\draw[->] (w) -- node[midway,below] {h} (x); | |||||
\draw[->] (x) -- node[midway,left] {j} (z); | |||||
\draw[->] (y) -- node[midway,above] {k} (z); | |||||
\draw[->] (w) -- node[midway,right] {l} (y); | |||||
\draw[->] (a) -- node[midway] {$\upsilon$} (w); | |||||
\draw[->] (b) -- node[midway] {$\phi$} (x); | |||||
\draw[->] (c) -- node[midway] {$\chi$} (y); | |||||
\draw[->] (d) -- node[midway] {$\psi$} (z); | |||||
\node (wxyz) at (0, 0) {$\kappa$}; | |||||
\node (awyc) at (-1.8, 0) {$\lambda$}; | |||||
\node (awxb) at (0, 1.8) {$\mu$}; | |||||
\node (bxzd) at (1.8, 0) {$\nu$}; | |||||
\node (cyzd) at (0, -1.8) {$\xi$}; |
@ -0,0 +1,6 @@ | |||||
\node (a) at (-1, 0) {a}; | |||||
\node (b) at (1, 0) {d}; | |||||
\draw[->] (a) to[out=45,in=135] node[midway] (f) {$q \circ f$} (b); | |||||
\draw[->] (a) to[out=-45,in=-135] node[midway,below] (g) {$g \circ p$} (b); | |||||
\draw[double,->] ([yshift=-2pt]f.south) -- node[midway,right] {$\sigma$} ([yshift=2pt]g.north); |
@ -0,0 +1,10 @@ | |||||
\node (fi0j1) at (-0.75,0.75) {$a$}; | |||||
\node (fi0j0) at (-0.75,-0.75) {$b$}; | |||||
\node (fi1j1) at (0.75,0.75) {$c$}; | |||||
\node (fi1j0) at (0.75,-0.75) {$d$}; | |||||
\node (f) at (0, 0) {$\sigma$}; | |||||
\draw[->] (fi0j1) -- (fi1j1) node[midway] (f) {f}; | |||||
\draw[->] (fi0j0) -- (fi1j0) node[midway,below] (g) {g}; | |||||
\draw[<-] (fi0j0) -- (fi0j1) node[midway,left] (p) {p}; | |||||
\draw[<-] (fi1j0) -- (fi1j1) node[midway,right] (q) {q}; |
@ -0,0 +1,11 @@ | |||||
\node (atop) at (-1, 0.5) {a}; | |||||
\node (abot) at (-1, -0.5) {a}; | |||||
\node (btop) at (1, 0.5) {b}; | |||||
\node (bbot) at (1, -0.5) {b}; | |||||
\draw[->] (atop) to[out=30,in=150] node[midway] (f) {f} (btop); | |||||
\draw[->] (atop) -- (abot); | |||||
\draw[->] (abot) to[out=-30,in=-150] node[midway,below] (g) {g} (bbot); | |||||
\draw[->] (btop) -- (bbot); | |||||
\node at (0, 0) {$\alpha$}; |
@ -0,0 +1,24 @@ | |||||
\node (a) at (-1, 0) {$a$}; | |||||
\node (a0) at (0, 0.75) {$a$}; | |||||
\node (a1) at (0, -0.75) {$a$}; | |||||
\draw[->] (a0) -- node[midway] (al) {} (a1); | |||||
\draw[dashed,->] (a) to[] node[midway,above] {$\sigma$} ([xshift=-0.5em]al); | |||||
\node (fi0) at (1, 0.75) {$a$}; | |||||
\node (fi1) at (1, -0.75) {$b$}; | |||||
\draw[->] (fi0) -- node[midway,right] (f) {f} (fi1); | |||||
\node (fi0j1) at (4 + -0.75, 0.75) {$a$}; | |||||
\node (fi0j0) at (4 + -0.75, -0.75) {$a$}; | |||||
\node (fi1j1) at (4 + 0.75, 0.75) {$b$}; | |||||
\node (fi1j0) at (4 + 0.75, -0.75) {$b$}; | |||||
\draw[->] (fi0j1) -- (fi1j1) node[midway] (fs) {f}; | |||||
\draw[->] (fi0j0) -- (fi1j0) node[midway,below] (gs) {f}; | |||||
\draw[<-] (fi0j0) -- (fi0j1) node[midway,left] (p) {}; | |||||
\draw[<-] (fi1j0) -- (fi1j1) node[midway,right] (q) {}; | |||||
\node (sq) at (4, 0) {$\bullet$}; | |||||
\draw[dashed,->] (f) to[out=20,in=160] node[midway,below] {$\sigma_0 \circ \sigma$} (sq); |
@ -0,0 +1,9 @@ | |||||
\node (fi0j1) at (-0.75,0.75) {$a$}; | |||||
\node (fi0j0) at (-0.75,-0.75) {$b$}; | |||||
\node (fi1j1) at (0.75,0.75) {$c$}; | |||||
\node (fi1j0) at (0.75,-0.75) {$d$}; | |||||
\draw[->] (fi0j1) -- (fi1j1) node[midway] (f) {f}; | |||||
\draw[->] (fi0j0) -- (fi1j0) node[midway,below] (g) {g}; | |||||
\draw[<-] (fi0j0) -- (fi0j1) node[midway,left] (p) {p}; | |||||
\draw[<-] (fi1j0) -- (fi1j1) node[midway,right] (q) {q}; |
@ -0,0 +1,17 @@ | |||||
% A point mapping into an edge (via delta^1), and that edge mapping into the
% left side of a square (via delta^0_0).
\node (point) at (0, 0) {$\color{red}{\bullet}$};
\node (line0) at (2, 1) {$\color{blue}{\bullet}_0$};
\node (line1) at (2, -1) {$\color{red}{\bullet}_1$};
\draw (line0) -- (line1) node[midway] (linemid) {};
% FIX: removed a stray lone ";" that sat between the two paths here — it was
% a leftover token, not part of any \draw command.
\draw[->] (point) to[out=-70,in=180] node[midway] (a) {$\delta^1$} (line1);
\node (sq00) at (4, -1) {$\color{red}\bullet_{01}$};
\node (sq01) at (4, 1) {$\color{blue}{\bullet}_{00}$};
\node (sq10) at (6, -1) {$\bullet_{11}$};
\node (sq11) at (6, 1) {$\bullet_{10}$};
\draw (sq00) -- node[midway] (linemid_img) {} (sq01) -- (sq11) -- (sq10) -- (sq00);
\draw[->] (linemid) -- node[midway] (b) {$\delta^0_0$} (linemid_img);
@ -0,0 +1,43 @@ | |||||
% F applied to an edge, a square, and the square's (red) left edge, with the
% restriction map between the bracketed shapes. Uses the "fit" library and
% matrix-style delimiters.
\node (j1) at (0,1) {$\color{red}\bullet$};
\node (j0) at (0,0) {$\color{red}\bullet$};
\node (mid) at (0,0.5) {};
\draw[color=red] (j1) -- (j0);
\node (i0j1) at (1,1) {$\color{red}\bullet$};
\node (i0j0) at (1,0) {$\color{red}\bullet$};
\node (i1j1) at (2,1) {$\bullet$};
\node (i1j0) at (2,0) {$\bullet$};
\draw[color=red] (i0j1) -- (i0j0);
\draw (i0j1) -- (i1j1) -- (i1j0) -- (i0j0);
\node (mid2) at (1,0.5) {};
% FIX: "->" is not a TikZ path operation (the tip is already requested by the
% [->] option); connect the two coordinates with "--".
\draw[->] (mid) -- (mid2);
\node (F) at (-0.6, 0.5) {$\mathcal{F}$};
\node[fit=(j0)(i1j1), left delimiter=(, inner sep=-0.7ex, right delimiter=)] (openF) {};
\node (colon) at (2.6, 0.5) {$:$};
\node (F2) at (2.9, 0.5) {$\mathcal{F}$};
\node (fi0j1) at (3.5,1) {$\color{red}\bullet$};
\node (fi0j0) at (3.5,0) {$\color{red}\bullet$};
\node (fi1j1) at (4.5,1) {$\bullet$};
\node (fi1j0) at (4.5,0) {$\bullet$};
\draw[color=red] (fi0j1) -- (fi0j0);
\draw (fi0j1) -- (fi1j1) -- (fi1j0) -- (fi0j0);
\node[fit=(fi0j0)(fi1j1), left delimiter=(, inner sep=-0.7ex, right delimiter=)] (openF2) {};
\node (F3) at (6.1, 0.5) {$\mathcal{F}$};
\draw[->] ([xshift=2.1ex]openF2.east) -- (F3);
\node (fj1) at (6.7,1) {$\color{red}\bullet$};
\node (fj0) at (6.7,0) {$\color{red}\bullet$};
\node[fit=(fj1)(fj0), left delimiter=(, inner sep=-0.7ex, right delimiter=)] (openF3) {};
\draw[color=red] (fj1) -- (fj0);
@ -0,0 +1,38 @@ | |||||
% Cubes of increasing dimension: a point, an interval, a square, and a cube,
% with vertices indexed by bit-strings.
\node at (-5, 0) {$\bullet_{()}$};
\node (line0) at (-3, 1) {$\bullet_0$};
\node (line1) at (-3, -1) {$\bullet_1$};
% FIX (here and in the four square edges below): "->" is not a TikZ path
% operation — the arrow tip is already requested by the [->] option — so the
% coordinates are joined with "--", as the cube edges further down already do.
\draw[->] (line0) -- (line1);
\node (sq00) at (-1, 1) {$\bullet_{00}$};
\node (sq01) at (-1, -1) {$\bullet_{01}$};
\node (sq10) at (1, 1) {$\bullet_{10}$};
\node (sq11) at (1, -1) {$\bullet_{11}$};
\draw[->] (sq00) -- (sq01);
\draw[->] (sq00) -- (sq10);
\draw[->] (sq10) -- (sq11);
\draw[->] (sq01) -- (sq11);
\node (sq010) at (3, -1) {$\bullet_{000}$};
\node (sq011) at (4, 0) {$\bullet_{001}$};
\node (sq110) at (5, -1) {$\bullet_{100}$};
\node (sq111) at (6, 0) {$\bullet_{101}$};
\node (sq000) at (3, 1) {$\bullet_{010}$};
\node (sq001) at (4, 2) {$\bullet_{011}$};
\node (sq100) at (5, 1) {$\bullet_{110}$};
\node (sq101) at (6, 2) {$\bullet_{111}$};
\draw[->] (sq000) -- (sq001);
\draw[->] (sq000) -- (sq100);
\draw[->] (sq000) -- (sq010);
\draw[->] (sq001) -- (sq011);
\draw[->] (sq001) -- (sq101);
\draw[->] (sq010) -- (sq110);
\draw[->] (sq010) -- (sq011);
\draw[->] (sq100) -- (sq101);
\draw[->] (sq100) -- (sq110);
\draw[->] (sq101) -- (sq111);
\draw[->] (sq110) -- (sq111);
\draw[->] (sq011) -- (sq111);
@ -0,0 +1,6 @@ | |||||
\node (a) at (-1, 0) {a}; | |||||
\node (b) at (1, 0) {b}; | |||||
\draw[->] (a) to[out=30,in=150] node[midway] (f) {f} (b); | |||||
\draw[->] (a) to[out=-30,in=-150] node[midway,below] (g) {g} (b); | |||||
\draw[double,->] ([yshift=-2pt]f.south) -- node[midway,right] {$\alpha$} ([yshift=2pt]g.north); |
@ -0,0 +1,7 @@ | |||||
\node (open) at (0, 2) {$\sqcap^{n,i,\varepsilon}$}; | |||||
\node (box) at (0, 0) {$\square^n$}; | |||||
\node (set) at (2, 0) {$X$}; | |||||
\draw[right hook->] (open) -- (box); | |||||
\draw[->] (open) -- node[midway] {f} (set); | |||||
\draw[dotted, ->] (box) -- node[midway, below] {g} (set); |
@ -0,0 +1,9 @@ | |||||
% Square exhibiting g as a (dashed) diagonal filler against identities on B.
\node (sq1_b00) at (-3, 1) {B};
\node (sq1_a10) at (-1, 1) {A};
\node (sq1_b01) at (-3, -1) {B};
\node (sq1_b11) at (-1, -1) {B};
\draw[dashed,->] (sq1_b00) -- node[midway] {g} (sq1_a10);
% FIX: "->" is not a TikZ path operation (the tip is already set by the [->]
% option); the three edges below now use "--", matching the edge above and
% the sibling square figures.
\draw[->] (sq1_a10) -- node[midway] {f} (sq1_b11);
\draw[->] (sq1_b00) -- node[midway,left] {1} (sq1_b01);
\draw[->] (sq1_b01) -- node[midway,below] {1} (sq1_b11);
@ -0,0 +1,9 @@ | |||||
% Naturality square for a transformation alpha between functors F and G,
% at a morphism f : c' -> c.
% FIX: "$c\prime$" typesets \prime as a full-size, non-superscripted symbol;
% replaced with the idiomatic "c'" (which TeX turns into a superscript prime).
\node (sq1_b00) at (-3, 1) {$F(c')$};
\node (sq1_a10) at (-1, 1) {$F(c)$};
\node (sq1_b01) at (-3, -1) {$G(c')$};
\node (sq1_b11) at (-1, -1) {$G(c)$};
\draw[->] (sq1_b00) -- node[midway] {$F(f)$} (sq1_a10);
% FIX: "->" is not a TikZ path operation (the tip is already set by [->]);
% the three edges below now use "--", matching the edge above.
\draw[->] (sq1_a10) -- node[midway] {$\alpha_{c}$} (sq1_b11);
\draw[->] (sq1_b00) -- node[midway,left] {$\alpha_{c'}$} (sq1_b01);
\draw[->] (sq1_b01) -- node[midway,below] {$G(f)$} (sq1_b11);
@ -0,0 +1,9 @@ | |||||
\node (fi0j1) at (-0.75, 0.75) {$a$}; | |||||
\node (fi0j0) at (-0.75, -0.75) {$b$}; | |||||
\node (fi1j1) at (0.75, 0.75) {$c$}; | |||||
\node (fi1j0) at (0.75, -0.75) {$d$}; | |||||
\draw[->,dotted] (fi0j1) -- (fi1j1) node[midway] (f) {f}; | |||||
\draw[->] (fi0j0) -- (fi1j0) node[midway,below] (g) {g}; | |||||
\draw[<-] (fi0j0) -- (fi0j1) node[midway,left] (p) {p}; | |||||
\draw[<-] (fi1j0) -- (fi1j1) node[midway,right] (q) {q}; |
@ -0,0 +1,39 @@ | |||||
\node (fi0j1) at (-0.75, 0.75) {$a$}; | |||||
\node (fi0j0) at (-0.75, -0.75) {$b$}; | |||||
\node (fi1j1) at (0.75, 0.75) {$c$}; | |||||
\node (fi1j0) at (0.75, -0.75) {$d$}; | |||||
\draw[->,dotted] (fi0j1) -- (fi1j1) node[midway] (f) {f}; | |||||
\draw[->] (fi0j0) -- (fi1j0) node[midway,below] (g) {g}; | |||||
\draw[<-] (fi0j0) -- (fi0j1) node[midway,left] (p) {p}; | |||||
\draw[<-] (fi1j0) -- (fi1j1) node[midway,right] (q) {q}; | |||||
\node (fi0j1) at (2.5 + -0.75, 0.75) {$a$}; | |||||
\node (fi0j0) at (2.5 + -0.75, -0.75) {$b$}; | |||||
\node (fi1j1) at (2.5 + 0.75, 0.75) {$c$}; | |||||
\node (fi1j0) at (2.5 + 0.75, -0.75) {$d$}; | |||||
\draw[->] (fi0j1) -- (fi1j1) node[midway] (f) {f}; | |||||
\draw[->,dotted] (fi0j0) -- (fi1j0) node[midway,below] (g) {g}; | |||||
\draw[<-] (fi0j0) -- (fi0j1) node[midway,left] (p) {p}; | |||||
\draw[<-] (fi1j0) -- (fi1j1) node[midway,right] (q) {q}; | |||||
\node (fi0j1) at (5 + -0.75, 0.75) {$a$}; | |||||
\node (fi0j0) at (5 + -0.75, -0.75) {$b$}; | |||||
\node (fi1j1) at (5 + 0.75, 0.75) {$c$}; | |||||
\node (fi1j0) at (5 + 0.75, -0.75) {$d$}; | |||||
\draw[->] (fi0j1) -- (fi1j1) node[midway] (f) {f}; | |||||
\draw[->] (fi0j0) -- (fi1j0) node[midway,below] (g) {g}; | |||||
\draw[<-,dotted] (fi0j0) -- (fi0j1) node[midway,left] (p) {p}; | |||||
\draw[<-] (fi1j0) -- (fi1j1) node[midway,right] (q) {q}; | |||||
\node (fi0j1) at (7.5 + -0.75, 0.75) {$a$}; | |||||
\node (fi0j0) at (7.5 + -0.75, -0.75) {$b$}; | |||||
\node (fi1j1) at (7.5 + 0.75, 0.75) {$c$}; | |||||
\node (fi1j0) at (7.5 + 0.75, -0.75) {$d$}; | |||||
\draw[->] (fi0j1) -- (fi1j1) node[midway] (f) {f}; | |||||
\draw[->] (fi0j0) -- (fi1j0) node[midway,below] (g) {g}; | |||||
\draw[<-] (fi0j0) -- (fi0j1) node[midway,left] (p) {p}; | |||||
\draw[<-,dotted] (fi1j0) -- (fi1j1) node[midway,right] (q) {q}; |
@ -0,0 +1,9 @@ | |||||
\node (sq1_a00) at (-3, 1) {A}; | |||||
\node (sq1_b10) at (-1, 1) {B}; | |||||
\node (sq1_a01) at (-3, -1) {A}; | |||||
\node (sq1_a11) at (-1, -1) {A}; | |||||
\draw[dashed,->] (sq1_b10) -- node[midway] {h} (sq1_a11); | |||||
\draw[->] (sq1_a00) -- node[midway] {f} (sq1_b10); | |||||
\draw[->] (sq1_a00) -- node[midway,left] {1} (sq1_a01); | |||||
\draw[->] (sq1_a01) -- node[midway,below] {1} (sq1_a11); |
@ -0,0 +1,19 @@ | |||||
\node (fi0j1) at (-0.75, 0.75) {$a$}; | |||||
\node (fi0j0) at (-0.75, -0.75) {$b$}; | |||||
\node (fi1j1) at (0.75, 0.75) {$a$}; | |||||
\node (fi1j0) at (0.75, -0.75) {$b$}; | |||||
\draw[->] (fi0j1) -- (fi1j1) node[midway] (f) {1}; | |||||
\draw[->] (fi0j0) -- (fi1j0) node[midway,below] (g) {1}; | |||||
\draw[<-] (fi0j0) -- (fi0j1) node[midway,left] (p) {f}; | |||||
\draw[<-] (fi1j0) -- (fi1j1) node[midway,right] (q) {f}; | |||||
\node (fi0j1) at (2.5 + -0.75, 0.75) {$a$}; | |||||
\node (fi0j0) at (2.5 + -0.75, -0.75) {$a$}; | |||||
\node (fi1j1) at (2.5 + 0.75, 0.75) {$b$}; | |||||
\node (fi1j0) at (2.5 + 0.75, -0.75) {$b$}; | |||||
\draw[->] (fi0j1) -- (fi1j1) node[midway] (f) {f}; | |||||
\draw[->] (fi0j0) -- (fi1j0) node[midway,below] (g) {f}; | |||||
\draw[<-] (fi0j0) -- (fi0j1) node[midway,left] (p) {1}; | |||||
\draw[<-] (fi1j0) -- (fi1j1) node[midway,right] (q) {1}; |
@ -0,0 +1 @@ | |||||
\node[draw,circle,label=right:$x:A$,fill,outer sep=0.1cm, inner sep=0pt, minimum size=0.1cm] (i0) at (0, 0) {}; |
@ -0,0 +1,4 @@ | |||||
\node[draw,circle,label=left:{$A[0/i]$},fill,outer sep=0.1cm, inner sep=0pt, minimum size=0.1cm] (i0) at (-1, 0) {}; | |||||
\node[draw,circle,label=right:{$A[1/i]$},fill,outer sep=0.1cm, inner sep=0pt, minimum size=0.1cm] (i1) at (1, 0) {}; | |||||
\draw (i0) -- (i1); |
@ -0,0 +1,6 @@ | |||||
\node[draw,circle,label=left:{$A[0/i, 0/j]$},fill,outer sep=0.1cm, inner sep=0pt, minimum size=0.1cm] (i0j0) at (-1, -1) {}; | |||||
\node[draw,circle,label=right:{$A[1/i, 0/j]$},fill,outer sep=0.1cm, inner sep=0pt, minimum size=0.1cm] (i1j0) at (1, -1) {}; | |||||
\node[draw,circle,label=left:{$A[0/i, 1/j]$},fill,outer sep=0.1cm, inner sep=0pt, minimum size=0.1cm] (i0j1) at (-1, 1) {}; | |||||
\node[draw,circle,label=right:{$A[1/i, 1/j]$},fill,outer sep=0.1cm, inner sep=0pt, minimum size=0.1cm] (i1j1) at (1, 1) {}; | |||||
\draw (i0j0) -- (i1j0) -- (i1j1) -- (i0j1) -- (i0j0); |
@ -0,0 +1,5 @@ | |||||
\node[draw,circle,label=below:$i_0$,fill,outer sep=0.1cm, inner sep=0pt, minimum size=0.1cm] (i0) at (-1, 0) {}; | |||||
\node[draw,circle,label=below:$i_1$,fill,outer sep=0.1cm, inner sep=0pt, minimum size=0.1cm] (i1) at (1, 0) {}; | |||||
\draw (i0) -- (i1) node [midway, above] (seg) {seg}; | |||||
% \draw[-] (i0) -- (i1); |
@ -0,0 +1,39 @@ | |||||
\begin{scope}[node distance=0.75cm] | |||||
\node (FGX) [inner xsep=0.01cm, inner ysep=0.03cm] at (0, 0) {@}; | |||||
\node (FG) [xshift=0.25cm, inner xsep=0.01cm, inner ysep=0.03cm, below of=FGX, left of=FGX] {@}; | |||||
\node (X) [xshift=-0.25cm, inner xsep=0.01cm, inner ysep=0.03cm, below of=FGX, right of=FGX] {x}; | |||||
\node (F) [xshift=0.25cm, inner xsep=0.04cm, inner ysep=0.05cm, below of=FG, left of=FG, xshift=2] {f}; | |||||
\node (G) [xshift=-0.25cm, inner xsep=0.01cm, inner ysep=0.03cm, below of=FG, right of=FG, xshift=-2] {g}; | |||||
\node (GX) | |||||
[xshift=-0.25cm, inner xsep=0.01cm, inner ysep=0.03cm, below of=F, xshift=0.75cm] | |||||
{@}; | |||||
\draw[->] (FGX) to (X); | |||||
\draw[->] (FGX) to (FG); | |||||
\draw[->] (FG) to (F.north east); | |||||
\draw[->] (FG) to (G.north west); | |||||
\draw[->] (GX) to ([shift=({-0.35cm,-0.35cm})]GX) | |||||
-- ++(0, -0.10cm) | |||||
-| (G); | |||||
\draw[->] (GX) to ([shift=({0.45cm,-0.35cm})]GX) | |||||
-| (X); | |||||
\node (Stk0) [draw, shape=rectangle, minimum width=1.5cm, minimum height=0.75cm, anchor=center] | |||||
at (-2.75, 0) {}; | |||||
\node (Stk1) [draw, shape=rectangle, minimum width=1.5cm, minimum height=0.75cm, anchor=center] | |||||
at (-2.75, -0.75) {}; | |||||
\node (Stk2) [draw, shape=rectangle, minimum width=1.5cm, minimum height=0.75cm, anchor=center] | |||||
at (-2.75, -1.5) {}; | |||||
\node (Stk3) [draw, shape=rectangle, minimum width=1.5cm, minimum height=0.75cm, anchor=center] | |||||
at (-2.75, -2.25) {}; | |||||
\draw[->] (Stk0.center) to (FGX); | |||||
\draw[->] (Stk1.center) to (FG); | |||||
\draw[->] (Stk2.center) to (F); | |||||
\draw[->] (Stk3.center) to (GX); | |||||
\end{scope} |
@ -0,0 +1,51 @@ | |||||
\begin{scope}[node distance=0.75cm] | |||||
\node (FGX) [inner xsep=0.01cm, inner ysep=0.03cm] at (0, 0) {@}; | |||||
\node (FG) [xshift=0.25cm, inner xsep=0.01cm, inner ysep=0.03cm, below of=FGX, left of=FGX] {@}; | |||||
\node (X) [xshift=-0.25cm, inner xsep=0.01cm, inner ysep=0.03cm, below of=FGX, right of=FGX] {x}; | |||||
\node (F) [xshift=0.25cm, inner xsep=0.04cm, inner ysep=0.05cm, below of=FG, left of=FG, xshift=2] {f}; | |||||
\node (G) [xshift=-0.25cm, inner xsep=0.01cm, inner ysep=0.03cm, below of=FG, right of=FG, xshift=-2] {g}; | |||||
\node (KGX) | |||||
[xshift=-0.55cm, inner xsep=0.01cm, inner ysep=0.03cm, below of=F, xshift=0.75cm] | |||||
{@}; | |||||
\node (K) | |||||
[xshift=0.25cm, inner xsep=0.01cm, inner ysep=0.03cm, below of=KGX, left of=KGX] | |||||
{K}; | |||||
\node (GX) | |||||
[xshift=-0.45cm, inner xsep=0.01cm, inner ysep=0.03cm, below of=KGX, right of=KGX] | |||||
{@}; | |||||
\draw[->] (FGX) to (X); | |||||
\draw[->] (FGX) to (FG); | |||||
\draw[->] (FG) to (F.north east); | |||||
\draw[->] (FG) to (G.north west); | |||||
\draw[->] (KGX) to (K); | |||||
\draw[->] (KGX) to (GX); | |||||
\draw[->] (GX) to ([shift=({-0.35cm,-0.35cm})]GX) | |||||
-- ++(0, -0.10cm) | |||||
-| (G); | |||||
\draw[->] (GX) to ([shift=({0.45cm,-0.35cm})]GX) | |||||
-| (X); | |||||
\node (Stk0) [draw, shape=rectangle, minimum width=1.5cm, minimum height=0.75cm, anchor=center] | |||||
at (-2.75, 0) {}; | |||||
\node (Stk1) [draw, shape=rectangle, minimum width=1.5cm, minimum height=0.75cm, anchor=center] | |||||
at (-2.75, -0.75) {}; | |||||
\node (Stk2) [draw, shape=rectangle, minimum width=1.5cm, minimum height=0.75cm, anchor=center] | |||||
at (-2.75, -1.5) {}; | |||||
\node (Stk3) [draw, shape=rectangle, minimum width=1.5cm, minimum height=0.75cm, anchor=center] | |||||
at (-2.75, -2.25) {}; | |||||
\draw[->] (Stk0.center) to (FGX); | |||||
\draw[->] (Stk1.center) to (FG); | |||||
\draw[->] (Stk2.center) to (F); | |||||
\draw[->] (Stk3.center) to (KGX); | |||||
\end{scope} |
@ -0,0 +1,25 @@ | |||||
\begin{scope}[node distance=0.75cm] | |||||
\node (FGX) [inner xsep=0.01cm, inner ysep=0.03cm] at (0, 0) {@}; | |||||
\node (FG) [xshift=0.25cm, inner xsep=0.01cm, inner ysep=0.03cm, below of=FGX, left of=FGX] {@}; | |||||
\node (X) [xshift=-0.25cm, inner xsep=0.01cm, inner ysep=0.03cm, below of=FGX, right of=FGX] {x}; | |||||
\node (F) [xshift=0.25cm, inner xsep=0.04cm, inner ysep=0.05cm, below of=FG, left of=FG, xshift=2] {f}; | |||||
\node (G) [xshift=-0.25cm, inner xsep=0.01cm, inner ysep=0.03cm, below of=FG, right of=FG, xshift=-2] {g}; | |||||
\draw[->] (FGX) to (X); | |||||
\draw[->] (FGX) to (FG); | |||||
\draw[->] (FG) to (F.north east); | |||||
\draw[->] (FG) to (G.north west); | |||||
\node (Stk0) [draw, shape=rectangle, minimum width=1.5cm, minimum height=0.75cm, anchor=center] | |||||
at (-2.75, 0) {}; | |||||
\node (Stk1) [draw, shape=rectangle, minimum width=1.5cm, minimum height=0.75cm, anchor=center] | |||||
at (-2.75, -0.75) {}; | |||||
\node (Stk2) [draw, shape=rectangle, minimum width=1.5cm, minimum height=0.75cm, anchor=center] | |||||
at (-2.75, -1.5) {}; | |||||
\draw[->] (Stk0.center) to (FGX); | |||||
\draw[->] (Stk1.center) to (FG); | |||||
\draw[->] (Stk2.center) to (F); | |||||
\end{scope} |
@ -0,0 +1,31 @@ | |||||
\begin{scope}[node distance=0.75cm] | |||||
\node (FGX) [inner xsep=0.01cm, inner ysep=0.03cm] at (0, 0) {@}; | |||||
\node (FG) [xshift=0.25cm, inner xsep=0.01cm, inner ysep=0.03cm, below of=FGX, left of=FGX] {@}; | |||||
\node (X) [xshift=-0.25cm, inner xsep=0.01cm, inner ysep=0.03cm, below of=FGX, right of=FGX] {x}; | |||||
\node (F) [xshift=0.25cm, inner xsep=0.04cm, inner ysep=0.05cm, below of=FG, left of=FG, xshift=2] {f}; | |||||
\node (G) [xshift=-0.25cm, inner xsep=0.01cm, inner ysep=0.03cm, below of=FG, right of=FG, xshift=-2] {g}; | |||||
\draw[->] (FGX) to (X); | |||||
\draw[->] (FGX) to (FG); | |||||
\draw[->] (FG) to (F.north east); | |||||
\draw[->] (FG) to (G.north west); | |||||
\node (Stk0) [draw, shape=rectangle, minimum width=1.5cm, minimum height=0.75cm, anchor=center] | |||||
at (-2.75, 0) {}; | |||||
\node (Stk1) [draw, shape=rectangle, minimum width=1.5cm, minimum height=0.75cm, anchor=center] | |||||
at (-2.75, -0.75) {}; | |||||
\node (Stk2) [draw, shape=rectangle, minimum width=1.5cm, minimum height=0.75cm, anchor=center] | |||||
at (-2.75, -1.5) {}; | |||||
\node (Stk3) [draw, shape=rectangle, minimum width=1.5cm, minimum height=0.75cm, anchor=center] | |||||
at (-2.75, -2.25) {}; | |||||
\node (Stk4) [draw, shape=rectangle, minimum width=1.5cm, minimum height=0.75cm, anchor=center] | |||||
at (-2.75, -3) {}; | |||||
\draw[->] (Stk0.center) to (FGX); | |||||
\draw[->] (Stk1.center) to (FG); | |||||
\draw[->] (Stk2.center) to (F); | |||||
\draw[->] (Stk3.center) to (X |- 0, -2.25cm) -- (X); | |||||
\draw[->] (Stk4.center) to (G |- 0, -3cm) -- (G); | |||||
\end{scope} |
@ -0,0 +1,44 @@ | |||||
\begin{scope}[node distance=0.75cm] | |||||
\node (FGX) [inner xsep=0.01cm, inner ysep=0.03cm] at (0, 0) {@}; | |||||
\node (FG) [xshift=0.25cm, inner xsep=0.01cm, inner ysep=0.03cm, below of=FGX, left of=FGX] {@}; | |||||
\node (X) [xshift=-0.25cm, inner xsep=0.01cm, inner ysep=0.03cm, below of=FGX, right of=FGX] {x}; | |||||
\node (F) [xshift=0.25cm, inner xsep=0.04cm, inner ysep=0.05cm, below of=FG, left of=FG, xshift=2] {f}; | |||||
\node (G) [xshift=-0.25cm, inner xsep=0.01cm, inner ysep=0.03cm, below of=FG, right of=FG, xshift=-2] {g}; | |||||
\node (GX) | |||||
[xshift=-0.25cm, inner xsep=0.01cm, inner ysep=0.03cm, below of=F, xshift=0.75cm] | |||||
{@}; | |||||
\draw[->] (FGX) to (X); | |||||
\draw[->] (FGX) to (FG); | |||||
\draw[->] (FG) to (F.north east); | |||||
\draw[->] (FG) to (G.north west); | |||||
\draw[->] (GX) to ([shift=({-0.35cm,-0.35cm})]GX) | |||||
-- ++(0, -0.10cm) | |||||
-| (G); | |||||
\draw[->] (GX) to ([shift=({0.45cm,-0.35cm})]GX) | |||||
-| (X); | |||||
\node (Stk0) [draw, shape=rectangle, minimum width=1.5cm, minimum height=0.75cm, anchor=center] | |||||
at (-2.75, 0) {}; | |||||
\node (Stk1) [draw, shape=rectangle, minimum width=1.5cm, minimum height=0.75cm, anchor=center] | |||||
at (-2.75, -0.75) {}; | |||||
\node (Stk2) [draw, shape=rectangle, minimum width=1.5cm, minimum height=0.75cm, anchor=center] | |||||
at (-2.75, -1.5) {}; | |||||
\node (Stk3) [draw, shape=rectangle, minimum width=1.5cm, minimum height=0.75cm, anchor=center] | |||||
at (-2.75, -2.25) {}; | |||||
\node (Stk4) [draw, shape=rectangle, minimum width=1.5cm, minimum height=0.75cm, anchor=center] | |||||
at (-2.75, -3) {}; | |||||
\node (K) [right of=Stk4, xshift=1.5cm] {K}; | |||||
\draw[->] (Stk0.center) to (FGX); | |||||
\draw[->] (Stk1.center) to (FG); | |||||
\draw[->] (Stk2.center) to (F); | |||||
\draw[->] (Stk3.center) to (GX); | |||||
\draw[->] (Stk4.center) to (K); | |||||
\end{scope} |
@ -0,0 +1,33 @@ | |||||
\begin{scope}[node distance=0.75cm] | |||||
\node (FGX) [inner xsep=0.01cm, inner ysep=0.03cm] | |||||
at (0, 0) {@}; | |||||
\node (FG) [xshift=0.25cm, inner xsep=0.01cm, inner ysep=0.03cm, below of=FGX, left of=FGX] | |||||
{@}; | |||||
\node (X) [xshift=-0.25cm, inner xsep=0.01cm, inner ysep=0.03cm, below of=FGX, right of=FGX] | |||||
{x}; | |||||
\node (F) [xshift=0.25cm, inner xsep=0.04cm, inner ysep=0.05cm, below of=FG, left of=FG, xshift=2] | |||||
{f}; | |||||
\node (G) [xshift=-0.25cm, inner xsep=0.01cm, inner ysep=0.03cm, below of=FG, right of=FG, xshift=-2] | |||||
{g}; | |||||
\draw[->] (FGX) to (X); | |||||
\draw[->] (FGX) to (FG); | |||||
\draw[->] (FG) to (F.north east); | |||||
\draw[->] (FG) to (G.north west); | |||||
\node (Stk0) [draw, shape=rectangle, minimum width=1.5cm, minimum height=0.75cm, anchor=center] | |||||
at (-2.75, 0) {}; | |||||
\node (Stk1) [draw, shape=rectangle, minimum width=1.5cm, minimum height=0.75cm, anchor=center] | |||||
at (-2.75, -0.75) {}; | |||||
\node (Stk2) [draw, shape=rectangle, minimum width=1.5cm, minimum height=0.75cm, anchor=center] | |||||
at (-2.75, -1.5) {}; | |||||
\node (Stk3) [draw, shape=rectangle, minimum width=1.5cm, minimum height=0.75cm, anchor=center] | |||||
at (-2.75, -2.25) {}; | |||||
\draw[->] (Stk0.center) to (FGX); | |||||
\draw[->] (Stk1.center) to (FG); | |||||
\draw[->] (Stk2.center) to (F); | |||||
\draw[->] (Stk3.center) to (0.5cm, -2.25cm) -- (X); | |||||
\end{scope} |
@ -0,0 +1,34 @@ | |||||
\begin{scope}[node distance=0.75cm] | |||||
\node (KGX) | |||||
[xshift=-0.55cm, inner xsep=0.01cm, inner ysep=0.03cm] | |||||
at (0, 0) {@}; | |||||
\node (K) | |||||
[xshift=0.25cm, inner xsep=0.01cm, inner ysep=0.03cm, below of=KGX, left of=KGX] | |||||
{K}; | |||||
\node (GX) | |||||
[xshift=-0.45cm, inner xsep=0.01cm, inner ysep=0.03cm, below of=KGX, right of=KGX] | |||||
{@}; | |||||
\node (G) | |||||
[xshift=0.45cm, inner xsep=0.01cm, inner ysep=0.03cm, below of=GX, left of=GX] | |||||
{g}; | |||||
\node (X) | |||||
[xshift=-0.45cm, inner xsep=0.01cm, inner ysep=0.03cm, below of=GX, right of=GX] | |||||
{x}; | |||||
\draw[->] (KGX) to (K); | |||||
\draw[->] (KGX) to (GX); | |||||
\draw[->] (GX) to (G); | |||||
\draw[->] (GX) to (X); | |||||
\node (Stk0) [draw, shape=rectangle, minimum width=1.5cm, minimum height=0.75cm, anchor=center] | |||||
at (-2.75, 0) {}; | |||||
\draw[->] (Stk0.center) to (KGX); | |||||
\end{scope} |
@ -0,0 +1,25 @@ | |||||
\begin{scope}[node distance=0.75cm] | |||||
\node (FGX) [color=blue,inner xsep=0.01cm, inner ysep=0.03cm] at (0, 0) {@}; | |||||
\node (FG) [color=blue,xshift=0.25cm, inner xsep=0.01cm, inner ysep=0.03cm, below of=FGX, left of=FGX] {@}; | |||||
\node (X) [xshift=-0.25cm, inner xsep=0.01cm, inner ysep=0.03cm, below of=FGX, right of=FGX] {y}; | |||||
\node (F) [color=blue,xshift=0.25cm, inner xsep=0.04cm, inner ysep=0.05cm, below of=FG, left of=FG, xshift=2] {f}; | |||||
\node (G) [xshift=-0.25cm, inner xsep=0.01cm, inner ysep=0.03cm, below of=FG, right of=FG, xshift=-2] {x}; | |||||
\draw[->] (FGX) to (X); | |||||
\draw[->,color=blue] (FGX) to (FG); | |||||
\draw[->,color=blue] (FG) to (F.north east); | |||||
\draw[->] (FG) to (G.north west); | |||||
\node (Stk0) [draw, shape=rectangle, minimum width=1.5cm, minimum height=0.75cm, anchor=center] | |||||
at (-2.75, 0) {}; | |||||
\node (Stk1) [draw, shape=rectangle, minimum width=1.5cm, minimum height=0.75cm, anchor=center] | |||||
at (-2.75, -0.75) {}; | |||||
\node (Stk2) [draw, shape=rectangle, minimum width=1.5cm, minimum height=0.75cm, anchor=center] | |||||
at (-2.75, -1.5) {}; | |||||
\draw[->] (Stk0.center) to (FGX); | |||||
\draw[->] (Stk1.center) to (FG); | |||||
\draw[->] (Stk2.center) to (F); | |||||
\end{scope} |
@ -0,0 +1,14 @@ | |||||
\begin{scope}[node distance=0.75cm] | |||||
\node (FGX) [color=blue,inner xsep=0.01cm, inner ysep=0.03cm] at (0, 0) {@}; | |||||
\node (FG) [color=blue,xshift=0.25cm, inner xsep=0.01cm, inner ysep=0.03cm, below of=FGX, left of=FGX] {@}; | |||||
\node (X) [xshift=-0.25cm, inner xsep=0.01cm, inner ysep=0.03cm, below of=FGX, right of=FGX] {y}; | |||||
\node (F) [color=blue,xshift=0.25cm, inner xsep=0.04cm, inner ysep=0.05cm, below of=FG, left of=FG, xshift=2] {f}; | |||||
\node (G) [xshift=-0.25cm, inner xsep=0.01cm, inner ysep=0.03cm, below of=FG, right of=FG, xshift=-2] {x}; | |||||
\draw[->] (FGX) to (X); | |||||
\draw[->,color=blue] (FGX) to (FG); | |||||
\draw[->,color=blue] (FG) to (F.north east); | |||||
\draw[->] (FG) to (G.north west); | |||||
\end{scope} |
@ -0,0 +1 @@ | |||||
\node at (0, 0) {main}; |
@ -0,0 +1,26 @@ | |||||
\begin{scope}[node distance=0.75cm] | |||||
\node (DoDo4) [inner xsep=0.01cm, inner ysep=0.03cm] at (0, 0) {@}; | |||||
\node (Do) | |||||
[xshift=0.25cm, inner xsep=0.01cm, inner ysep=0.03cm, below of=DoDo4, left of=DoDo4] | |||||
{double}; | |||||
\node (Do4) | |||||
[xshift=-0.25cm, inner xsep=0.01cm, inner ysep=0.03cm, below of=DoDo4, right of=DoDo4] | |||||
{@}; | |||||
\node (Do_2) | |||||
[xshift=0.25cm, inner xsep=0.04cm, inner ysep=0.05cm, below of=Do4, left of=Do4, xshift=2] | |||||
{double}; | |||||
\node (4) | |||||
[xshift=-0.25cm, inner xsep=0.01cm, inner ysep=0.03cm, below of=Do4, right of=Do4, xshift=-2] | |||||
{4}; | |||||
\draw[->] (DoDo4) to (Do); | |||||
\draw[->] (DoDo4) to (Do4); | |||||
\draw[->] (Do4) to (Do_2); | |||||
\draw[->] (Do4) to (4); | |||||
\end{scope} |
@ -0,0 +1,33 @@ | |||||
\begin{scope}[node distance=0.75cm] | |||||
\node (DoDo4) [inner xsep=0.01cm, inner ysep=0.03cm] at (0, 0) {@}; | |||||
\node (TimesAp) | |||||
[xshift=0.25cm, inner xsep=0.01cm, inner ysep=0.03cm, below of=DoDo4, left of=DoDo4] | |||||
{@}; | |||||
\node (Times) | |||||
[xshift=0.25cm, inner xsep=0.01cm, inner ysep=0.03cm, below of=TimesAp, left of=TimesAp] | |||||
{$+$}; | |||||
\node (Do4) | |||||
[xshift=-0.5cm, inner xsep=0.01cm, inner ysep=0.03cm, below of=DoDo4, | |||||
right of=DoDo4, yshift=-0.5cm] | |||||
{@}; | |||||
\node (Do_2) | |||||
[xshift=0.25cm, inner xsep=0.04cm, inner ysep=0.05cm, below of=Do4, left of=Do4, xshift=2] | |||||
{double}; | |||||
\node (4) | |||||
[xshift=-0.25cm, inner xsep=0.01cm, inner ysep=0.03cm, below of=Do4, right of=Do4, xshift=-2] | |||||
{4}; | |||||
\draw[->] (DoDo4) to (TimesAp); | |||||
\draw[->] (TimesAp) to (Times); | |||||
\draw[->] (TimesAp) |- (Do4); | |||||
\draw[->] (DoDo4) to (Do4); | |||||
\draw[->] (Do4) to (Do_2); | |||||
\draw[->] (Do4) to (4); | |||||
\end{scope} |
@ -0,0 +1,38 @@ | |||||
\begin{scope}[node distance=0.75cm] | |||||
\node (DoDo4) [inner xsep=0.01cm, inner ysep=0.03cm] at (0, 0) {@}; | |||||
\node (TimesAp) | |||||
[xshift=0.25cm, inner xsep=0.01cm, inner ysep=0.03cm, below of=DoDo4, left of=DoDo4] | |||||
{@}; | |||||
\node (Times) | |||||
[xshift=0.25cm, inner xsep=0.01cm, inner ysep=0.03cm, below of=TimesAp, left of=TimesAp] | |||||
{$+$}; | |||||
\node (Times44) | |||||
[xshift=-0.5cm, inner xsep=0.01cm, inner ysep=0.03cm, below of=DoDo4, right of=DoDo4, yshift=-0.5cm] | |||||
{@}; | |||||
\node (4) | |||||
[xshift=-0.25cm, inner xsep=0.01cm, inner ysep=0.03cm, below of=Times44, right of=Times44, yshift=-0.75cm] | |||||
{4}; | |||||
\node (Times4) | |||||
[xshift=0.25cm, inner xsep=0.04cm, inner ysep=0.05cm, below of=Times44, left of=Times44, xshift=2] | |||||
{@}; | |||||
\node (Times2) | |||||
[xshift=0.25cm, inner xsep=0.01cm, inner ysep=0.03cm, below of=Times4, left of=Times4] | |||||
{$+$}; | |||||
\draw[->] (DoDo4) to (TimesAp); | |||||
\draw[->] (TimesAp) to (Times); | |||||
\draw[->] (TimesAp) to (Times44); | |||||
\draw[->] (DoDo4) to (Times44); | |||||
\draw[->] (Times44) to (Times4); | |||||
\draw[->] (Times4) to (Times2); | |||||
\draw[->] (Times4) |- (4); | |||||
\draw[->] (Times44) to (4); | |||||
\end{scope} |
@ -0,0 +1,38 @@ | |||||
\begin{scope}[node distance=0.75cm] | |||||
\node (DoDo4) [inner xsep=0.01cm, inner ysep=0.03cm] at (0, 0) {@}; | |||||
\node (TimesAp) | |||||
[xshift=0.25cm, inner xsep=0.01cm, inner ysep=0.03cm, below of=DoDo4, left of=DoDo4] | |||||
{@}; | |||||
\node (Times) | |||||
[xshift=0.25cm, inner xsep=0.01cm, inner ysep=0.03cm, below of=TimesAp, left of=TimesAp] | |||||
{$+$}; | |||||
\node (Times44) | |||||
[xshift=-0.5cm, inner xsep=0.01cm, inner ysep=0.03cm, below of=DoDo4, right of=DoDo4, yshift=-0.5cm, color=blue] | |||||
{@}; | |||||
\node (4) | |||||
[xshift=-0.25cm, inner xsep=0.01cm, inner ysep=0.03cm, below of=Times44, right of=Times44, yshift=-0.75cm, color=blue] | |||||
{4}; | |||||
\node (Times4) | |||||
[xshift=0.25cm, inner xsep=0.04cm, inner ysep=0.05cm, below of=Times44, left of=Times44, xshift=2, color=blue] | |||||
{@}; | |||||
\node (Times2) | |||||
[xshift=0.25cm, inner xsep=0.01cm, inner ysep=0.03cm, below of=Times4, left of=Times4, color=blue] | |||||
{$+$}; | |||||
\draw[->] (DoDo4) to (TimesAp); | |||||
\draw[->] (TimesAp) to (Times); | |||||
\draw[->] (TimesAp) to (Times44); | |||||
\draw[->] (DoDo4) to (Times44); | |||||
\draw[->,color=blue,dashed] (Times44) to (Times4); | |||||
\draw[->,color=blue,dashed] (Times4) to (Times2); | |||||
\draw[->,color=blue,dashed] (Times4) |- (4); | |||||
\draw[->,color=blue,dashed] (Times44) to (4); | |||||
\end{scope} |
@ -0,0 +1,22 @@ | |||||
\begin{scope}[node distance=0.75cm] | |||||
\node (DoDo4) [inner xsep=0.01cm, inner ysep=0.03cm] at (0, 0) {@}; | |||||
\node (TimesAp) | |||||
[xshift=0.25cm, inner xsep=0.01cm, inner ysep=0.03cm, below of=DoDo4, left of=DoDo4] | |||||
{@}; | |||||
\node (Times) | |||||
[xshift=0.25cm, inner xsep=0.01cm, inner ysep=0.03cm, below of=TimesAp, left of=TimesAp] | |||||
{$+$}; | |||||
\node (8) | |||||
[xshift=-0.5cm, inner xsep=0.01cm, inner ysep=0.03cm, below of=DoDo4, right of=DoDo4, yshift=-0.75cm] | |||||
{8}; | |||||
\draw[->] (DoDo4) to (TimesAp); | |||||
\draw[->] (TimesAp) to (Times); | |||||
\draw[->] (TimesAp) |- (8); | |||||
\draw[->] (DoDo4) to (8); | |||||
\end{scope} |
@ -0,0 +1,12 @@ | |||||
\node (gamma) at (0, 2) {$\Gamma$}; | |||||
\node (prod) at (0, 0) {$\beta^\alpha \times \alpha$}; | |||||
\node (arg) at (2.5, 0) {$\alpha$}; | |||||
\node (func) at (-2.5, 0) {$\beta^\alpha$}; | |||||
\draw[->] (gamma) -- node[midway] {$\left<f,x\right>$} (prod); | |||||
\draw[->] (prod) -- node[midway] {$\pi_1$} (func); | |||||
\draw[->] (prod) -- node[midway,below] {$\pi_2$} (arg); | |||||
\draw[->] (gamma) -- node[midway] {$f$} (func); | |||||
\draw[->] (gamma) -- node[midway] {$x$} (arg); |
@ -0,0 +1,2 @@ | |||||
cradle: | |||||
stack: |
@ -0,0 +1,42 @@ | |||||
--- | |||||
title: Contact | |||||
--- | |||||
Here are the easiest ways to reach me: | |||||
<style> | |||||
span#reading-length { display: none; } | |||||
</style> | |||||
<div class="contact-list"> | |||||
<div class="contact-card"> | |||||
<div class="contact-header"> | |||||
<span class="username">@plt_amy</span> | |||||
</div> | |||||
<span> | |||||
I am unhealthily active on the bird website, so follow me on Twitter to stay up to date with what I think about… everything!
</span> | |||||
</div> | |||||
<div class="contact-card"> | |||||
<div class="contact-header"> | |||||
<span class="username">{ames}</span> | |||||
</div> | |||||
<span> | |||||
I'm active in `##dependent` on [libera.chat](https://libera.chat) to talk about types! | |||||
</span> | |||||
</div> | |||||
</div> | |||||
If you like what I do, here are some ways you can support this blog: | |||||
<div class="contact-list"> | |||||
<div class="contact-card"> | |||||
<span class="username"><a href="https://ko-fi.com/plt_amy">Ko-fi</a></span> | |||||
You can send me a one-time donation on Ko-Fi. Just remember not to read the name on the receipt! | |||||
(Paypal sucks) | |||||
</div> | |||||
</div> |
@ -0,0 +1,83 @@ | |||||
--- | |||||
title: Home | |||||
--- | |||||
<div id=index> | |||||
<div style="grid-column: 2;"> | |||||
<h2 id=hi>Hi!</h2> | |||||
<p style="min-height: 98px"> | |||||
<a class=ico-left href="/"> | |||||
<img alt="profile picture" decoding=async src="/static/icon/[email protected]" /> | |||||
</a> | |||||
I'm Amélia, a non-binary (they/them) mathematician & programmer. This | |||||
blog is where I write about programming languages: their implementation, | |||||
their semantics, etc. | |||||
</p> | |||||
<hr /> | |||||
<div id=social> | |||||
<a href="https://twitter.com/plt_amy"> | |||||
<img class=social decoding=async title="My Twitter profile" src="/static/svg/twitter.svg" style="background-color: #1DA1F2;" /> | |||||
</a> | |||||
<a href="https://github.com/plt-amy"> | |||||
<img class=social decoding=async title="My GitHub profile" src="/static/svg/github.svg" style="background-color: black;" /> | |||||
</a> | |||||
<a href="https://git.amelia.how"> | |||||
<img class=social decoding=async title="My personal Gitea" src="/static/svg/gitea.svg" style="background-color: #609926;" /> | |||||
</a> | |||||
<a href="https://ko-fi.com/plt_amy"> | |||||
<img class=social decoding=async title="Buy me a coffee on Ko-Fi" src="/static/svg/kofi.svg" style="background-color: #434B57;" /> | |||||
</a> | |||||
<a rel="me" href="https://types.pl/@amy"> | |||||
<img class=social decoding=async title="My Mastodon profile" src="/static/svg/mastodon.svg" style="background-color: #2b90d9;" />
</a> | |||||
</div> | |||||
<hr /> | |||||
<p style="min-height: 98px"> | |||||
<a class=ico-right href="https://cubical.1lab.dev"> | |||||
<img class=ico-right alt="cube" decoding=async src="/static/icon/cube-128x.png" /> | |||||
</a> | |||||
In addition to this blog, I maintain <a | |||||
href="https://cubical.1lab.dev">the 1Lab</a>, a formalised, cross-linked | |||||
reference resource for Homotopy Type Theory, done in Cubical Agda. | |||||
</p> | |||||
<hr /> | |||||
<p style="min-height: 98px"> | |||||
<a class=ico-left href="https://amulet.works"> | |||||
<img alt="amulet" decoding=async src="/static/svg/amulet.svg" /> | |||||
</a> | |||||
My most significant project other than this blog and the 1lab is <a | |||||
href="https://amulet.works">Amulet</a>, a functional programming | |||||
language in the ML tradition with support for advanced type-level | |||||
programming. | |||||
</p> | |||||
</div> | |||||
<div style="grid-column: 4;"> | |||||
<h2>Posts</h2> | |||||
<p> | |||||
Here are the latest 5 posts from the blog:
</p> | |||||
$partial("templates/post-list.html")$ | |||||
<p>…or you can find more in the <a href="/archive.html">archives</a>.</p> | |||||
</div> | |||||
</div> | |||||
<style> | |||||
main > div#title { | |||||
display: none; | |||||
} | |||||
</style> |
@ -0,0 +1,13 @@ | |||||
--- | |||||
title: Open-source Licenses | |||||
--- | |||||
<style> | |||||
span#reading-length { display: none; } | |||||
</style> | |||||
This blog redistributes (parts of) the following free software projects: | |||||
* **KaTeX** is a fast JavaScript library for rendering LaTeX on the client. I use it to pre-generate amazing looking mathematics at compile time. **KaTeX is licensed under the terms of the MIT license; A copy is available [here](/static/licenses/LICENSE.KaTeX)**. | |||||
* **Iosevka** is a customizable monospace font designed for programmers. It's used in this website for code blocks, and also for any Agda developments I've shared under the amelia.how domain. **Iosevka is distributed under the terms of the SIL Open Font License; A copy is available [here](/static/licenses/LICENSE.Iosevka).** |
@ -0,0 +1,164 @@ | |||||
--- | |||||
title: "This Sentence is False, or: On Natural Language, Typing and Proof" | |||||
date: September 9th, 2020 | |||||
--- | |||||
The Liar's paradox is often the first paradox someone dealing with logic, even in an informal setting, encounters. It is _intuitively_ paradoxical: how can a sentence be both true, and false? This contradicts (ahem) the law of non-contradiction, that states that "no proposition is both true and false", or, symbolically, $\neg (A \land \neg A)$. Appealing to symbols like that gives us warm fuzzy feelings, because, _of course, the algebra doesn't lie!_ | |||||
There's a problem with that appeal to symbols, though. And it's nothing to do with non-contradiction: It's to do with well-formedness. How do you accurately translate the "this sentence is false" sentence into a logical formula? We can try by giving it a name, say $L$ (for liar), and state that $L$ must represent some logical formula. Note that the equality symbol $=$ here is _not_ a member of the logic we're using to express $L$, it's a symbol of this discourse. It's _meta_logical.
$$ L = \dots $$ | |||||
But what should fill in the dots? $L$ is the sentence we're symbolising, so "this sentence" must mean $L$. Saying "X is false" can be notated in a couple of equivalent ways, such as $\neg X$ or $X \to \bot$. We'll go with the latter: it's a surprise tool that will help us later. Now we know how to fill in the dots: It's $L \to \bot$. | |||||
<details> | |||||
<summary>Truth tables demonstrating the equivalence between $\neg A$ and $A \to \bot$, if you are classically inclined.</summary> | |||||
<div class="mathpar"> | |||||
<table> | |||||
<tr> | |||||
<th> $A$ </th> | |||||
<th> $\neg A$ </th> | |||||
</tr> | |||||
<tr><td>$\top$</td><td>$\bot$</td></tr> | |||||
<tr><td>$\bot$</td><td>$\top$</td></tr> | |||||
</table> | |||||
<table> | |||||
<tr> | |||||
<th> $A$ </th> | |||||
<th> $A\to\bot$ </th> | |||||
</tr> | |||||
<tr><td>$\top$</td><td>$\bot$</td></tr> | |||||
<tr><td>$\bot$</td><td>$\top$</td></tr> | |||||
</table> | |||||
</div> | |||||
</details> | |||||
But wait. If $L = L \to \bot$, then $L = (L \to \bot) \to \bot$, and also $L = ((L \to \bot) \to \bot) \to \bot$, and so... forever. There is no finite, well-formed formula of first-order logic that represents the sentence "This sentence is false", thus, assigning a truth value to it is meaningless: Saying "This sentence is false" is true is just as valid as saying that it's false, both of those are as valid as saying "$\neg$ is true". | |||||
Wait some more, though: we're not done. It's known, by the [Curry-Howard isomorphism], that logical systems correspond to type systems. Therefore, if we can find a type-system that assigns a meaning to our sentence $L$, then there _must_ exist a logical system that can express $L$, and so, we can decide its truth! | |||||
Even better, we don't need to analyse the truth of $L$ logically, we can do it type-theoretically: if we can build an inhabitant of $L$, then it is true; If we can build an inhabitant of $\neg L$, then it's false; And otherwise, I'm just not smart enough to do it. | |||||
So what is the smallest type system that lets us assign a meaning to $L$? | |||||
# A system of equirecursive types: $\lambda_{\text{oh no}}$[^1] | |||||
[^1]: The reason for the name will become obvious soon enough. | |||||
We do not need a complex type system to express $L$: a simple extension over the basic simply-typed lambda calculus $\lambda_{\to}$ will suffice. No fancy higher-ranked or dependent types here, sorry! | |||||
As a refresher, the simply-typed lambda calculus has _only_: | |||||
* A set of base types $\mathbb{B}$, | |||||
* Function types $\tau \to \sigma$, | |||||
* For each base type $b \in \mathbb{B}$, a set of base terms $\mathbb{T}_b$, | |||||
* Variables $v$, | |||||
* Lambda abstractions $\lambda v. e$, and | |||||
* Application $e\ e'$. | |||||
<details> | |||||
<summary>Type assignment rules for the basic $\lambda_{\to}$ calculus.</summary> | |||||
<div class="math-paragraph"> | |||||
<div> | |||||
$$\frac{x : \tau \in \Gamma}{\Gamma \vdash x : \tau}$$ | |||||
</div> | |||||
<div> | |||||
$$\frac{b \in \mathbb{B} \quad x \in \mathbb{T}_{b}}{\Gamma \vdash x : b}$$ | |||||
</div> | |||||
<div> | |||||
$$\frac{\Gamma, x : \sigma \vdash e : \tau}{\Gamma \vdash \lambda x. e : \sigma \to \tau}$$ | |||||
</div> | |||||
<div> | |||||
$$\frac{\Gamma \vdash e : \sigma \to \tau \quad \Gamma \vdash e' : \sigma}{\Gamma \vdash e\ e' : \tau}$$
</div> | |||||
</div> | |||||
</details> | |||||
First of all, we'll need a type to represent the logical proposition $\bot$. This type is empty: It has no type formers. Its elimination rule corresponds to the principle of explosion, and we write it $\mathtt{absurd}$. The inference rule: | |||||
<div class="math-paragraph"> | |||||
$$\frac{\Gamma \vdash e : \bot}{\Gamma \vdash \mathtt{absurd}\ e : A}$$
</div> | |||||
We're almost there. What we need now is a type former that serves as a solution for equations of the form $v = ... v ...$. That's right: we're just _inventing_ a solution to this class of equations---maths! | |||||
These are the _equirecursive_ types, $\mu a. \tau$. The important part here is _equi_: these types are entirely indistinguishable from their unrollings. Formally, we extend the set of type formers with type variables $a$ and $\mu$-types $\mu a. \tau$, where $\mu a$ acts as a binder for $a$. | |||||
Since we invented $\mu$ types as a solution for equations of the form $a = \tau$, we have that $\mu a. \tau = \tau[\mu a.\tau/a]$, where $\tau[\sigma{}/a]$ means "substitute $\sigma{}$ everywhere $a$ occurs in $\tau$". The typing rules express this identity, saying that anywhere a term might have one as a type, the other works too: | |||||
<div class="math-paragraph"> | |||||
<div> | |||||
$$\frac{\Gamma \vdash e : \tau[\mu a.\tau / a]}{\Gamma \vdash e : \mu a. \tau}$$ | |||||
</div> | |||||
<div> | |||||
$$\frac{\Gamma \vdash e : \mu a.\tau}{\Gamma \vdash e : \tau[\mu a. \tau / a]}$$ | |||||
</div> | |||||
</div> | |||||
Adding these rules, along with the one for eliminating $\bot$, to the $\lambda_{\to}$ calculus nets us the system $\lambda_{\text{oh no}}$. With it, one can finally formulate a representation for our $L$-sentence: it's $\mu a. a \to \bot$. | |||||
There exists a closed term of this type, namely $\lambda k. k\ k$, which means: The "this sentence is false"-sentence is true. We can check this fact ourselves, or, more likely, use a type checker that supports equirecursive types. For example, OCaml with the `-rectypes` compiler option does. | |||||
We'll first define the empty type `void` and the type corresponding to $L$: | |||||
<div class="math-paragraph"> | |||||
~~~~{.ocaml} | |||||
type void = | ;; | |||||
type l = ('a -> void) as 'a ;; | |||||
~~~~ | |||||
</div> | |||||
Now we can define our proof of $L$, called `yesl`, and check that it has the expected type: | |||||
<div class="math-paragraph"> | |||||
~~~~{.ocaml} | |||||
let yesl: l = fun k -> k k ;; | |||||
~~~~ | |||||
</div> | |||||
However. This same function is also a proof that... $\neg L$. Check it out: | |||||
<div class="math-paragraph"> | |||||
~~~~{.ocaml} | |||||
let notl (x : l) : void = x x ;; | |||||
~~~~ | |||||
</div> | |||||
# I am Bertrand Russell | |||||
Bertrand Russell (anecdotally) once proved, starting from $1 = 0$, that he was the Pope. I am also the Pope, as it turns out, since I have on hand a proof that $L$ and $\neg L$, in violation of non-contradiction; By transitivity, I am Bertrand Russell. <span style="float: right; display: inline-block;"> $\blacksquare$ </span> | |||||
Alright, maybe I'm not Russell (drat). But I am, however, a trickster. I tricked you! You thought that this post was going to be about a self-referential sentence, but it was actually about typed programming language design (not very shocking, I know). It's a demonstration of how recursive types (in any form) are logically inconsistent, and of how equirecursive types _are wrong_. | |||||
The logical inconsistency, we all deal with, on a daily basis. It comes with Turing completeness, and it annoys me to no end every single time I accidentally do `let x = ... x ...`{.haskell}. I _really_ wish I had a practical, total functional programming language to use for my day-to-day programming, and this non-termination _everywhere_ is a great big blotch on Haskell's claim of purity. | |||||
The kind of recursive types you get in Haskell is _fine_. They're not _great_ if you like the propositions-as-types interpretation, since it's trivial to derive a contradiction from them, but they're good enough for programming that implementing a positivity checker to ensure your definitions are strictly inductive isn't generally worth the effort. | |||||
Unless your language claims to have "zero runtime errors", in which case, if you implement isorecursive types instead of inductive types, you are _wrong_. See: Elm. God damn it. | |||||
<details> | |||||
<summary>So much for "no runtime errors"... I guess spinning forever on the client side is acceptable.</summary> | |||||
<div class="flex-list"> | |||||
```elm | |||||
-- Elm | |||||
type Void = Void Void | |||||
type Omega = Omega (Omega -> Void) | |||||
yesl : Omega | |||||
yesl = Omega (\(Omega x) -> x (Omega x)) | |||||
notl : Omega -> Void | |||||
notl (Omega x) = x (Omega x) | |||||
``` | |||||
</div> | |||||
</details> | |||||
Equirecursive types, however, are a totally different beast. They are _basically_ useless. Sure, you might not have to write a couple of constructors, here and there... at the cost of _dramatically_ increasing the set of incorrect programs that your type system accepts. Suddenly, typos will compile fine, and your program will just explode at runtime (more likely: fail to terminate). Isn't this what type systems are meant to prevent? | |||||
Thankfully, very few languages implement equirecursive types. OCaml is the only one I know of, and it's gated behind a compiler flag. However, that's a footgun that should _not_ be there. | |||||
**EDIT** (April 14th, 2021) It's been pointed out to me that you can get equirecursive types in OCaml even without passing `-rectypes` to the compiler. I am not an OCaml expert, so I encourage you to see [here](https://gist.github.com/drvink/a0094680aaae2569951ea4601752944d) for more details. | |||||
[Curry-Howard isomorphism]: https://en.wikipedia.org/wiki/Curry%E2%80%93Howard_correspondence |
@ -0,0 +1,309 @@ | |||||
--- | |||||
title: You could have invented Parsec | |||||
date: August 17, 2016 01:29 AM | |||||
synopsys: 2 | |||||
--- | |||||
As most of us should know, [Parsec](https://hackage.haskell.org/package/parsec) | |||||
is a relatively fast, lightweight monadic parser combinator library. | |||||
In this post I aim to show that monadic parsing is not only useful, but a simple | |||||
concept to grok. | |||||
We shall implement a simple parsing library with instances of common typeclasses | |||||
of the domain, such as Monad, Functor and Applicative, and some example | |||||
combinators to show how powerful this abstraction really is. | |||||
--- | |||||
Getting the buzzwords out of the way, being _monadic_ just means that Parsers are
instances of `Monad`{.haskell}. Recall the Monad typeclass, as defined in
`Control.Monad`{.haskell}, | |||||
```haskell | |||||
class Applicative m => Monad m where | |||||
return :: a -> m a | |||||
(>>=) :: m a -> (a -> m b) -> m b | |||||
{- Some fields omitted -} | |||||
``` | |||||
How can we fit a parser in the above constraints? To answer that, we must first | |||||
define what a parser _is_. | |||||
A naïve implementation of the `Parser`{.haskell} type would be a simple type | |||||
synonym. | |||||
```haskell | |||||
type Parser a = String -> (a, String) | |||||
``` | |||||
This just defines that a parser is a function from a string to a result pair | |||||
with the parsed value and the resulting stream. This would mean that parsers are | |||||
just state transformers, and if we define it as a synonym for the existing mtl | |||||
`State`{.haskell} monad, we get the Monad, Functor and Applicative instances for | |||||
free! But alas, this will not do. | |||||
Apart from modeling the state transformation that a parser expresses, we need a | |||||
way to represent failure. You already know that `Maybe a`{.haskell} expresses | |||||
failure, so we could try something like this: | |||||
```haskell | |||||
type Parser a = String -> Maybe (a, String) | |||||
``` | |||||
But, as you might have guessed, this is not the optimal representation either: | |||||
`Maybe`{.haskell} _does_ model failure, but in a way that is lacking. It can | |||||
only express that a computation was successful or that it failed, not why it | |||||
failed. We need a way to fail with an error message. That is, the | |||||
`Either`{.haskell} monad. | |||||
```haskell | |||||
type Parser e a = String -> Either e (a, String) | |||||
``` | |||||
Notice how we have the `Maybe`{.haskell} and `Either`{.haskell} outside the | |||||
tuple, so that when an error happens we stop parsing immediately. We could | |||||
instead have them inside the tuple for better error reporting, but that's out of | |||||
scope for a simple blag post. | |||||
This is pretty close to the optimal representation, but there are still some | |||||
warts to address: `String`{.haskell} is a bad representation for textual
data, so ideally you'd have your own `Stream`{.haskell} class that has instances | |||||
for things such as `Text`{.haskell}, `ByteString`{.haskell} and | |||||
`String`{.haskell}. | |||||
One issue, however, is more glaring: You _can't_ define typeclass instances for | |||||
type synonyms! The fix, however, is simple: make `Parser`{.haskell} a newtype. | |||||
```haskell | |||||
newtype Parser a | |||||
= Parser { parse :: String -> Either String (a, String) } | |||||
``` | |||||
--- | |||||
Now that that's out of the way, we can actually get around to instancing some | |||||
typeclasses. | |||||
Since the AMP landed in GHC 7.10 (base 4.8), the hierarchy of the Monad | |||||
typeclass is as follows: | |||||
```haskell | |||||
class Functor (m :: * -> *) where | |||||
class Functor m => Applicative m where | |||||
class Applicative m => Monad m where | |||||
``` | |||||
That is, we need to implement Functor and Applicative before we can actually | |||||
implement Monad. | |||||
We shall also add an `Alternative`{.haskell} instance for expressing choice. | |||||
First we need some utility functions, such as `runParser`{.haskell}, that runs a | |||||
parser from a given stream. | |||||
```haskell | |||||
runParser :: Parser a -> String -> Either String a | |||||
runParser (Parser p) s = fst <$> p s | |||||
``` | |||||
We could also use a function for modifying error messages. For convenience, we
make this an infix operator, `<?>`{.haskell}. | |||||
```haskell | |||||
(<?>) :: Parser a -> String -> Parser a | |||||
(Parser p) <?> err = Parser go where | |||||
go s = case p s of | |||||
Left _ -> Left err | |||||
Right x -> return x | |||||
infixl 2 <?> | |||||
``` | |||||
`Functor` | |||||
======= | |||||
Remember that Functor models something that can be mapped over (technically, | |||||
`fmap`-ed over). | |||||
We need to define semantics for `fmap` on Parsers. A sane implementation would | |||||
only map over the result, and keeping errors the same. This is a homomorphism, | |||||
and follows the Functor laws. | |||||
However, since we can't modify a function in place, we need to return a new | |||||
parser that applies the given function _after_ the parsing is done. | |||||
```haskell | |||||
instance Functor Parser where | |||||
fn `fmap` (Parser p) = Parser go where | |||||
go st = case p st of | |||||
Left e -> Left e | |||||
Right (res, str') -> Right (fn res, str') | |||||
``` | |||||
### `Applicative` | |||||
While Functor is something that can be mapped over, Applicative defines | |||||
semantics for applying a function inside a context to something inside a | |||||
context. | |||||
The Applicative class is defined as | |||||
```haskell | |||||
class Functor m => Applicative m where | |||||
pure :: a -> m a | |||||
  (<*>) :: m (a -> b) -> m a -> m b
``` | |||||
Notice how the `pure`{.haskell} and the `return`{.haskell} methods are | |||||
equivalent, so we only have to implement one of them. | |||||
Let's go over this by parts. | |||||
```haskell | |||||
instance Applicative Parser where | |||||
pure x = Parser $ \str -> Right (x, str) | |||||
``` | |||||
The `pure`{.haskell} function leaves the stream untouched, and sets the result | |||||
to the given value. | |||||
The `(<*>)`{.haskell} function needs to evaluate and parse the left-hand side
to get the in-context function to apply it. | |||||
```haskell | |||||
(Parser p) <*> (Parser p') = Parser go where | |||||
go st = case p st of | |||||
Left e -> Left e | |||||
Right (fn, st') -> case p' st' of | |||||
Left e' -> Left e' | |||||
Right (v, st'') -> Right (fn v, st'') | |||||
``` | |||||
### `Alternative` | |||||
Since the only superclass of Alternative is Applicative, we can instance it | |||||
without a Monad instance defined. We do, however, need an import of | |||||
`Control.Applicative`{.haskell}. | |||||
```haskell | |||||
instance Alternative Parser where | |||||
empty = Parser $ \_ -> Left "empty parser" | |||||
(Parser p) <|> (Parser p') = Parser go where | |||||
go st = case p st of | |||||
Left _ -> p' st | |||||
Right x -> Right x | |||||
``` | |||||
### `Monad` | |||||
After almost a thousand words, one would be excused for forgetting we're | |||||
implementing a _monadic_ parser combinator library. That means, we need an | |||||
instance of the `Monad`{.haskell} typeclass. | |||||
Since we have an instance of Applicative, we don't need an implementation of | |||||
return: it is equivalent to `pure`, save for the class constraint. | |||||
```haskell | |||||
instance Monad Parser where | |||||
return = pure | |||||
``` | |||||
The `(>>=)`{.haskell} implementation, however, needs a bit more thought. Its | |||||
type signature is | |||||
```haskell | |||||
(>>=) :: m a -> (a -> m b) -> m b | |||||
``` | |||||
That means we need to extract a value from the Parser monad and apply it to the | |||||
given function, producing a new Parser. | |||||
```haskell | |||||
(Parser p) >>= f = Parser go where | |||||
go s = case p s of | |||||
Left e -> Left e | |||||
Right (x, s') -> parse (f x) s' | |||||
``` | |||||
While some people think that the `fail`{.haskell} is not supposed to be in the | |||||
Monad typeclass, we do need an implementation for when pattern matching fails. | |||||
It is also convenient to use `fail`{.haskell} for the parsing action that | |||||
returns an error with a given message. | |||||
```haskell | |||||
fail m = Parser $ \_ -> Left m | |||||
``` | |||||
--- | |||||
We now have a `Parser`{.haskell} monad, that expresses a parsing action. But, a | |||||
parser library is no good when actual parsing is made harder rather than easier. To | |||||
make parsing easier, we define _combinators_, functions that modify a parser in | |||||
one way or another. | |||||
But first, we should get some parsing functions. | |||||
### any, satisfying | |||||
`any` is the parsing action that pops a character off the stream and returns | |||||
that. It does no further parsing at all. | |||||
```haskell | |||||
any :: Parser Char | |||||
any = Parser go where | |||||
go [] = Left "any: end of file" | |||||
go (x:xs) = Right (x,xs) | |||||
``` | |||||
`satisfy` tests the parsed value against a function of type `Char -> | |||||
Bool`{.haskell} before deciding if it's successful or a failure. | |||||
```haskell | |||||
satisfy :: (Char -> Bool) -> Parser Char | |||||
satisfy f = do | |||||
x <- any | |||||
if f x | |||||
then return x | |||||
else fail "satisfy: does not satisfy" | |||||
``` | |||||
We use the `fail`{.haskell} function defined above to represent failure. | |||||
### `oneOf`, `char` | |||||
These functions are defined in terms of `satisfy`, and parse individual | |||||
characters. | |||||
```haskell | |||||
char :: Char -> Parser Char | |||||
char c = satisfy (c ==) <?> "char: expected literal " ++ [c] | |||||
oneOf :: String -> Parser Char | |||||
oneOf s = satisfy (`elem` s) <?> "oneOf: expected one of '" ++ s ++ "'" | |||||
``` | |||||
### `string` | |||||
This parser parses a sequence of characters, in order. | |||||
```haskell | |||||
string :: String -> Parser String | |||||
string [] = return [] | |||||
string (x:xs) = do | |||||
char x | |||||
string xs | |||||
return $ x:xs | |||||
``` | |||||
--- | |||||
And that's it! In a few hundred lines, we have built a working parser combinator | |||||
library with Functor, Applicative, Alternative, and Monad instances. While it's | |||||
not as complex or featureful as Parsec in any way, it is powerful enough to | |||||
define grammars for simple languages. | |||||
[A transcription](/static/Parser.hs) ([with syntax | |||||
highlighting](/static/Parser.hs.html)) of this file is available as runnable | |||||
Haskell. The transcription also features some extra combinators for use. |
@ -0,0 +1,332 @@ | |||||
--- | |||||
title: Dependent types in Haskell - Sort of | |||||
date: August 23, 2016 | |||||
synopsys: 2 | |||||
--- | |||||
**Warning**: An intermediate level of type-fu is necessary for understanding | |||||
this post. | |||||
The glorious Glasgow Haskell Compilation system, since around version 6.10 has | |||||
had support for indexed type families, which let us represent functional | |||||
relationships between types. Since around version 7, it has also supported | |||||
datatype-kind promotion, which lifts arbitrary data declarations to types. Since | |||||
version 8, it has supported an extension called `TypeInType`, which unifies the | |||||
kind and type level. | |||||
With this in mind, we can implement the classical dependently-typed example: | |||||
Length-indexed lists, also called `Vectors`{.haskell}. | |||||
---- | |||||
> {-# LANGUAGE TypeInType #-} | |||||
`TypeInType` also implies `DataKinds`, which enables datatype promotion, and | |||||
`PolyKinds`, which enables kind polymorphism. | |||||
`TypeOperators` is needed for expressing type-level relationships infixly, and | |||||
`TypeFamilies` actually lets us define these type-level functions. | |||||
> {-# LANGUAGE TypeOperators #-} | |||||
> {-# LANGUAGE TypeFamilies #-} | |||||
Since these are not simple-kinded types, we'll need a way to set their kind | |||||
signatures[^kind] explicitly. We'll also need Generalized Algebraic Data Types | |||||
(or GADTs, for short) for defining these types. | |||||
> {-# LANGUAGE KindSignatures #-} | |||||
> {-# LANGUAGE GADTs #-} | |||||
Since GADTs which couldn't normally be defined with regular ADT syntax can't | |||||
have deriving clauses, we also need `StandaloneDeriving`. | |||||
> {-# LANGUAGE StandaloneDeriving #-} | |||||
> module Vector where | |||||
> import Data.Kind | |||||
---- | |||||
Natural numbers | |||||
=============== | |||||
We could use the natural numbers (and singletons) implemented in `GHC.TypeLits`, | |||||
but since those are not defined inductively, they're painful to use for our | |||||
purposes. | |||||
Recall the definition of natural numbers proposed by Giuseppe Peano in his | |||||
axioms: **Z**ero is a natural number, and the **s**uccessor of a natural number | |||||
is also a natural number. | |||||
If you noticed the bold characters at the start of the words _zero_ and | |||||
_successor_, you might have already assumed the definition of naturals to be | |||||
given by the following GADT: | |||||
< data Nat where | |||||
< Z :: Nat | |||||
< S :: Nat -> Nat | |||||
This is fine if all you need are natural numbers at the _value_ level, but since | |||||
we'll be parametrising the Vector type with these, they have to exist at the | |||||
type level. The beauty of datatype promotion is that any promoted type will | |||||
exist at both levels: A kind with constructors as its inhabitant types, and a | |||||
type with constructors as its... constructors. | |||||
Since we have TypeInType, this declaration was automatically lifted, but we'll | |||||
use explicit kind signatures for clarity. | |||||
> data Nat :: Type where | |||||
> Z :: Nat | |||||
> S :: Nat -> Nat | |||||
The `Type` kind, imported from `Data.Kind`, is a synonym for `*` (and will | |||||
eventually replace it). | |||||
Vectors | |||||
======= | |||||
Vectors, in dependently-typed languages, are lists that apart from their content | |||||
encode their size along with their type. | |||||
If we assume that lists can not have negative length, and an empty vector has | |||||
length 0, this gives us a nice inductive definition using the natural number | |||||
~~type~~ kind[^kinds] | |||||
> 1. An empty vector of `a` has size `Z`{.haskell}. | |||||
> 2. Adding an element to the front of a vector of `a` and length `n` makes it | |||||
> have length `S n`{.haskell}. | |||||
We'll represent this in Haskell as a datatype with a kind signature of `Nat -> | |||||
Type -> Type` - That is, it takes a natural number (remember, these were | |||||
automatically lifted to kinds), a regular type, and produces a regular type. | |||||
Note that, `->` still means a function at the kind level. | |||||
> data Vector :: Nat -> Type -> Type where | |||||
Or, without use of `Type`, | |||||
< data Vector :: Nat -> * -> * where | |||||
We'll call the empty vector `Nil`{.haskell}. Remember, it has size | |||||
`Z`{.haskell}. | |||||
> Nil :: Vector Z a | |||||
Also note that type variables are implicit in the presence of kind signatures: | |||||
They are assigned names in order of appearance. | |||||
Consing onto a vector, represented by the infix constructor `:|`, sets its | |||||
length to the successor of the existing length, and keeps the type of elements | |||||
intact. | |||||
> (:|) :: a -> Vector x a -> Vector (S x) a | |||||
Since this constructor is infix, we also need a fixity declaration. For | |||||
consistency with `(:)`, cons for regular lists, we'll make it right-associative | |||||
with a precedence of `5`. | |||||
> infixr 5 :| | |||||
We'll use derived `Show`{.haskell} and `Eq`{.haskell} instances for | |||||
`Vector`{.haskell}, for clarity reasons. While the derived `Eq`{.haskell} is | |||||
fine, one would prefer a nicer `Show`{.haskell} instance for a | |||||
production-quality library. | |||||
> deriving instance Show a => Show (Vector n a) | |||||
> deriving instance Eq a => Eq (Vector n a) | |||||
Slicing up Vectors {#slicing} | |||||
================== | |||||
Now that we have a vector type, we'll start out by implementing the 4 basic | |||||
operations for slicing up lists: `head`, `tail`, `init` and `last`. | |||||
Since we're working with complicated types here, it's best to always use type | |||||
signatures. | |||||
Head and Tail {#head-and-tail} | |||||
------------- | |||||
Head is easy - It takes a vector with length `>1`, and returns its first | |||||
element. This could be represented in two ways. | |||||
< head :: (S Z >= x) ~ True => Vector x a -> a | |||||
This type signature means that, if the type-expression `S Z >= x`{.haskell} | |||||
unifies with the type `True` (remember - datakind promotion at work), then head | |||||
takes a `Vector x a` and returns an `a`. | |||||
There is, however, a much simpler way of doing the above. | |||||
> head :: Vector (S x) a -> a | |||||
That is, head takes a vector whose length is the successor of a natural number | |||||
`x` and returns its first element. | |||||
The implementation is just as concise as the one for lists: | |||||
> head (x :| _) = x | |||||
That's it. That'll type-check and compile. | |||||
Trying, however, to use that function on an empty vector will result in a big | |||||
scary type error: | |||||
```plain | |||||
Vector> Vector.head Nil | |||||
<interactive>:1:13: error: | |||||
• Couldn't match type ‘'Z’ with ‘'S x0’ | |||||
Expected type: Vector ('S x0) a | |||||
Actual type: Vector 'Z a | |||||
• In the first argument of ‘Vector.head’, namely ‘Nil’ | |||||
In the expression: Vector.head Nil | |||||
In an equation for ‘it’: it = Vector.head Nil | |||||
``` | |||||
Simplified, it means that while it was expecting the successor of a natural | |||||
number, it got zero instead. This function is total, unlike the one in | |||||
`Data.List`{.haskell}, which fails on the empty list. | |||||
< head [] = error "Prelude.head: empty list" | |||||
< head (x:_) = x | |||||
Tail is just as easy, except in this case, instead of discarding the predecessor | |||||
of the vector's length, we'll use it as the length of the resulting vector. | |||||
This makes sense, as, logically, getting the tail of a vector removes its first | |||||
element, thus "unwrapping" a level of `S`. | |||||
> tail :: Vector (S x) a -> Vector x a | |||||
> tail (_ :| xs) = xs | |||||
Notice how neither of these have a base case for empty vectors. In fact, adding | |||||
one will not typecheck (with the same type of error - Can't unify `Z`{.haskell} | |||||
with `S x`{.haskell}, no matter how hard you try.) | |||||
Init {#init} | |||||
---- | |||||
What does it mean to take the initial of an empty vector? That's obviously | |||||
undefined, much like taking the tail of an empty vector. That is, `init` and | |||||
`tail` have the same type signature. | |||||
> init :: Vector (S x) a -> Vector x a | |||||
The `init` of a singleton list is nil. This type-checks, as the list would have | |||||
had length `S Z` (that is - 1), and now has length `Z`. | |||||
> init (x :| Nil) = Nil | |||||
To take the init of a vector with more than one element, all we do is recur on | |||||
the tail of the list. | |||||
> init (x :| y :| ys) = x :| Vector.init (y :| ys) | |||||
That pattern is a bit weird - it's logically equivalent to `(x :| | |||||
xs)`{.haskell}. But, for some reason, that doesn't make the typechecker happy, | |||||
so we use the long form. | |||||
Last {#last} | |||||
---- | |||||
Last can, much like the list version, be implemented in terms of a left fold. | |||||
The type signature is like the one for head, and the fold is the same as that | |||||
for lists. The foldable instance for vectors is given [here](#Foldable). | |||||
> last :: Vector (S x) a -> a | |||||
> last = foldl (\_ x -> x) impossible where | |||||
Wait - what's `impossible`? Since this is a fold, we do still need an initial | |||||
element - We could use a pointful fold with the head as the starting point, but | |||||
I feel like this helps us to understand the power of dependently-typed vectors: | |||||
That error will _never_ happen. Ever. That's why it's `impossible`! | |||||
> impossible = error "Type checker, you have failed me!" | |||||
That's it for the basic vector operations. We can now slice a vector anywhere | |||||
that makes sense - Though, there's one thing missing: `uncons`. | |||||
Uncons {#uncons} | |||||
------ | |||||
Uncons splits a list (here, a vector) into a pair of first element and rest. | |||||
With lists, this is generally implemented as returning a `Maybe`{.haskell} type, | |||||
but since we can encode the type of a vector in its type, there's no need for | |||||
that here. | |||||
> uncons :: Vector (S x) a -> (a, Vector x a) | |||||
> uncons (x :| xs) = (x, xs) | |||||
Mapping over Vectors {#functor} | |||||
==================== | |||||
We'd like a `map` function that, much like the list equivalent, applies a | |||||
function to all elements of a vector, and returns a vector with the same length. | |||||
This operation should hopefully be homomorphic: That is, it keeps the structure | |||||
of the list intact. | |||||
The `base` package has a typeclass for this kind of morphism, can you guess what | |||||
it is? If you guessed Functor, then you're right! If you didn't, you might | |||||
as well close the article now - Heavy type-fu inbound, though not right now. | |||||
The functor instance is as simple as can be: | |||||
> instance Functor (Vector x) where | |||||
Since Functor expects something of kind `* -> *`, we need to give the | |||||
length in the instance head - And since we do that, the type checker guarantees | |||||
that this is, in fact, a homomorphic relationship. | |||||
Mapping over `Nil` just returns `Nil`. | |||||
> f `fmap` Nil = Nil | |||||
Mapping over a list is equivalent to applying the function to the first element, | |||||
then recurring over the tail of the vector. | |||||
> f `fmap` (x :| xs) = f x :| (fmap f xs) | |||||
We didn't really need an instance of Functor, but I think standalone map is | |||||
silly. | |||||
Folding Vectors {#foldable} | |||||
=============== | |||||
The Foldable class head has the same kind signature as the Functor class head: | |||||
`(* -> *) -> Constraint` (where `Constraint` is the kind of type classes), that | |||||
is, it's defined by the class head | |||||
< class Foldable (t :: Type -> Type) where | |||||
So, again, the length is given in the instance head. | |||||
> instance Foldable (Vector x) where | |||||
> foldr f z Nil = z | |||||
> foldr f z (x :| xs) = f x $ foldr f z xs | |||||
This is _exactly_ the Foldable instance for `[a]`, except the constructors are | |||||
different. Hopefully, by now you've noticed that Vectors have the same | |||||
expressive power as lists, but with more safety enforced by the type checker. | |||||
Conclusion | |||||
========== | |||||
Two thousand words in, we have an implementation of functorial, foldable vectors | |||||
with implementations of `head`, `tail`, `init`, `last` and `uncons`. Since | |||||
going further (implementing `++`, since a Monoid instance is impossible) would | |||||
require implementing closed type families, we'll leave that for next time. | |||||
Next time, we'll tackle the implementation of `drop`, `take`, `index` (`!!`, but | |||||
for vectors), `append`, `length`, and many other useful list functions. | |||||
Eventually, you'd want an implementation of all functions in `Data.List`. We | |||||
shall tackle `filter` in a later issue. | |||||
[^kind]: You can read about [Kind polymorphism and | |||||
Type-in-Type](https://downloads.haskell.org/~ghc/latest/docs/html/users_guide/glasgow_exts.html#kind-polymorphism-and-type-in-type) | |||||
in the GHC manual. | |||||
[^kinds]: The TypeInType extension unifies the type and kind level, but this | |||||
article still uses the word `kind` throughout. This is because it's easier to | |||||
reason about types, datatype promotion and type families if you have separate | |||||
type and kind levels. |
@ -0,0 +1,173 @@ | |||||
--- | |||||
title: Monadic Parsing with User State | |||||
date: August 26, 2016 | |||||
synopsys: 2 | |||||
--- | |||||
> {-# LANGUAGE FlexibleInstances, MultiParamTypeClasses #-} | |||||
> module StatefulParsing where | |||||
> import Control.Monad.State.Class | |||||
> import Control.Applicative | |||||
In this post I propose an extension to the monadic parser framework | |||||
introduced in a previous post, _[You could have invented | |||||
Parsec](/posts/2016-08-17.html)_, that extends | |||||
the parser to also support embedded user state in your parsing. | |||||
This could be used, for example, for parsing a language with | |||||
user-extensible operators: The precedences and fixities of operators | |||||
would be kept in a hashmap threaded along the bind chain. | |||||
Instead of posing these changes as diffs, we will rewrite the parser | |||||
framework from scratch with the updated type. | |||||
--- | |||||
Parser `newtype`{.haskell} | |||||
========================= | |||||
Our new parser is polymorphic in both the return type and the user state | |||||
that, so we have to update the `newtype`{.haskell} declaration to match. | |||||
> newtype Parser state result | |||||
> = Parser { runParser :: String | |||||
> -> state | |||||
> -> Either String (result, state, String) } | |||||
Our tuple now contains the result of the parsing operation and the new | |||||
user state, along with the stream. We still need to supply a stream to | |||||
parse, and now also supply the initial state. This will be reflected in | |||||
our functions. | |||||
For convenience, we also make a `Parser' a`{.haskell} type alias for | |||||
parsers with no user state. | |||||
< type Parser' a = Parser () a | |||||
Seeing as type constructors are also curried, we can apply η-reduction | |||||
to get the following, which is what we'll go | |||||
with. | |||||
> type Parser' = Parser () | |||||
`Functor`{.haskell} instance | |||||
============================ | |||||
> instance Functor (Parser st) where | |||||
The functor instance remains mostly the same, except now we have to | |||||
thread the user state around, too. | |||||
The instance head also changes to fit the kind signature of the | |||||
`Functor`{.haskell} typeclass. Since user state can not change from | |||||
fmapping, this is fine. | |||||
> fn `fmap` (Parser p) = Parser go where | |||||
> go st us = case p st us of | |||||
> Left e -> Left e | |||||
> Right (r, us', st') -> Right (fn r, us', st') | |||||
As you can see, the new user state (`us'`) is just returned as is. | |||||
`Applicative`{.haskell} instance | |||||
================================ | |||||
> instance Applicative (Parser st) where | |||||
The new implementations of `pure`{.haskell} and `<*>`{.haskell} need to | |||||
correctly manipulate the user state. In the case of `pure`, it's just passed | |||||
as-is to the `Right`{.haskell} constructor. | |||||
> pure ret = Parser go where | |||||
> go st us = Right (ret, us, st) | |||||
Since `(<*>)` needs to evaluate both sides before applying the function, we need | |||||
to pass the left-hand side's generated user state to the right-hand side for | |||||
evaluation. | |||||
> (Parser f) <*> (Parser v) = Parser go where | |||||
> go st us = case f st us of | |||||
> Left e -> Left e | |||||
> Right (fn, us', st') -> case v st' us' of | |||||
> Left e -> Left e | |||||
> Right (vl, us'', st'') -> Right (fn vl, us'', st'') | |||||
`Monad`{.haskell} instance | |||||
========================== | |||||
> instance Monad (Parser st) where | |||||
Since we already have an implementation of `pure`{.haskell} from the Applicative | |||||
instance, we don't need to worry about an implementation of `return`. | |||||
> return = pure | |||||
The monad instance is much like the existing monad instance, except now we have | |||||
to give the updated parser state to the new computation. | |||||
> (Parser p) >>= f = Parser go where | |||||
> go s u = case p s u of | |||||
> Left e -> Left e | |||||
> Right (x, u', s') -> runParser (f x) s' u' | |||||
`MonadState`{.haskell} instance | |||||
=============================== | |||||
> instance MonadState st (Parser st) where | |||||
Since we now have a state transformer in the parser, we can make it an instance | |||||
of the MTL's `MonadState` class. | |||||
The implementation of `put`{.haskell} must return `()` (the unit value), and | |||||
needs to replace the existing state with the supplied one. This operation can | |||||
not fail. | |||||
Since this is a parsing framework, we also need to define how the stream is | |||||
going to be affected: In this case, it isn't. | |||||
> put us' = Parser go where | |||||
> go st _ = Right ((), us', st) | |||||
The `get`{.haskell} function returns the current user state, and leaves it | |||||
untouched. This operation also does not fail. | |||||
> get = Parser go where | |||||
> go st us = Right (us, us, st) | |||||
Since we're an instance of `MonadState`{.haskell}, we needn't an implementation | |||||
of `modify` and friends - They're given by the MTL. | |||||
`Alternative`{.haskell} instance | |||||
================================ | |||||
> instance Alternative (Parser st) where | |||||
The `Alternative`{.haskell} instance uses the same state as it was given for | |||||
trying the next parse. | |||||
The `empty`{.haskell} parser just fails unconditionally. | |||||
> empty = Parser go where | |||||
> go _ _ = Left "empty parser" | |||||
`(<|>)` will try both parsers in order, reusing both the state and the stream. | |||||
> (Parser p) <|> (Parser q) = Parser go where | |||||
> go st us = case p st us of | |||||
> Left e -> q st us | |||||
> Right v -> Right v | |||||
Conclusion | |||||
========== | |||||
This was a relatively short post. This is because many of the convenience | |||||
functions defined in the previous post also work with this parser framework, if | |||||
you replace `Parser` with `Parser'`. You can now use `get`, `put` and `modify` | |||||
to work on the parser's user state. As a closing note, a convenience function | |||||
for running parsers with no state is given. | |||||
> parse :: Parser' a -> String -> Either String a | |||||
> parse p str = case runParser p str () of | |||||
> Left e -> Left e | |||||
> Right (x, _, _) -> Right x | |||||
@ -0,0 +1,155 @@ | |||||
--- | |||||
title: Delimited Continuations, Urn and Lua | |||||
date: August 1, 2017 | |||||
--- | |||||
As some of you might know, [Urn](https://squiddev.github.io/urn) is my | |||||
current pet project. This means that any potential upcoming blag posts | |||||
are going to involve it in some way or another, and that includes this | |||||
one. For the uninitiated, Urn is a programming language which compiles | |||||
to Lua[^1], in the Lisp tradition, with no clear ascendance: We take | |||||
inspiration from several Lisps, most notably Common Lisp and Scheme. | |||||
As functional programmers at heart, we claim to be minimalists: Urn is | |||||
reduced to 12 core forms before compilation, with some of those being | |||||
redundant (e.g. having separate `define` and `define-macro` builtins | |||||
instead of indicating that the definition is a macro through | |||||
a parameter). On top of these primitives we build all our abstraction, | |||||
and one such abstraction is what this post is about: _Delimited | |||||
continuations_. | |||||
Delimited continuations are a powerful control abstraction first | |||||
introduced by Matthias Felleisein[^2], initially meant as | |||||
a generalisation of several other control primitives such as | |||||
`call-with-current-continuation`{.scheme} from Scheme among others. | |||||
However, whereas `call-with-current-continuation`{.scheme} captures | |||||
a continuation representing the state of the entire program after that | |||||
point, delimited continuations only reify a slice of program state. In | |||||
this, they are cheaper to build and invoke, and as such may be used to | |||||
implement e.g. lightweight threading primitives. | |||||
While this may sound rather limiting, there are very few constructs that | |||||
can simultaneously be implemented with | |||||
`call-with-current-continuation`{.scheme} without also being expressible | |||||
in terms of delimited continuations. The converse, however, is untrue. | |||||
While `call/cc`{.scheme} can be used to implement any control abstraction, | |||||
it can't implement any _two_ control abstractions: the continuations it | |||||
reifies are uncomposable[^3]. | |||||
### Delimited Continuations in Urn | |||||
Our implementation of delimited continuations follows the Guile Scheme | |||||
tradition of two functions `call-with-prompt` and `abort-to-prompt`, | |||||
which are semantically equivalent to the more traditional | |||||
`shift`/`reset`. This is, however, merely an implementation detail, as | |||||
both schemes are available. | |||||
We have decided to base our implementation on Lua's existing coroutine | |||||
machinery instead of implementing an ad-hoc solution especially for Urn. | |||||
This lets us reuse and integrate with existing Lua code, which is one of | |||||
the goals for the language. | |||||
`call-with-prompt` is used to introduce a _prompt_ into scope, which | |||||
delimits a frame of execution and sets up an abort handler with the | |||||
specified tag. Later on, calls to `abort-to-prompt` reify the rest of | |||||
the program slice's state and jump into the handler set up. | |||||
```lisp | |||||
(call/p 'a-prompt-tag | |||||
(lambda () | |||||
; code to run with the prompt | |||||
) | |||||
(lambda (k) | |||||
; abort handler | |||||
)) | |||||
``` | |||||
One limitation of the current implementation is that the continuation, | |||||
when invoked, will no longer have the prompt in scope. A simple way to | |||||
get around this is to store the prompt tag and handler in values and use | |||||
`call/p`[^4] again instead of directly calling the continuation. | |||||
Unfortunately, being implemented on top of Lua coroutines does bring one | |||||
significant disadvantage: The reified continuations are single-use. | |||||
After a continuation has reached the end of its control frame, there's | |||||
no way to make it go back, and there's no way to copy continuations | |||||
either (while we have a wrapper around coroutines, the coroutines | |||||
themselves are opaque objects, and there's no equivalent of | |||||
`string.dump`{.lua} to, for instance, decompile and recompile them) | |||||
### Why? | |||||
In my opinion (which, like it or not, is the opinion of the Urn team), | |||||
Guile-style delimited continuations provide a much better abstraction | |||||
than operating with Lua coroutines directly, which may be error prone | |||||
and feels out of place in a functional-first programming language. | |||||
As a final motivating example, below is an in-depth explanation of | |||||
a tiny cooperative task scheduler. | |||||
```lisp | |||||
(defun run-tasks (&tasks) ; 1 | |||||
(loop [(queue tasks)] ; 2 | |||||
[(empty? queue)] ; 2 | |||||
(call/p 'task (car queue) | |||||
(lambda (k) | |||||
(when (alive? k) | |||||
(push-cdr! queue k)))) ; 3 | |||||
(recur (cdr queue)))) ; 4 | |||||
``` | |||||
1. We begin, of course, by defining our function. As inputs, we take | |||||
a list of tasks to run, which are generally functions, but may be Lua | |||||
coroutines (`threads`) or existing continuations, too. As a sidenote, | |||||
in Urn, variadic arguments have `&` prepended to them, instead of | |||||
having symbols beginning with `&` acting as modifiers in a lambda-list. | |||||
For clarity, that is wholly equivalent to `(defun run-tasks (&rest | |||||
tasks)`{.lisp}. | |||||
2. Then, we take the first element of the queue as the current task to | |||||
run, and set up a prompt of execution. The task will run until it | |||||
hits an `abort-to-prompt`, at which point it will be interrupted and | |||||
the handler will be invoked. | |||||
3. The handler inspects the reified continuation to see if it is | |||||
suitable for being scheduled again, and if so, pushes it to the end | |||||
of the queue. This means it'll be the first task to execute again | |||||
when the scheduler is done with the current set of working tasks. | |||||
4. We loop back to the start with the first element (the task we just | |||||
executed) removed. | |||||
Believe it or not, the above is a fully functioning cooperative | |||||
scheduler that can execute any number of tasks.[^5] | |||||
### Conclusion | |||||
I think that the addition of delimited continuations to Urn brings | |||||
a much needed change in the direction of the project: Moving away from | |||||
ad-hoc abstraction to structured, proven abstraction. Hopefully this is | |||||
the first of many to come. | |||||
[^1]: Though this might come off as a weird decision to some, there is | |||||
a logical reason behind it: Urn was initially meant to be used in the | |||||
[ComputerCraft](https://computercraft.info) mod for Minecraft, which | |||||
uses the Lua programming language, though the language has outgrown it | |||||
by now. For example, the experimental `readline` support is being | |||||
implemented with the LuaJIT foreign function interface. | |||||
[^2]: [The Theory and Practice of First-Class | |||||
Prompts](http://www.cs.tufts.edu/~nr/cs257/archive/matthias-felleisen/prompts.pdf). | |||||
[^3]: Oleg Kiselyov demonstrates | |||||
[here](http://okmij.org/ftp/continuations/against-callcc.html#traps) | |||||
that abstractions built on `call/cc`{.scheme} do not compose. | |||||
[^4]: `call-with-prompt` is a bit of a mouthful, so the alias `call/p` | |||||
is blessed. | |||||
[^5]: There's a working example [here](/static/tasks.lisp) ([with syntax | |||||
highlighting](/static/tasks.lisp.html)) as runnable Urn. Clone the | |||||
compiler then execute `lua bin/urn.lua --run tasks.lisp`. | |||||
<!-- vim: tw=72 | |||||
--> |
@ -0,0 +1,231 @@ | |||||
--- | |||||
title: The Urn Pattern Matching Library | |||||
date: August 2, 2017 | |||||
--- | |||||
Efficient compilation of pattern matching is not exactly an open problem | |||||
in computer science in the same way that implementing say, type systems, | |||||
might be, but it's still definitely possible to see a lot of mysticism | |||||
surrounding it. | |||||
In this post I hope to clear up some misconceptions regarding the | |||||
implementation of pattern matching by demonstrating one such | |||||
implementation. Do note that our pattern matching engine is strictly | |||||
_linear_, in that pattern variables may only appear once in the match | |||||
head. This is unlike other languages, such as Prolog, in which variables | |||||
appearing more than once in the pattern are unified together. | |||||
### Structure of a Pattern Match | |||||
Pattern matching always involves a pattern (the _match head_, as we call | |||||
it) and a value to be compared against that pattern, the _matchee_. | |||||
Sometimes, however, a pattern match will also include a body, to be | |||||
evaluated in case the pattern does match. | |||||
```lisp | |||||
(case 'some-value ; matchee | |||||
[some-pattern ; match head | |||||
(print! "some body")]) ; match body | |||||
``` | |||||
As a side note, keep in mind that `case`{.lisp} has linear lookup of | |||||
match bodies. Though logarithmic or constant-time lookup might be | |||||
possible, it is left as an exercise for the reader. | |||||
### Compiling Patterns | |||||
To simplify the task of compiling patterns to an intermediate form without
them we divide their compilation into two big steps: compiling the | |||||
pattern's test and compiling the pattern's bindings. We do so | |||||
_inductively_ - there are a few elementary pattern forms on which the | |||||
more complicated ones are built upon. | |||||
Most of these elementary forms are very simple, but two are the | |||||
simplest: _atomic forms_ and _pattern variables_. An atomic form is the | |||||
pattern correspondent of a self-evaluating form in Lisp: a string, an | |||||
integer, a symbol. We compare these for pointer equality. Pattern | |||||
variables represent unknowns in the structure of the data, and a way to | |||||
capture these unknowns. | |||||
+------------------+----------+-------------+ | |||||
| Pattern | Test | Bindings | | |||||
+:=================+:=========+:============+ | |||||
| Atomic form | Equality | Nothing | | |||||
+------------------+----------+-------------+ | |||||
| Pattern variable | Nothing | The matchee | | |||||
+------------------+----------+-------------+ | |||||
All compilation forms take as input the pattern to compile along with | |||||
a symbol representing the matchee. Patterns which involve other patterns | |||||
(for instance, lists, conses) will call the appropriate compilation | |||||
forms with the symbol modified to refer to the appropriate component of | |||||
the matchee. | |||||
Let's quickly have a look at compiling these elementary patterns before | |||||
looking at the more interesting ones. | |||||
```lisp | |||||
(defun atomic-pattern-test (pat sym) | |||||
`(= ,pat ,sym)) | |||||
(defun atomic-pattern-bindings (pat sym) | |||||
'()) | |||||
``` | |||||
Atomic forms are the simplest to compile - we merely test that the | |||||
symbol's value is equal (with `=`, which compares identities, instead of | |||||
with `eq?` which checks for equivalence - more complicated checks, such | |||||
as handling list equality, need not be handled by the equality function | |||||
as we handle them in the pattern matching library itself) and emit no | |||||
bindings. | |||||
```lisp | |||||
(defun variable-pattern-test (pat sym) | |||||
`true) | |||||
(defun variable-pattern-bindings (pat sym) | |||||
(list `(,pat ,sym))) | |||||
``` | |||||
The converse is true for pattern variables, which have no test and bind | |||||
themselves. The returned bindings are in association list format, and | |||||
the top-level macro that users invoke will collect these and then bind
them with `let*`{.lisp}. | |||||
Composite forms are a bit more interesting: These include list patterns | |||||
and cons patterns, for instance, and we'll look at implementing both. | |||||
Let's start with list patterns. | |||||
To determine if a list matches a pattern we need to test for several | |||||
things: | |||||
1. First, we need to test if it actually is a list at all! | |||||
2. The length of the list is also tested, to see if it matches the length | |||||
of the elements stated in the pattern | |||||
3. We check every element of the list against the corresponding elements | |||||
of the pattern | |||||
With the requirements down, here's the implementation. | |||||
```lisp | |||||
(defun list-pattern-test (pat sym) | |||||
`(and (list? ,sym) ; 1 | |||||
(= (n ,sym) ,(n pat)) ; 2 | |||||
,@(map (lambda (index) ; 3 | |||||
(pattern-test (nth pat index) `(nth ,sym ,index))) | |||||
(range :from 1 :to (n pat))))) | |||||
``` | |||||
To test for the third requirement, we call a generic dispatch function | |||||
(which is trivial, and thus has been inlined) to compile the $n$th pattern | |||||
in the list against the $n$th element of the actual list. | |||||
List pattern bindings are similarly easy: | |||||
```lisp | |||||
(defun list-pattern-bindings (pat sym) | |||||
(flat-map (lambda (index) | |||||
(pattern-bindings (nth pat index) `(nth ,sym ,index))) | |||||
(range :from 1 :to (n pat)))) | |||||
``` | |||||
Compiling cons patterns is similarly easy if your Lisp is proper: We | |||||
only need to check for `cons`{.lisp}-ness (or `list`{.lisp}-ness, less | |||||
generally), then match the given patterns against the car and the cdr. | |||||
```lisp | |||||
(defun cons-pattern-test (pat sym) | |||||
`(and (list? ,sym) | |||||
,(pattern-test (cadr pat) `(car ,sym)) | |||||
,(pattern-test (caddr pat) `(cdr ,sym)))) | |||||
(defun cons-pattern-bindings (pat sym) | |||||
(append (pattern-bindings (cadr pat) `(car ,sym)) | |||||
(pattern-bindings (caddr pat) `(cdr ,sym)))) | |||||
``` | |||||
Note that, in Urn, `cons` patterns have the more general form `(pats* | |||||
. pat)` (using the asterisk with its usual meaning), and can
match any number of elements in the head. It is also less efficient than | |||||
expected, due to the nature of `cdr` copying the list's tail. (Our lists | |||||
are not linked - rather, they are implemented over Lua arrays, and as | |||||
such, removing the first element is rather inefficient.) | |||||
### Using patterns | |||||
Now that we can compile a wide assortment of patterns, we need a way to | |||||
actually use them to scrutinize data. For this, we implement two forms: | |||||
an improved version of `destructuring-bind`{.lisp} and `case`{.lisp}. | |||||
Implementing `destructuring-bind`{.lisp} is simple: We only have | |||||
a single pattern to test against, and thus no search is necessary. We
simply generate the pattern test and the appropriate bindings, and | |||||
generate an error if the pattern does not match. Generating a friendly
error message is similarly left as an exercise for the reader. | |||||
Note that as a well-behaving macro, destructuring bind will not evaluate | |||||
the given variable more than once. It does this by binding it to | |||||
a temporary name and scrutinizing that name instead. | |||||
```lisp | |||||
(defmacro destructuring-bind (pat var &body) | |||||
(let* [(variable (gensym 'var)) | |||||
(test (pattern-test pat variable)) | |||||
(bindings (pattern-bindings pat variable))] | |||||
`(with (,variable ,var) | |||||
(if ,test | |||||
(progn ,@body) | |||||
(error! "pattern matching failure"))))) | |||||
``` | |||||
Implementing case is a bit more difficult in a language without | |||||
`cond`{.lisp}, since the linear structure of a pattern-matching case | |||||
statement would have to be transformed into a tree of `if`-`else` | |||||
combinations. Fortunately, this is not our case (pun intended, | |||||
definitely.) | |||||
```lisp | |||||
(defmacro case (var &cases) | |||||
(let* [(variable (gensym 'variable))] | |||||
`(with (,variable ,var) | |||||
(cond ,@(map (lambda (c) | |||||
`(,(pattern-test (car c) variable) | |||||
(let* ,(pattern-bindings (car c) variable) | |||||
,@(cdr c)))) | |||||
cases))))) | |||||
``` | |||||
Again, we prevent reevaluation of the matchee by binding it to | |||||
a temporary symbol. This is especially important in an impure, | |||||
expression-oriented language as evaluating the matchee might have side | |||||
effects! Consider the following contrived example: | |||||
```lisp | |||||
(case (progn (print! "foo") | |||||
123) | |||||
[1 (print! "it is one")] | |||||
[2 (print! "it is two")] | |||||
[_ (print! "it is neither")]) ; _ represents a wild card pattern. | |||||
``` | |||||
If the matchee wasn't bound to a temporary value, `"foo"` would be | |||||
printed thrice in this example. Both the toy implementation presented | |||||
here and the implementation in the Urn standard library will only | |||||
evaluate matchees once, thus preventing effect duplication. | |||||
### Conclusion | |||||
Unlike previous blog posts, this one isn't runnable Urn. If you're | |||||
interested, I recommend checking out [the actual | |||||
implementation](https://gitlab.com/urn/urn/blob/master/lib/match.lisp). | |||||
It gets a bit hairy at times, particularly with handling of structure | |||||
patterns (which match Lua tables), but it's similar enough to the above | |||||
that this post should serve as a vague map of how to read it. | |||||
In a bit of a meta-statement I want to point out that this is the first | |||||
(second, technically!) of a series of posts detailing the interesting | |||||
internals of the Urn standard library: It fixes two things in the sorely | |||||
lacking category: content in this blag, and standard library | |||||
documentation. | |||||
Hopefully this series is as nice to read as it is for me to write, and | |||||
here's hoping I don't forget about this blag for a year again. |
@ -0,0 +1,276 @@ | |||||
--- | |||||
title: Optimisation through Constraint Propagation | |||||
date: August 06, 2017 | |||||
--- | |||||
Constraint propagation is a new optimisation proposed for implementation | |||||
in the Urn compiler[^mr]. It is a variation on the idea of | |||||
flow-sensitive typing in that it is not applied to increasing program | |||||
safety, rather being used to improve _speed_. | |||||
### Motivation | |||||
The Urn compiler is decently fast for being implemented in Lua. | |||||
Currently, it manages to compile itself (and a decent chunk of the | |||||
standard library) in about 4.5 seconds (when using LuaJIT; When using | |||||
the lua.org interpreter, this time roughly doubles). Looking at | |||||
a call-stack profile of the compiler, we notice a very interesting data | |||||
point: about 11% of compiler runtime is spent in the `(type)` function. | |||||
There are two ways to fix this: Either we introduce a type system (which | |||||
is insanely hard to do for a language as dynamic as Urn - or Lisp in | |||||
general) or we reduce the number of calls to `(type)` by means of | |||||
optimisation. Our current plan is to do the latter. | |||||
### How | |||||
The proposed solution is to collect all the branches that the program | |||||
has taken to end up in the state it currently is. Thus, every branch | |||||
grows the set of "constraints" - the predicates which have been invoked | |||||
to get the program here. | |||||
Most useful predicates involve a variable: Checking if it is or isn't | |||||
nil, if it is positive or negative, even or odd, a list or a string,
etc. However, when talking about a single variable, this test only has
to be performed _once_ (in general - mutating the variable invalidates | |||||
the set of collected constraints), and their truthiness can be kept, by | |||||
the compiler, for later use. | |||||
As an example, consider the following code. It has three branches, all | |||||
of which imply something different about the type of the variable `x`. | |||||
```lisp | |||||
(cond | |||||
[(list? x)] ; first case | |||||
[(string? x)] ; second case | |||||
[(number? x)]) ; third case | |||||
``` | |||||
If, in the first case, the program then evaluated `(car x)`, it'd end up | |||||
doing a redundant type check. `(car)`, is, in the standard library, | |||||
implemented like so: | |||||
```lisp | |||||
(defun car (x) | |||||
(assert-type! x list) | |||||
(.> x 0)) | |||||
``` | |||||
`assert-type!` is merely a macro to make checking the types of arguments | |||||
more convenient. Let's make the example of branching code a bit more | |||||
complicated by making it take and print the `car` of the list. | |||||
```lisp | |||||
(cond | |||||
[(list? x) | |||||
(print! (car x))]) | |||||
; other branches elided for clarity | |||||
``` | |||||
To see how constraint propagation would aid the runtime performance of | |||||
this code, let's play optimiser for a bit, and see what this code would | |||||
end up looking like at each step. | |||||
First, `(car x)` is inlined. | |||||
```lisp | |||||
(cond | |||||
[(list? x) | |||||
(print! (progn (assert-type! x list) | |||||
(.> x 0)))]) | |||||
``` | |||||
`assert-type!` is expanded, and the problem becomes apparent: the type | |||||
of `x` is being computed _twice_! | |||||
```lisp | |||||
(cond | |||||
[(list? x) | |||||
(print! (progn (if (! (list? x)) | |||||
(error! "the argument x is not a list")) | |||||
(.> x 0)))]) | |||||
``` | |||||
If the compiler had constraint propagation (and the associated code | |||||
motions), this code could be simplified further. | |||||
```lisp | |||||
(cond | |||||
[(list? x) | |||||
(print! (.> x 0))]) | |||||
``` | |||||
Seeing as we already know that `(list? x)` is true, we don't need to | |||||
test anymore, and the conditional can be entirely eliminated. Figuring | |||||
out `(! (list? x))` from `(list? x)` is entirely trivial constant | |||||
folding (the compiler already does it) | |||||
This code is optimal. The `(list? x)` test can't be eliminated because | |||||
nothing else is known about `x`. If its value were statically known, the | |||||
compiler could eliminate the branch and invocation of `(car x)` | |||||
completely by constant propagation and folding (`(car)` is, type | |||||
assertion notwithstanding, a pure function - it returns the same results | |||||
for the same inputs. Thus, it is safe to execute at compile time) | |||||
### How, exactly | |||||
In this section I'm going to outline a very simple implementation of the | |||||
constraint propagation algorithm to be employed in the Urn compiler. | |||||
It'll work on a simple Lisp with no quoting or macros (thus, basically | |||||
the lambda calculus). | |||||
```lisp | |||||
(lambda (var1 var2) exp) ; λ-abstraction | |||||
(foo bar baz) ; procedure application | |||||
var ; variable reference | |||||
(list x y z) ; list | |||||
t, nil ; boolean | |||||
(cond [t1 b1] [t2 b2]) ; conditional | |||||
``` | |||||
The language has very simple semantics. It has three kinds of values | |||||
(closures, lists and booleans), and only a couple reduction rules. The | |||||
evaluation rules are presented as an interpretation function (in Urn, | |||||
not the language itself). | |||||
```lisp | |||||
(defun interpret (x env) | |||||
(case x | |||||
[(lambda ?params . ?body) | |||||
`(:closure ,params ,body ,(copy env))] ; 1 | |||||
[(list . ?xs) | |||||
(map (cut interpret <> env) xs)] ; 2 | |||||
[t true] [nil false] ; 3 | |||||
[(cond . ?alts) ; 4 | |||||
(interpret | |||||
(block (map (lambda (alt) | |||||
(when (interpret (car alt) env) | |||||
(break (cdr alt)))))) | |||||
env)] | |||||
[(?fn . ?args) | |||||
(case (eval fn env) | |||||
[(:closure ?params ?body ?cl-env) ; 5 | |||||
(map (lambda (a k) | |||||
(.<! cl-env (symbol->string a) (interpret k env))) | |||||
params args) | |||||
(last (map (cut interpret <> env) body))] | |||||
[_ (error! $"not a procedure: ${fn}")])] | |||||
[else (.> env (symbol->string x))])) | |||||
``` | |||||
1. In the case the expression currently being evaluated is a lambda, we | |||||
make a copy of the current environment and store it in a _closure_. | |||||
2. If a list is being evaluated, we recursively evaluate each | |||||
sub-expression and store all of them in a list. | |||||
3. If a boolean is being interpreted, they're mapped to the respective | |||||
values in the host language. | |||||
4. If a conditional is being evaluated, each test is performed in order, | |||||
and we abort to interpret with the corresponding body. | |||||
5. When evaluating a procedure application, the procedure to apply is | |||||
inspected: If it is a closure, we evaluate all the arguments, bind | |||||
them along with the closure environment, and interpret the body. If | |||||
not, an error is thrown. | |||||
Collecting constraints in a language as simple as this is fairly easy, | |||||
so here's an implementation. | |||||
```lisp | |||||
(defun collect-constraints (expr (constrs '())) | |||||
(case expr | |||||
[(lambda ?params . ?body) | |||||
`(:constraints (lambda ,params | |||||
,@(map (cut collect-constraints <> constrs) body)) | |||||
,constrs)] | |||||
``` | |||||
Lambda expressions incur no additional constraints, so the inner | |||||
expressions (namely, the body) receive the old set. | |||||
The same is true for lists: | |||||
```lisp | |||||
[(list . ?xs) | |||||
`(:constraints (list ,@(map (cut collect-constraints <> constrs) xs)) | |||||
,constrs)] | |||||
``` | |||||
Booleans are simpler: | |||||
```lisp | |||||
[t `(:constraints ,'t ,constrs)] | |||||
[nil `(:constraints ,'nil ,constrs)] | |||||
``` | |||||
Since there are no sub-expressions to go through, we only associate the | |||||
constraints with the boolean values. | |||||
Conditionals are where the real work happens. For each case, we add that | |||||
case's test as a constraint in its body. | |||||
```lisp | |||||
[(cond . ?alts) | |||||
`(:constraints | |||||
(cond | |||||
,@(map (lambda (x) | |||||
`(,(collect-constraints (car x) constrs) | |||||
,(collect-constraints (cadr x) (cons (car x) constrs)))) | |||||
alts)) | |||||
,constrs)] | |||||
``` | |||||
Applications are as simple as lists. Note that we make no distinction | |||||
between valid applications and invalid ones, and just tag both. | |||||
```lisp | |||||
[(?fn . ?args) | |||||
`(:constraints | |||||
(,(collect-constraints fn constrs) | |||||
,@(map (cut collect-constraints <> constrs) | |||||
args)) | |||||
,constrs)] | |||||
``` | |||||
References are also straightforward: | |||||
```lisp | |||||
[else `(:constraints ,expr ,constrs)])) | |||||
``` | |||||
That's it! Now, this information can be exploited to select a case | |||||
branch at compile time, and eliminate the overhead of performing the | |||||
test again. | |||||
This is _really_ easy to do in a compiler that already has constant | |||||
folding of alternatives. All we have to do is associate constraints to | |||||
truthy values. For instance: | |||||
```lisp | |||||
(defun fold-on-constraints (x) | |||||
(case x | |||||
[((:constraints ?e ?x) | |||||
:when (known? e x)) | |||||
't] | |||||
[else x])) | |||||
``` | |||||
That's it! We check if the expression is in the set of known | |||||
constraints, and if so, reduce it to true. Then, the constant folding | |||||
code will take care of eliminating the redundant branches. | |||||
### When | |||||
This is a really complicated question. The Urn core language, | |||||
unfortunately, is a tad more complicated, as is the existing optimiser. | |||||
Collecting constraints and eliminating tests would be in completely | |||||
different parts of the compiler. | |||||
There is also a series of code motions that need to be in place for | |||||
constraints to be propagated optimally, especially when panic edges are | |||||
involved. Fortunately, these are all simple to implement, but it's still | |||||
a whole lot of work. | |||||
I don't feel confident setting a specific timeframe for this, but | |||||
I _will_ post more blags on this topic. It's fascinating (for me, at | |||||
least) and will hopefully make the compiler faster! | |||||
[^mr]: The relevant merge request can be found | |||||
[here](https://gitlab.com/urn/urn/issues/27). | |||||
@ -0,0 +1,280 @@ | |||||
--- | |||||
title: Multimethods in Urn | |||||
date: August 15, 2017 | |||||
--- | |||||
`multimethod`, noun. A procedure which decides runtime behaviour based | |||||
on the types of its arguments. | |||||
### Introduction | |||||
At some point, most programming language designers realise that they've | |||||
outgrown the language's original feature set and must somehow expand it. | |||||
Sometimes, this expansion is painless, for example, if the language
already had features in place to facilitate this, such as type classes or
message passing. | |||||
In our case, however, we had to decide on and implement a performant | |||||
system for extensibility in the standard library, from scratch. For | |||||
a while, Urn was using Lua's scheme for modifying the behaviour of | |||||
standard library functions: metamethods in metatables. For the | |||||
uninitiated, Lua tables can have _meta_-tables attached to modify their | |||||
behaviour with respect to several language features. As an example, the | |||||
metamethod `__add`{.lua} controls how Lua will add two tables. | |||||
However, this was not satisfactory, the most important reason as to why | |||||
being the fact that metamethods are associated with particular object | |||||
_instances_, instead of being associated with the _types_ themselves. | |||||
This meant that all the operations you'd like to modify had to be | |||||
modified in one big go - inside the constructor. Consider the | |||||
constructor for hash-sets as it was implemented before the addition of | |||||
multimethods. | |||||
```lisp | |||||
(defun make-set (hash-function) | |||||
(let* [(hash (or hash-function id))] | |||||
(setmetatable | |||||
{ :tag "set" | |||||
:hash hash | |||||
:data {} } | |||||
{ :--pretty-print | |||||
(lambda (x) | |||||
(.. "«hash-set: " (concat (map pretty (set->list x)) " ") "»")) | |||||
:--compare #| elided for brevity |# }))) | |||||
``` | |||||
That second table, the meta table, is entirely noise. The fact that | |||||
constructors also had to specify behaviour, instead of just data, was | |||||
annoying from a code style point of view and _terrible_ from a reuse | |||||
point of view. Behaviour is closely tied to the implementation - remember | |||||
that metamethods are tied to the _instance_. To extend the behaviour of | |||||
standard library functions (which you can't redefine) for a type you do | |||||
not control (whose constructor you also can not override), you suddenly | |||||
need to wrap the constructor and add your own metamethods. | |||||
### Finding a Solution | |||||
Displeased with the situation as it stood, I set out to discover what | |||||
other Lisps did, and it seemed like the consensus solution was to | |||||
implement open multimethods. And so we did. | |||||
Multimethods - or multiple dispatch in general - is one of the best | |||||
solutions to the expression problem. We can easily add new types, and | |||||
new operations to work on existing types - and most importantly, this | |||||
means touching _no_ existing code. | |||||
Our implementation is, like almost everything in Urn, a combination of | |||||
clever (ab)use of macros, tables and functions. A method is represented | |||||
as a table - more specifically, an n-ary tree of possible cases, with
a metamethod, `__call`{.lua}, which means multimethods can be called and | |||||
passed around like regular functions - they are first-class.
Upon calling a multimethod, it'll look up the correct method body to | |||||
call for the given arguments - or the default method, or throw an error, | |||||
if no default method is provided - and tail-call that, with all the | |||||
arguments. | |||||
Before diving into the ridiculously simple implementation, let's look at | |||||
a handful of examples. | |||||
#### Pretty printing | |||||
Pretty printing is, quite possibly, the simplest application of multiple | |||||
dispatch to extensibility. As of | |||||
[`ba289d2d`](https://gitlab.com/urn/urn/commit/ba829d2de30e3b1bef4fa1a22a5e4bbdf243426b), | |||||
the standard library implementation of `pretty` is a multimethod. | |||||
Before, the implementation[^1] would perform a series of type tests and | |||||
decide on the behaviour, including testing if the given object had | |||||
a metatable which overrides the pretty-printing behaviour. | |||||
The new implementation is _significantly_ shorter, so much so that I'm | |||||
comfortable pasting it here. | |||||
```lisp | |||||
(defgeneric pretty (x) | |||||
"Pretty-print a value.") | |||||
``` | |||||
That's it! All of the logic that used to exist is now provided by the | |||||
`defgeneric` macro, and adding support for your types is as simple as | |||||
using `defmethod`.[^2] | |||||
```lisp | |||||
(defmethod (pretty string) (x) | |||||
(format "%q" x)) | |||||
``` | |||||
As another example, let's define - and assume the following are separate | |||||
modules - a new type, and add pretty printing support for that. | |||||
```lisp | |||||
; Module A - A box. | |||||
(defun box (x) | |||||
{ :tag "box" | |||||
:value x }) | |||||
``` | |||||
The Urn function `type` will look for a `tag` element in tables and | |||||
report that as the type if it is present, and that function is what the | |||||
multimethod infrastructure uses to determine the correct body to call. | |||||
This means that all we need to do if we want to add support for | |||||
pretty-printing boxes is use defmethod again! | |||||
```lisp | |||||
(defmethod (pretty box) (x) "🎁") | |||||
``` | |||||
#### Comparison | |||||
A more complicated application of multiple dispatch for extensibility is | |||||
the implementation of the `eq?` method in the standard library. | |||||
Before[^3], based on a series of conditionals, the equality test was | |||||
chosen at runtime. | |||||
Anyone with experience optimising code is wincing at the mere thought of | |||||
this code. | |||||
The new implementation of `eq?` is also comically short - a mere 2 lines | |||||
for the definition, and only a handful of lines for all the previously | |||||
existing cases. | |||||
```lisp | |||||
(defgeneric eq? (x y) | |||||
"Compare values for equality deeply.") | |||||
(defmethod (eq? symbol symbol) (x y) | |||||
(= (get-idx x :contents) (get-idx y :contents))) | |||||
(defmethod (eq? string symbol) (x y) (= x (get-idx y :contents))) | |||||
(defmethod (eq? symbol string) (x y) (= (get-idx x :contents) y)) | |||||
``` | |||||
If we would, as an example, add support for comparing boxes, the | |||||
implementation would similarly be short. | |||||
```lisp | |||||
(defmethod (eq? box box) (x y) | |||||
(= (.> x :value) (.> y :value))) | |||||
``` | |||||
### Implementation | |||||
`defgeneric` and `defmethod` are, quite clearly, macros. However, | |||||
contrary to what one would expect, both their implementations are | |||||
_quite_ simple. | |||||
```lisp | |||||
(defmacro defgeneric (name ll &attrs) | |||||
(let* [(this (gensym 'this)) | |||||
(method (gensym 'method))] | |||||
`(define ,name | |||||
,@attrs | |||||
(setmetatable | |||||
{ :lookup {} } | |||||
{ :__call (lambda (,this ,@ll) | |||||
(let* [(,method (deep-get ,this :lookup ,@(map (lambda (x) | |||||
`(type ,x)) ll)))] | |||||
(unless ,method | |||||
(if (get-idx ,this :default) | |||||
(set! ,method (get-idx ,this :default)) | |||||
(error "elided for brevity"))) | |||||
(,method ,@ll))) })))) | |||||
``` | |||||
Everything `defgeneric` has to do is define a top-level symbol to hold | |||||
the multimethod table, and generate, at compile time, a lookup function | |||||
specialised for the correct number of arguments. In a language without | |||||
macros, multimethod calls would have to - at runtime - loop over the | |||||
provided arguments, take their types, and access the correct elements in | |||||
the table. | |||||
As an example of how generating the lookup function at compile time is | |||||
better for performance, consider the (cleaned up[^4]) lookup function | |||||
generated for the `(eq?)` method defined above. | |||||
```lua | |||||
function(this, x, y) | |||||
local method | |||||
if this.lookup then | |||||
local temp1 = this.lookup[type(x)] | |||||
if temp1 then | |||||
method = temp1[type(y)] or nil | |||||
else | |||||
method = nil | |||||
end | |||||
elseif this.default then | |||||
method = this.default | |||||
end | |||||
if not method then | |||||
error("No matching method to call for...") | |||||
end | |||||
return method(x, y) | |||||
end | |||||
``` | |||||
`defmethod` and `defdefault` are very simple and uninteresting macros: | |||||
All they do is wrap the provided body in a lambda expression along with | |||||
the proper argument list and associate them to the correct element in | |||||
the tree. | |||||
```lisp | |||||
(defmacro defmethod (name ll &body) | |||||
`(put! ,(car name) (list :lookup ,@(map s->s (cdr name))) | |||||
(let* [(,'myself nil)] | |||||
(set! ,'myself (lambda ,ll ,@body)) | |||||
,'myself))) | |||||
``` | |||||
### Conclusion | |||||
Switching to methods instead of a big if-else chain improved compiler | |||||
performance by 12% under LuaJIT, and 2% under PUC Lua. The performance
increase under LuaJIT can be attributed to the use of polymorphic inline | |||||
caches to speed up dispatch, which is now just a handful of table | |||||
accesses - Doing it with the if-else chain is _much_ harder. | |||||
Defining complex multiple-dispatch methods used to be an unthinkable | |||||
hassle what with keeping straight which cases have been defined yet and | |||||
which cases haven't, but they're now very simple to define: Just state | |||||
out the number of arguments and list all possible cases. | |||||
The fact that multimethods are _open_ means that new cases can be added | |||||
on the fly, at runtime (though this is not officially supported, and we | |||||
don't claim responsibility if you shoot your own foot), and that modules | |||||
loaded later may improve upon the behaviour of modules loaded earlier. | |||||
This means less coupling between the standard library, which has been | |||||
growing to be quite large. | |||||
This change has, in my opinion, made Urn a lot more expressive as | |||||
a language, and I'd like to take a minute to point out the power of the | |||||
Lisp family in adding complicated features such as these as merely | |||||
library code: no changes were made to the compiler, apart from a tiny | |||||
one regarding environments in the REPL - previously, it'd use the | |||||
compiler's version of `(pretty)` even if the user had overridden it, | |||||
which wasn't a problem with the metatable approach, but definitely is | |||||
with the multimethod approach. | |||||
Of course, no solution is all _good_. Compiled code size has increased | |||||
a fair bit, and for the Urn compiler to inline across multimethod | |||||
boundaries would be incredibly difficult - These functions are | |||||
essentially opaque boxes to the compiler. | |||||
Dead code elimination is harder, what with defining functions now being | |||||
a side-effect to be performed at runtime - Telling which method cases | |||||
are or aren't used is incredibly difficult with the extent of the | |||||
dynamicity. | |||||
[^1]: | |||||
[Here](https://gitlab.com/urn/urn/blob/e1e9777498e1a7d690e3b39c56f616501646b5da/lib/base.lisp#L243-270). | |||||
Do keep in mind that the implementation is _quite_ hairy, and grew to be | |||||
like that because of our lack of a standard way of making functions | |||||
extensible. | |||||
[^2]: `%q` is the format specifier for quoted strings. | |||||
[^3]: | |||||
[Here](https://gitlab.com/urn/urn/blob/e1e9777498e1a7d690e3b39c56f616501646b5da/lib/type.lisp#L116-1420). | |||||
Do keep in mind that that the above warnings apply to this one, too. | |||||
[^4]: [The original generated code](/static/generated_code.lua.html) is | |||||
quite similar, except the generated variable names make it a tad harder | |||||
to read. |
@ -0,0 +1,516 @@ | |||||
--- | |||||
title: Dependent Types | |||||
date: September 08, 2017 | |||||
maths: true | |||||
--- | |||||
Dependent types are pretty cool, yo. This post is a semi-structured | |||||
ramble about [dtt](https://ahti-saarelainen.zgrep.org/git/hydraz/dtt), | |||||
a small dependently-typed "programming language" inspired by Thierry | |||||
Coquand's Calculus of (inductive) Constructions (though, note that the | |||||
_induction_ part is still lacking: There is support for defining | |||||
inductive data types, and destructuring them by pattern matching, but | |||||
since there's no totality checker, recursion is disallowed). | |||||
`dtt` is written in Haskell, and served as a learning experience both in | |||||
type theory and in writing programs using [extensible | |||||
effects](https://hackage.haskell.org/package/freer). I *do* partly regret | |||||
the implementation of effects I chose (the more popular | |||||
[`extensible-effects`](https://hackage.haskell.org/package/extensible-effects) | |||||
did not build on the Nixpkgs channel I had, so I went with `freer`; | |||||
Refactoring between these should be easy enough, but I still haven't | |||||
gotten around to it, yet) | |||||
I originally intended for this post to be a Literate Haskell file, | |||||
interleaving explanation with code. However, for a pet project, `dtt`'s | |||||
code base quickly spiralled out of control, and is now over a thousand | |||||
lines long: It's safe to say I did not expect this one bit. | |||||
### The language | |||||
`dtt` is a very standard $\lambda_{\prod{}}$ calculus. We have all 4 axes of | |||||
Barendregt's lambda cube, in virtue of having types be first class
values: Values depending on values (functions), values depending on | |||||
types (polymorphism), types depending on types (type operators), and | |||||
types depending on values (dependent types). This places dtt squarely at | |||||
the top, along with other type theories such as the Calculus of | |||||
Constructions (the theoretical basis for the Coq proof assistant) and TT | |||||
(the type theory behind the Idris programming language). | |||||
The syntax is very simple. We have the standard lambda calculus | |||||
constructs - $\lambda$-abstraction, application and variables - along | |||||
with `let`{.haskell}-bindings, pattern matching `case` expression, and | |||||
the dependent type goodies: $\prod$-abstraction and `Set`{.haskell}. | |||||
_As an aside_, pi types are called as so because the dependent function | |||||
space may (if you follow the "types are sets of values" line of | |||||
thinking) be viewed as the cartesian product of types. Consider a type | |||||
`A`{.haskell} with inhabitants `Foo`{.haskell}, `Bar`{.haskell} and | |||||
a type `B`{.haskell} with inhabitant `Quux`{.haskell}. A dependent | |||||
product $\displaystyle\prod_{(x: \mathtt{A})}\mathtt{B}$, then, has | |||||
inhabitants `(Foo, Quux)`{.haskell} and `(Bar, Quux)`{.haskell}. | |||||
You'll notice that dtt does not have a dedicated arrow type. Indeed, the | |||||
dependent product subsumes both the $\forall$ quantifier of System $F$, | |||||
and the arrow type $\to$ of the simply-typed lambda calculus. Keep this | |||||
in mind: It'll be important later. | |||||
Since dtt's syntax is unified (i.e., there's no stratification of terms | |||||
and types), the language can be - and is - entirely contained in | |||||
a single algebraic data type. All binders are _explicitly typed_, seeing | |||||
as inference for dependent types is undecidable (and, therefore, | |||||
bad).[^1] | |||||
```haskell | |||||
type Type = Term | |||||
data Term | |||||
= Variable Var | |||||
| Set Int | |||||
| TypeHint Term Type | |||||
| Pi Var Type Type | |||||
| Lam Var Type Term | |||||
| Let Var Term Term | |||||
| App Term Term | |||||
| Match Term [(Pattern, Term)] | |||||
deriving (Eq, Show, Ord) | |||||
``` | |||||
The `TypeHint`{.haskell} term constructor, not mentioned before, is | |||||
merely a convenience: It allows the programmer to check their | |||||
assumptions and help the type checker by supplying a type (Note that we | |||||
don't assume this type is correct, as you'll see later; It merely helps | |||||
guide inference.) | |||||
Variables aren't merely strings because of the large amount of | |||||
substitutions we have to perform: For this, instead of generating a new | |||||
name, we increment a counter attached to the variable - the pretty | |||||
printer uses the original name to great effect, when unambiguous. | |||||
```haskell | |||||
data Var | |||||
= Name String | |||||
| Refresh String Int | |||||
| Irrelevant | |||||
deriving (Eq, Show, Ord) | |||||
``` | |||||
The `Irrelevant`{.haskell} variable constructor is used to support $a | |||||
\to b$ as sugar for $\displaystyle\prod_{(x: a)} b$ when $x$ does not | |||||
appear free in $b$. As soon as the type checker encounters an | |||||
`Irrelevant`{.haskell} variable, it is refreshed with a new name. | |||||
`dtt` does not have implicit support (as in Idris), so all parameters, | |||||
including type parameters, must be bound explicitly. For this, we | |||||
support several kinds of syntactic sugar. First, all abstractions support
multiple variables in a _binding group_. This allows the programmer to | |||||
write `(a, b, c : α) -> β` instead of `(a : α) -> (b : α) -> (c : α) -> | |||||
β`. Furthermore, there is special syntax `/\a` for single-parameter | |||||
abstraction with type `Set 0`{.haskell}, and lambda abstractions support | |||||
multiple binding groups. | |||||
As mentioned before, the language does not support recursion (either | |||||
general or well-founded). Though I would like to, writing a totality | |||||
checker is hard - way harder than type checking $\lambda_{\prod{}}$, in | |||||
fact. However, an alternative way of inspecting inductive values _does_ | |||||
exist: eliminators. These are dependent versions of catamorphisms, and | |||||
basically encode a proof by induction. An inductive data type as Nat | |||||
gives rise to an eliminator much like it gives rise to a natural | |||||
catamorphism. | |||||
``` | |||||
inductive Nat : Type of { | |||||
Z : Nat; | |||||
S : Nat -> Nat | |||||
} | |||||
natElim : (P : Nat -> Type) | |||||
-> P Z | |||||
-> ((k : Nat) -> P k -> P (S k)) | |||||
-> (n : Nat) | |||||
-> P n | |||||
``` | |||||
If you squint, you'll see that the eliminator models a proof by | |||||
induction (of the proposition $P$) on the natural number $n$: The type | |||||
signature basically states "Given a proposition $P$ on $\mathbb{N}$, | |||||
a proof of $P_0$, a proof that $P_{(k + 1)}$ follows from $P_k$ and | |||||
a natural number $n$, I'll give you a proof of $P_n$." | |||||
This understanding of computations as proofs and types as propositions, | |||||
by the way, is called the [Curry-Howard | |||||
Isomorphism](https://en.wikipedia.org/wiki/Curry-Howard_correspondence). | |||||
The regular, simply-typed lambda calculus corresponds to natural | |||||
deduction, while $\lambda_{\prod{}}$ corresponds to predicate logic. | |||||
### The type system | |||||
Should this be called the term system? | |||||
Our type inference algorithm, contrary to what you might expect for such | |||||
a complicated system, is actually quite simple. Unfortunately, the code | |||||
isn't, and thus isn't reproduced in its entirety below. | |||||
#### Variables | |||||
The simplest case in any type system. The typing judgement that gives | |||||
rise to this case is pretty much the identity: $\Gamma \vdash \alpha: | |||||
\tau \therefore \Gamma \vdash \alpha: \tau$. If, from the current typing | |||||
context we know that $\alpha$ has type $\tau$, then we know that | |||||
$\alpha$ has type $\tau$. | |||||
```haskell | |||||
Variable x -> do | |||||
ty <- lookupType x -- (I) | |||||
case ty of | |||||
Just t -> pure t -- (II) | |||||
Nothing -> throwError (NotFound x) -- (III) | |||||
``` | |||||
1. Look up the type of the variable in the current context. | |||||
2. If we found a type for it, then return that (this is the happy path) | |||||
3. If we didn't find a type for it, we raise a type error. | |||||
#### `Set`{.haskell}s | |||||
Since dtt has a cumulative hierarchy of universes, $\mathtt{Set}_k:
\mathtt{Set}_{(k + 1)}$. This helps us avoid the logical inconsistency | |||||
introduced by having _type-in-type_[^2], i.e. $\mathtt{Type}: | |||||
\mathtt{Type}$. We say that $\mathtt{Set}_0$ is the type of _small | |||||
types_: in fact, $\mathtt{Set}_0$ is where most computation actually | |||||
happens, seeing as $\mathtt{Set}_k$ for $k \ge 1$ is reserved for | |||||
$\prod$-abstractions quantifying over such types. | |||||
```haskell | |||||
Set k -> pure . Set . (+1) $ k | |||||
``` | |||||
#### Type hints | |||||
Type hints are the first appearance of the unification engine, by far | |||||
the most complex part of dtt's type checker. But for now, it suffices to
know that ``t1 `assertEquality` t2``{.haskell} errors if the types t1 | |||||
and t2 can't be made to _line up_, i.e., unify. | |||||
For type hints, we infer the type of given expression, and compare it | |||||
against the user-provided type, raising an error if they don't match. | |||||
Because of how the unification engine works, the given type may be more | |||||
general (or specific) than the inferred one. | |||||
```haskell | |||||
TypeHint v t -> do | |||||
it <- infer v | |||||
t `assertEquality` it | |||||
pure t | |||||
``` | |||||
#### $\prod$-abstractions | |||||
This is where it starts to get interesting. First, we mandate that the | |||||
parameter type is inhabited (basically, that it _is_, in fact, a type). | |||||
The dependent product $\displaystyle\prod_{(x : 0)} \alpha$, while allowed by the | |||||
language's grammar, is entirely meaningless: There's no way to construct | |||||
an inhabitant of $0$, and thus this function may never be applied. | |||||
Then, in the context extended with $(\alpha : \tau)$, we require that | |||||
the consequent is also a type itself: The function | |||||
$\displaystyle\prod_{(x: \mathbb{N})} 0$, while again a valid parse, is | |||||
also meaningless. | |||||
The type of the overall abstraction is, then, the maximum value of the | |||||
indices of the universes of the parameter and the consequent. | |||||
```haskell | |||||
Pi x p c -> do | |||||
k1 <- inferSet p
k2 <- local (insertType (x, p)) $ | |||||
inferSet c | |||||
pure $ Set (k1 `max` k2) | |||||
``` | |||||
#### $\lambda$-abstractions | |||||
Much like in the simply-typed lambda calculus, the type of | |||||
a $\lambda$-abstraction is an arrow between the type of its parameter | |||||
and the type of its body. Of course, $\lambda_{\prod{}}$ incurs the | |||||
additional constraint that the type of the parameter is inhabited. | |||||
Alas, we don't have arrows. So, we "lift" the lambda's parameter to the | |||||
type level, and bind it in a $\prod$-abstraction. | |||||
```haskell | |||||
Lam x t b -> do | |||||
_ <- inferSet t | |||||
Pi x t <$> local (insertType (x, t)) (infer b) | |||||
``` | |||||
Note that, much like in the `Pi`{.haskell} case, we type-check the body | |||||
in a context extended with the parameter's type. | |||||
#### Application | |||||
Application is the most interesting rule, as it has to not only handle | |||||
inference, it also has to handle instantiation of $\prod$-abstractions. | |||||
Instantation is, much like application, handled by $\beta$-reduction, | |||||
with the difference being that instantiation happens during type | |||||
checking (applying a $\prod$-abstraction is meaningless) and application | |||||
happens during normalisation (instancing a $\lambda$-abstraction is | |||||
meaningless). | |||||
The type of the function being applied needs to be | |||||
a $\prod$-abstraction, while the type of the operand needs to be | |||||
inhabited. Note that the second constraint is not written out | |||||
explicitly: It's handled by the `Pi`{.haskell} case above, and | |||||
furthermore by the unification engine. | |||||
```haskell | |||||
App e1 e2 -> do | |||||
t1 <- infer e1 | |||||
case t1 of | |||||
Pi vr i o -> do | |||||
t2 <- infer e2 | |||||
t2 `assertEquality` i
N.normalise =<< subst [(vr, e2)] o -- (I) | |||||
e -> throwError (ExpectedPi e) -- (II) | |||||
``` | |||||
1. Notice that, here, we don't substitute the $\prod$-bound variable by | |||||
the type of $e_2$: That'd make us equivalent to System $F$. The whole | |||||
_deal_ with dependent types is that types depend on values, and that | |||||
entirely stems from this one line. By instancing a type variable with | |||||
a value, we allow _types_ to depend on _values_. | |||||
2. Oh, and if we didn't get a $\prod$-abstraction, error. | |||||
--- | |||||
You'll notice that two typing rules are missing here: One for handling | |||||
`let`{.haskell}s, which was not included because it is entirely | |||||
uninteresting, and one for `case ... of`{.haskell} expressions, which | |||||
was redacted because it is entirely a mess. | |||||
Hopefully, in the future, the typing of `case` expressions is simpler | |||||
- if not, they'll probably be replaced by eliminators. | |||||
### Unification and Constraint Solving | |||||
The unification engine is the man behind the curtain in type checking: | |||||
We often don't pay attention to it, but it's the driving force behind it | |||||
all. Fortunately, in our case, unification is entirely trivial: Solving | |||||
is the hard bit. | |||||
The job of the unification engine is to produce a set of constraints | |||||
that have to be satisfied in order for two types to be equal. Then, the | |||||
solver is run on these constraints to assert that they are logically | |||||
consistent, and potentially produce substitutions that _reify_ those | |||||
constraints. | |||||
Our solver isn't that cool, though, so it just verifies consistency.
The kinds of constraints we can generate are as in the data type below. | |||||
```haskell | |||||
data Constraint | |||||
= Instance Var Term -- (1) | |||||
| Equal Term Term -- (2) | |||||
| EqualTypes Type Type -- (3) | |||||
| IsSet Type -- (4) | |||||
deriving (Eq, Show, Ord) | |||||
``` | |||||
1. The constraint `Instance v t`{.haskell} corresponds to a substitution | |||||
between `v` and the term `t`. | |||||
2. A constraint `Equal a b`{.haskell} states that the two terms `a` and | |||||
`b` are equal under normalisation. | |||||
3. Ditto, but with their _types_ (We normalise, infer, and check for | |||||
equality) | |||||
4. A constraint `IsSet t`{.haskell} asserts that the provided type has | |||||
inhabitants. | |||||
#### Unification | |||||
Unification of most terms is entirely uninteresting. Simply line up the | |||||
structures and produce the appropriate equality (or instance) | |||||
constraints. | |||||
```haskell | |||||
unify (Variable a) b = instanceC a b | |||||
unify b (Variable a) = instanceC a b | |||||
unify (Set a) (Set b) | a == b = pure [] | |||||
unify (App x y) (App x' y') = | |||||
(++) <$> unify x x' <*> unify y y' | |||||
unify (TypeHint a b) (TypeHint c d) = | |||||
(++) <$> unify a c <*> unify b d | |||||
unify a b = throwError (NotEqual a b) | |||||
``` | |||||
Those are all the boring cases, and I'm not going to comment on them. | |||||
Similarly boring are binders, which were abstracted out because hlint | |||||
told me to. | |||||
```haskell | |||||
unify (Lam v1 t1 b1) (Lam v2 t2 b2) = unifyBinder (v1, v2) (t1, t2) (b1, b2) | |||||
unify (Pi v1 t1 b1) (Pi v2 t2 b2) = unifyBinder (v1, v2) (t1, t2) (b1, b2) | |||||
unify (Let v1 t1 b1) (Let v2 t2 b2) = unifyBinder (v1, v2) (t1, t2) (b1, b2) | |||||
unifyBinder (v1, v2) (t1, t2) (b1, b2) = do | |||||
(a, b) <- (,) <$> unify (Variable v1) (Variable v2) <*> unify t1 t2 | |||||
((a ++ b) ++) <$> unify b1 b2 | |||||
``` | |||||
There are two interesting cases: Unification between some term and a pi | |||||
abstraction, and unification between two variables. | |||||
```haskell | |||||
unify ta@(Variable a) tb@(Variable b) | |||||
| a == b = pure [] | |||||
| otherwise = do | |||||
(x, y) <- (,) <$> lookupType a <*> lookupType b | |||||
case (x, y) of | |||||
(Just _, Just _) -> do | |||||
ca <- equalTypesC ta tb | |||||
cb <- equalC ta tb | |||||
pure (ca ++ cb) | |||||
(Just x', Nothing) -> instanceC b x' | |||||
(Nothing, Just x') -> instanceC a x' | |||||
(Nothing, Nothing) -> instanceC a (Variable b) | |||||
``` | |||||
If the variables are syntactically the same, then we're done, and no | |||||
constraints have to be generated (Technically you could generate an | |||||
entirely trivial equality constraint, but this puts unnecessary pressure | |||||
on the solver). | |||||
If either variable has a known type, then we generate an instance | |||||
constraint between the unknown variable and the known one. | |||||
If both variables have a value, we equate their types' types and their | |||||
types. This is done mostly for error messages' sakes, seeing as if two | |||||
values are propositionally equal, so are their types. | |||||
Unification between a term and a $\prod$-abstraction is the most | |||||
interesting case: We check that the $\prod$ type abstracts over a type | |||||
(i.e., it corresponds to a System F $\forall$ instead of a System | |||||
F $\to$), and _instance_ the $\prod$ with a fresh type variable. | |||||
```haskell | |||||
unifyPi v1 t1 b1 a = do | |||||
id <- refresh Irrelevant | |||||
ss <- isSetC t1 | |||||
pi' <- subst [(v1, Variable id)] b1 | |||||
(++ ss) <$> unify a pi' | |||||
unify a (Pi v1 t1 b1) = unifyPi v1 t1 b1 a | |||||
unify (Pi v1 t1 b1) a = unifyPi v1 t1 b1 a | |||||
``` | |||||
#### Solving | |||||
Solving is a recursive function of the list of constraints (a | |||||
catamorphism!) with some additional state: Namely, a strict map of | |||||
already-performed substitutions. Let's work through the cases in reverse | |||||
order of complexity (and, interestingly, reverse order of how they're in | |||||
the source code). | |||||
##### No constraints | |||||
Solving an empty list of constraints is entirely trivial. | |||||
```haskell | |||||
solveInner _ [] = pure () | |||||
``` | |||||
##### `IsSet`{.haskell}
We infer the index of the universe of the given type, much like in the | |||||
inference case for $\prod$-abstractions, and check the remaining
constraints. | |||||
```haskell | |||||
solveInner map (IsSet t:xs) = do | |||||
_ <- inferSet t | |||||
solveInner map xs | |||||
``` | |||||
##### `EqualTypes`{.haskell}
We infer the types of both provided values, and generate an equality | |||||
constraint. | |||||
```haskell | |||||
solveInner map (EqualTypes a b:xs) = do | |||||
ta <- infer a | |||||
tb <- infer b | |||||
solveInner map (Equal ta tb:xs) | |||||
``` | |||||
##### `Equal`{.haskell}
We merely have to check for syntactic equality of the (normal forms of) | |||||
terms, because the hard lifting of destructuring and lining up was done | |||||
by the unification engine. | |||||
```haskell | |||||
solveInner map (Equal a b:xs) = do | |||||
a' <- N.normalise a | |||||
b' <- N.normalise b | |||||
eq <- equal a' b' | |||||
if eq | |||||
then solveInner map xs | |||||
else throwError (NotEqual a b) | |||||
``` | |||||
##### `Instance`{.haskell}
If the variable we're instancing is already in the map, and the thing | |||||
we're instancing it to _now_ is not the same as before, we have an | |||||
inconsistent set of substitutions and must error. | |||||
```haskell | |||||
solveInner map (Instance a b:xs) | |||||
| a `M.member` map | |||||
, b /= map M.! a | |||||
, Irrelevant /= a | |||||
= throwError $ InconsistentSubsts (a, b) (map M.! a) | |||||
``` | |||||
Otherwise, if we have a coherent set of instances, we add the instance | |||||
both to scope and to our local state map and continue checking. | |||||
```haskell | |||||
| otherwise = | |||||
local (insertType (a, b)) $ | |||||
solveInner (M.insert a b map) xs | |||||
``` | |||||
--- | |||||
Now that we have both `unify` and `solve`, we can write | |||||
`assertEquality`: We unify the two types, and then try to solve the set | |||||
of constraints. | |||||
```haskell | |||||
assertEquality t1 t2 = do | |||||
cs <- unify t1 t2 | |||||
solve cs | |||||
``` | |||||
The real implementation will catch and re-throw any errors raised by | |||||
`solve` to add appropriate context, and that's not the only case where | |||||
"real implementation" and "blag implementation" differ. | |||||
### Conclusion | |||||
Wow, that was a lot of writing. This conclusion begins on exactly the | |||||
500th line of the Markdown source of this article, and this is the | |||||
longest article on this blag (by far). However, that's not to say it's | |||||
bad: It was amazing to write, and writing `dtt` was also amazing. I am | |||||
not good at conclusions. | |||||
`dtt` is available under the BSD 3-clause licence, though I must warn | |||||
you that the source code hasn't many comments. | |||||
I hope you learned nearly as much as I did writing this by reading it. | |||||
[^1]: As [proven](https://link.springer.com/chapter/10.1007/BFb0037103) by Gilles Dowek. | |||||
[^2]: See [System U](https://en.wikipedia.org/wiki/System_U), also | |||||
Girard's paradox - the type theory equivalent of [Russell's | |||||
paradox](https://en.wikipedia.org/wiki/Russell%27s_paradox). |
@ -0,0 +1,456 @@ | |||||
--- | |||||
title: The Amulet Programming Language | |||||
date: January 18, 2018 | |||||
--- | |||||
As you might have noticed, I like designing and implementing programming | |||||
languages. This is another of these projects. Amulet is a | |||||
strictly-evaluated, statically typed impure roughly functional | |||||
programming language with support for parametric data types and rank-1 | |||||
polymorphism _à la_ Hindley-Milner (but [no | |||||
let-generalization](#letgen)), along with row-polymorphic records. While | |||||
syntactically inspired by the ML family, it's a disservice to those | |||||
languages to group Amulet with them, mostly because of the (present) | |||||
lack of modules. | |||||
Planned features (that I haven't even started working on, as of writing | |||||
this post) include generalized algebraic data types, modules and modular | |||||
implicits, a reworked type inference engine based on _OutsideIn(X)_[^4] | |||||
to support the other features, and, perhaps most importantly, a back-end | |||||
that's not a placeholder (i.e. something that generates either C or LLVM | |||||
and can be compiled to a standalone executable). | |||||
The compiler is still very much a work in progress, and is actively | |||||
being improved in several ways: Rewriting the parser for efficiency | |||||
concerns (see [Lexing and Parsing](#parser)), improving the quality of | |||||
generated code by introducing more intermediate representations, and | |||||
introducing several optimisations on the one intermediate language we | |||||
_do_ have. | |||||
## The Technical Bits | |||||
In this section, I'm going to describe the implementation of the | |||||
compiler as it exists at the time of writing - warts and all. | |||||
Unfortunately, we have a bit too much code for all of it to fit in this | |||||
blag post, so I'm only going to include the horribly broken bits here, | |||||
and leave the rest out. Of course, the compiler is open source, and is | |||||
available on my [GitHub][2]. | |||||
### Lexing and Parsing {#parser} | |||||
To call what we have a _lexer_ is a bit of an overstatement: The | |||||
`Parser.Lexer` module, which underpins the actual parser, contains only | |||||
a handful of imports and some definitions for use with [Parsec's][3] | |||||
[`Text.Parsec.Token`][4] module; Everything else is boilerplate, namely, | |||||
declaring, at top-level, the functions generated by `makeTokenParser`. | |||||
Our parser is then built on top of this infrastructure (and the other | |||||
combinators provided by Parsec) in a monadic style. Despite having | |||||
chosen to use strict `Text`s, many of the Parsec combinators return | |||||
`Char`s, and using the Alternative type class' ability to repeat actions | |||||
makes linked lists of these - the dreaded `String` type. Due to this, | |||||
and other inefficiencies, the parser is ridiculously bad at memory | |||||
management. | |||||
However, it does have some cute hacks. For example, the pattern parser | |||||
has to account for being used in the parsing of both `match`{.ml} and | |||||
`fun`{.ml} - in the former, destructuring patterns may appear without | |||||
parenthesis, but in the latter, they _must_ be properly parenthesised: | |||||
since `fun`{.ml} may have multiple patterns, it would be ambiguous if | |||||
`fun Foo x -> ...`{.ml} is destructuring a `Foo` or takes two arguments. | |||||
Instead of duplicating the pattern parser, one for `match`{.ml}es and | |||||
one for function arguments, we instead _parametrised_ the parser over | |||||
needing parenthesis or not by adding a rank-2 polymorphic continuation | |||||
argument. | |||||
```haskell | |||||
patternP :: (forall a. Parser a -> Parser a) -> Parser Pattern' | |||||
patternP cont = wildcard <|> {- some bits omitted -} try destructure where | |||||
destructure = withPos . cont $ do | |||||
ps <- constrName | |||||
Destructure ps <$> optionMaybe (patternP id) | |||||
``` | |||||
When we're parsing a pattern `match`{.ml}-style, the continuation given | |||||
is `id`, and when we're parsing an argument, the continuation is | |||||
`parens`. | |||||
For the aforementioned efficiency concerns, however, we've decided to | |||||
scrap the Parsec-based parser and move to an Alex/Happy based solution, | |||||
which is not only going to be more maintainable and more easily hackable | |||||
in the future, but will also be more efficient overall. Of course, for | |||||
a toy compiler such as this one, efficiency doesn't matter that much, | |||||
but using _one and a half gigabytes_ to compile a 20-line file is really | |||||
bad. | |||||
### Renaming {#renamer} | |||||
To simplify scope handling in both the type checker and optimiser, after | |||||
parsing, each variable is tagged with a globally unique integer that is | |||||
enough to compare variables. This also lets us use more efficient data | |||||
structures later in the compiler, such as `VarSet`, which stores only the | |||||
integer identifier of a variable in a big-endian Patricia tree[^1]. | |||||
Our approach, described in _[Secrets of the Glasgow Haskell Compiler | |||||
inliner][5]_ as "the Sledgehammer", consists of duplicating _every_ | |||||
bound variable to avoid name capture problems. However, while the first | |||||
of the listed disadvantages surely does apply, by doing all of the | |||||
_renaming_ in one go, we mostly avoid the latter. Of course, since then, | |||||
the Haskell ecosystem has evolved significantly, and the plumbing | |||||
required is a lot less intrusive. | |||||
In our compiler, we use MTL-style classes instead of concrete monad | |||||
transformer stacks. We also run every phase after parsing in a single | |||||
`GenT`{.haskell} monad, which provides a fresh supply of integers for | |||||
names. "Plumbing" the fresh name supply, then, only involves adding a | |||||
`MonadGen Int m` constraint to the context of functions that need it. | |||||
Since the string component of parsed names is not thrown away, we also | |||||
have to make up strings themselves. This is where another cute hack | |||||
comes in: We generate, lazily, an infinite stream of names that goes | |||||
`["a" .. "z", "aa" .. "az", "ba" .. "bz", ..]`, then use the | |||||
`MonadGen`{.haskell} counter as an index into that stream. | |||||
```haskell | |||||
alpha :: [Text] | |||||
alpha = map T.pack $ [1..] >>= flip replicateM ['a'..'z'] | |||||
``` | |||||
### Desugaring | |||||
The desugarer is a very simple piece of code which, through use of _Scrap | |||||
Your Boilerplate_-style generic programming, traverses the syntax tree | |||||
and rewrites nodes representing syntax sugar to their more explicit | |||||
versions. | |||||
Currently, the desugarer only expands _sections_: That is, expressions | |||||
of the form `(+ e)` become `fun x -> x + e` (where `e` is a fresh name), | |||||
expressions like `(e +)` become `fun x -> e + x`, and expressions like | |||||
`.foo` becomes `fun x -> x.foo`. | |||||
This is the only component of the compiler that I can reasonably | |||||
include, in its entirety, in this post. | |||||
```haskell | |||||
desugarProgram = everywhereM (mkM defaults) where | |||||
defaults :: Expr Parsed -> m (Expr Parsed) | |||||
defaults (BothSection op an) = do | |||||
(ap, ar) <- fresh an | |||||
(bp, br) <- fresh an | |||||
pure (Fun ap (Fun bp (BinOp ar op br an) an) an) | |||||
defaults (LeftSection op vl an) = do | |||||
(cap, ref) <- fresh an | |||||
pure (Fun cap (BinOp ref op vl an) an) | |||||
defaults (RightSection op vl an) = do | |||||
(cap, ref) <- fresh an | |||||
pure (Fun cap (BinOp vl op ref an) an) | |||||
defaults (AccessSection key an) = do | |||||
(cap, ref) <- fresh an | |||||
pure (Fun cap (Access ref key an) an) | |||||
defaults x = pure x | |||||
``` | |||||
### Type Checking | |||||
By far the most complicated stage of the compiler pipeline, our | |||||
inference algorithm is modelled after Algorithm W (extended with kinds | |||||
and kind inference), with constraint generation and solving being two | |||||
separate steps. | |||||
We first traverse the syntax tree, in order, making up constraints and | |||||
fresh type variables as needed, then invoke a unification algorithm to | |||||
produce a substitution, then apply that over both the generated type (a | |||||
skeleton of the actual result) and the syntax tree (which is explicitly | |||||
annotated with types everywhere). | |||||
The type inference code also generates and inserts explicit type | |||||
applications when instancing polymorphic types, since we internally | |||||
lower Amulet into a System F core language with explicit type | |||||
abstraction and application. We have `TypeApp` nodes in the syntax tree | |||||
that never get parsed or renamed, and are generated by the type checker | |||||
before lowering happens. | |||||
Our constraint solver is quite rudimentary, but it does the job nicely. | |||||
We operate with a State monad with the current substitution. When we | |||||
unify a variable with another type, it is added to the current | |||||
substitution. Everything else is just zipping the types together. When | |||||
we try to unify, say, a function type with a constructor, that's an | |||||
error. If a variable has already been added to the current substitution and | |||||
encounter it again, the new type is unified with the previously recorded | |||||
one. | |||||
```haskell | |||||
unify :: Type Typed -> Type Typed -> SolveM () | |||||
unify (TyVar a) b = bind a b | |||||
unify a (TyVar b) = bind b a | |||||
unify (TyArr a b) (TyArr a' b') = unify a a' *> unify b b' | |||||
unify (TyApp a b) (TyApp a' b') = unify a a' *> unify b b' | |||||
unify ta@(TyCon a) tb@(TyCon b) | |||||
| a == b = pure () | |||||
| otherwise = throwError (NotEqual ta tb) | |||||
``` | |||||
This is only an excerpt, because we have very complicated types. | |||||
#### Polymorphic Records | |||||
One of Amulet's selling points (if one could call it that) is its support | |||||
for row-polymorphic records. We have two types of first-class record | |||||
types: _closed_ record types (the type of literals) and _open_ record | |||||
types (the type inferred by record patterns and field getters.). Open | |||||
record types have the shape `{ 'p | x_1 : t_1 ... x_n : t_n }`{.ml},
while closed records lack the type variable `'p`{.ml}. | |||||
Unification of records has 3 cases, but in all 3 cases it is checked that | |||||
fields present in both records have unifiable types. | |||||
- When unifying an open record with a closed one, we check that fields
present in the open record are also present in the closed one, and
instance the type variable to contain the extra fields.
- When unifying two closed records, they must have exactly the same | |||||
shape and unifiable types for common fields. | |||||
- When unifying two open record types, a new fresh type variable is | |||||
created to use as the "hole" and tack the fields together. | |||||
As an example, `{ x = 1 }` has type `{ x : int }`{.ml}, the function | |||||
`fun x -> x.foo` has type `{ 'p | foo : 'a } -> 'a`{.ml}, and | |||||
`(fun r -> r.x) { y = 2 }` is a type error[^2]. | |||||
#### No Let Generalisation {#letgen} | |||||
Vytiniotis, Peyton Jones and Schrijvers argue[^5] that HM-style | |||||
`let`{.ml} generalisation interacts badly with complex type system | |||||
extensions such as GADTs and type families, and should therefore be | |||||
omitted from such systems. In a deviation from the paper, GHC 7.2 | |||||
reintroduces `let`{.ml} generalisation for local definitions that meet | |||||
some criteria[^3]. | |||||
> Here's the rule. With `-XMonoLocalBinds` (the default), a binding | |||||
> without a type signature is **generalised only if all its free variables | |||||
> are closed.** | |||||
> | |||||
> A binding is **closed** if and only if | |||||
> | |||||
> - It has a type signature, and the type signature has no free variables; or | |||||
> - It has no type signature, and all its free variables are closed, and it | |||||
is unaffected by the monomorphism restriction. And hence it is fully | |||||
generalised. | |||||
We, however, have chosen to follow that paper to a tee. Despite not | |||||
(yet!) having any of those fancy type system features that interact | |||||
poorly with let generalisation, we do not generalise _any_ local | |||||
bindings. | |||||
### Lowering | |||||
After type checking is done (and, conveniently, type applications have | |||||
been left in the correct places for us by the type checker), Amulet code | |||||
is converted into an explicitly-typed intermediate representation, in | |||||
direct style, which is used for (local) program optimisation. The AST is | |||||
simplified considerably: from 19 constructors to 9. | |||||
Type inference is no longer needed: the representation of core is packed | |||||
with all the information we need to check that programs are | |||||
type-correct. This includes types in every binder (lambda abstractions, | |||||
`let`{.ml}s, pattern bindings in `match`{.ml}), big-lambda abstractions | |||||
around polymorphic values (a $\lambda$ binds a value, while a $\Lambda$ | |||||
binds a type), along with the already mentioned type applications. | |||||
Here, code also gets the error branches for non-exhaustive `match`{.ml} | |||||
expressions, and, as a general rule, gets a lot uglier. | |||||
```ocaml | |||||
let main _ = (fun r -> r.x) { x = 2 } | |||||
(* Is elaborated into *) | |||||
let main : ∀ 'e. 'e -> int = | |||||
Λe : *. λk : 'e. match k { | |||||
(p : 'e) : 'e -> (λl : { 'g | x : int }. match l { | |||||
(r : { 'g | x : int }) : { 'g | x : int } -> match r { | |||||
{ (n : { 'g | x : int }) | x = (m : int) } : { 'g | x : int } -> m | |||||
}; | |||||
(o : { 'g | x : int }) : { 'g | x : int } -> | |||||
error @int "<test>[1:15 .. 1:27]" | |||||
}) ({ {} | x : int = 2 }); | |||||
(q : 'e) : 'e -> error @int "<test>[1:14 .. 1:38]" | |||||
} | |||||
``` | |||||
### Optimisation | |||||
As the code we initially get from lowering is ugly and inefficient - | |||||
along with being full of the abstractions functional programs have by | |||||
nature, it is full of redundant matches created by e.g. the fact that | |||||
functions can not do pattern matching directly, and that field access | |||||
gets reduced to pattern matching - the optimiser's job is to make it | |||||
prettier, and more efficient. | |||||
The optimiser works by applying, in order, a series of local | |||||
transformations operating on individual sub-terms to produce an efficient | |||||
program, 25 times. The idea of applying them several times is that, when | |||||
a simplification pass kicks in, more simplification opportunities might | |||||
arise. | |||||
#### `dropBranches`, `foldExpr`, `dropUselessLets` | |||||
These trivial passes remove similarly trivial pieces of code that only | |||||
add noise to the program. `dropBranches` will do its best to remove | |||||
redundant arms from a `match`{.ml} expression, such as those that | |||||
appear after an irrefutable pattern. `foldExpr` reduces uses of | |||||
operators where both sides are known, e.g. `2 + 2` (replaced by the
literal `4`) or `"foo " ^ "bar"` (replaced by the literal `"foo
bar"`). `dropUselessLets` removes `let`{.ml}s that bind unused variables | |||||
whose right-hand sides are pure expressions. | |||||
#### `trivialPropag`, `constrPropag` | |||||
The Amulet optimiser does inlining decisions in two (well, three) | |||||
separate phases: One is called _propagation_, in which a `let` decides | |||||
to propagate its bound values into the expression, and the other is the | |||||
more traditional `inlining`, where variables get their values from the | |||||
context. | |||||
Propagation is by far the easiest of the two: The compiler can see both | |||||
the definitions and all of the use sites, and could in theory decide if | |||||
propagating is beneficial or not. Right now, we propagate all literals | |||||
(and records made up solely of other trivial expressions), and do a | |||||
round of propagation that is best described as a rule. | |||||
```ocaml | |||||
let { v = C e } in ... v ... | |||||
(* becomes *) | |||||
let { v' = e } in ... C v' ... | |||||
``` | |||||
This _constructor propagation_ allows the `match`{.ml} optimisations to kick | |||||
in more often, and is semantics preserving. | |||||
#### `match`{.ml}-of-known-constructor | |||||
This pass identifies `match`{.ml} expressions where we can statically | |||||
determine the expression being analysed and, therefore, decide which | |||||
branch is going to be taken. | |||||
```ocaml | |||||
match C x with | |||||
| C e -> ... e ... | |||||
... | |||||
(* becomes *) | |||||
... x ... | |||||
``` | |||||
#### `match`{.ml}-of-bottom | |||||
It is always safe to turn a `match`{.ml} where the term being matched is a | |||||
diverging expression into only that diverging expression, thus reducing | |||||
code size several times. | |||||
```ocaml | |||||
match (error @int "message") with ... | |||||
(* becomes *) | |||||
error @int "message" | |||||
``` | |||||
As a special case, when one of the arms is itself a diverging | |||||
expression, we use the type mentioned in that application to `error` to | |||||
fix up the type of the value being scrutinized. | |||||
```ocaml | |||||
match (error @foo "message") with | |||||
| _ -> error @bar "message 2" | |||||
... | |||||
(* becomes *) | |||||
error @bar "message 2"
``` | |||||
#### `match`{.ml}-of-`match`{.ml} | |||||
This transformation turns `match`{.ml} expressions where the expression | |||||
being dissected is itself another `match`{.ml} "inside-out": we push the | |||||
branches of the _outer_ `match`{.ml} "into" the _inner_ `match` (what | |||||
used to be the expression being scrutinized). In doing so, sometimes, | |||||
new opportunities for match-of-known-constructor arise, and the code | |||||
ends up simpler. | |||||
```ocaml | |||||
match (match x with | |||||
| A -> B | |||||
| C -> D) with | |||||
| B -> e | |||||
| D -> f | |||||
(* becomes *) | |||||
match x with | |||||
| A -> match B with | |||||
| B -> e | |||||
| D -> f | |||||
| C -> match D with | |||||
| B -> e | |||||
| D -> f | |||||
``` | |||||
A clear area of improvement here is extracting the outer branches into | |||||
local `let`{.ml}-bound lambda abstractions to avoid an explosion in code | |||||
size. | |||||
#### `inlineVariable`, `betaReduce` | |||||
In this pass, use of a variable is replaced with the definition of that | |||||
variable, if it meets the following conditions: | |||||
- The variable is a lambda abstraction; and | |||||
- The lambda abstraction's body is not too _expensive_. Computing the | |||||
cost of a term boils down to computing the depth of the tree | |||||
representing that term, with some extra cost added to some specific | |||||
types of expression. | |||||
In doing this, however, we end up with pathological terms of the form | |||||
`(fun x -> e) y`{.ml}. The `betaReduce` pass turns this into `let x = y in | |||||
e`{.ml}. We generate `let`{.ml} bindings instead of substituting the | |||||
variable with the parameter to maintain the same evaluation order and | |||||
observable effects of the original code. This does mean that, often, | |||||
propagation kicks in and gives rise to new simplification opportunities. | |||||
## Epilogue | |||||
I was planning to write a section with a formalisation of the language's | |||||
semantics and type system, but it turns out I'm no mathematician, no | |||||
matter how hard I pretend. Maybe in the future. | |||||
Our code generator is wholly uninteresting, and, most of all, a | |||||
placeholder: This is why it is not described in detail (that is, at all) | |||||
in this post. I plan to write a follow-up when we actually finish the | |||||
native code generator. | |||||
As previously mentioned, the compiler _is_ open source: the code is | |||||
[here][2]. I recommend using the [Nix package manager][9] to acquire the | |||||
Haskell dependencies, but Cabal should work too. Current work in | |||||
rewriting the parser is happening in the `feature/alex-happy` branch. | |||||
[^1]: This sounds fancy, but in practice, it boils down to using | |||||
`Data.IntSet`{.haskell} instead of `Data.Set`{.haskell}. | |||||
[^2]: As shown [here][6]. Yes, the error messages need improvement. | |||||
[^3]: As explained in [this blog post][8]. | |||||
[^4]: Dimitrios Vytiniotis, Simon Peyton Jones, Tom Schrijvers, | |||||
and Martin Sulzmann. 2011. [OutsideIn(X): Modular Type Inference With | |||||
Local Assumptions][1]. _Note that, although the paper has been | |||||
published in the Journal of Functional Programming, the version linked | |||||
to here is a preprint._ | |||||
[^5]: Dimitrios Vytiniotis, Simon Peyton Jones, Tom Schrijvers. 2010. | |||||
[Let Should not be Generalised][7]. | |||||
[1]: <https://www.microsoft.com/en-us/research/wp-content/uploads/2016/02/jfp-outsidein.pdf> | |||||
[2]: <https://github.com/zardyh/amulet/tree/66a4143af32c3e261af51b74f975fc48c0155dc8> | |||||
[3]: <https://hackage.haskell.org/package/parsec-3.1.11> | |||||
[4]: <https://hackage.haskell.org/package/parsec-3.1.11/docs/Text-Parsec-Token.html> | |||||
[5]: <https://www.microsoft.com/en-us/research/wp-content/uploads/2002/07/inline.pdf> | |||||
[6]: </snip/sel.b0e94.txt> | |||||
[7]: <https://www.microsoft.com/en-us/research/wp-content/uploads/2016/02/tldi10-vytiniotis.pdf> | |||||
[8]: <https://ghc.haskell.org/trac/ghc/blog/LetGeneralisationInGhc7> | |||||
[9]: <https://nixos.org/nix/> |
@ -0,0 +1,610 @@ | |||||
--- | |||||
title: Amulet's New Type Checker | |||||
date: February 18, 2018 | |||||
synopsys: 2 | |||||
--- | |||||
In the last post about Amulet I wrote about rewriting the type checking | |||||
code. And, to everybody's surprise (including myself), I actually did | |||||
it. | |||||
Like all good programming languages, Amulet has a strong, static type | |||||
system. What most other languages do not have, however, is (mostly) | |||||
_full type inference_: programs are still type-checked despite (mostly) | |||||
having no type annotations. | |||||
Unfortunately, no practical type system has truly "full type inference": | |||||
features like data-type declarations, integral to actually writing | |||||
software, mandate some type annotations (in this case, constructor | |||||
arguments). However, that doesn't mean we can't try. | |||||
The new type checker, based on a constraint-generating but | |||||
_bidirectional_ approach, can type a lot more programs than the older, | |||||
Algorithm W-derived, quite buggy checker. As an example, consider the | |||||
following definition. For this to check under the old type system, one | |||||
would need to annotate both arguments to `map` _and_ its return type - | |||||
clearly undesirable! | |||||
```ocaml | |||||
let map f = | |||||
let go cont xs = | |||||
match xs with | |||||
| Nil -> cont Nil | |||||
| Cons (h, t) -> go (compose cont (fun x -> Cons (f h, x))) t | |||||
in go id ;; | |||||
``` | |||||
Even more egregious is that the η-reduction of `map` would lead to an | |||||
ill-typed program. | |||||
```ocaml | |||||
let map f xs = | |||||
let go cont xs = (* elided *) | |||||
in go id xs ;; | |||||
(* map : forall 'a 'b. ('a -> 'b) -> list 'a -> list 'b *) | |||||
let map' f = | |||||
let go cont xs = (* elided *) | |||||
in go id ;; | |||||
(* map' : forall 'a 'b 'c. ('a -> 'b) -> list 'a -> list 'c *) | |||||
``` | |||||
Having declared this unacceptable, I set out to rewrite the type | |||||
checker, after months of procrastination. As is the case, of course, | |||||
with such things, it only took some two hours, and I really shouldn't have | |||||
procrastinated it for so long. | |||||
Perhaps more importantly, the new type checker also supports rank-N | |||||
polymorphism directly, with all appropriate checks in place: expressions | |||||
checked against a polymorphic type are, in reality, checked against a | |||||
_deeply skolemised_ version of that poly-type - this lets us enforce two | |||||
key properties: | |||||
1. the expression being checked _is_ actually parametric over the type | |||||
arguments, i.e., it can't unify the skolem constants with any type | |||||
constructors, and | |||||
2. no rank-N arguments escape. | |||||
As an example, consider the following function: | |||||
```ocaml | |||||
let rankn (f : forall 'a. 'a -> 'a) = f () | |||||
``` | |||||
Well-typed uses of this function are limited to applying it to the | |||||
identity function, as parametricity tells us; and, indeed, trying to | |||||
apply it to e.g. `fun x -> x + 1`{.ocaml} is a type error. | |||||
### The Solver | |||||
As before, type checking is done by a traversal of the syntax tree | |||||
which, by use of a `Writer`{.haskell} monad, produces a list of | |||||
constraints to be solved. Note that a _list_ really is needed: a set, or | |||||
similar data structure with unspecified order, will not do. The order in | |||||
which the solver processes constraints is important! | |||||
The support for rank-N types has led to the solver needing to know
about a new kind of constraint: _subsumption_ constraints, in addition | |||||
to _unification_ constraints. Subsumption is perhaps too fancy a term, | |||||
used to obscure what's really going on: subtyping. However, whilst | |||||
languages like Java and Scala introduce subtyping by means of | |||||
inheritance, our subtyping boils down to eliminating ∀s. | |||||
∀s are eliminated from the right-hand-side of subsumption constraints by | |||||
_deep skolemisation_: replacing the quantified variables in the type | |||||
with fresh type constants. The "depth" of skolemisation refers to the | |||||
fact that ∀s to the right of arrows are eliminated along with the ones | |||||
at top-level. | |||||
```haskell | |||||
subsumes k t1 t2@TyForall{} = do | |||||
t2' <- skolemise t2 | |||||
subsumes k t1 t2' | |||||
subsumes k t1@TyForall{} t2 = do | |||||
(_, _, t1') <- instantiate t1 | |||||
subsumes k t1' t2 | |||||
subsumes k a b = k a b | |||||
``` | |||||
The function for computing subtyping is parametric over what to do in | |||||
the case of two monomorphic types: when this function is actually used | |||||
by the solving algorithm, it's applied to `unify`. | |||||
The unifier has the job of traversing two types in tandem to find the | |||||
_most general unifier_: a substitution that, when applied to one type, | |||||
will make it syntactically equal to the other. In most of the type
checker, when two types need to be "equal", they're equal up to | |||||
unification. | |||||
Most of the cases are an entirely boring traversal, so here are the | |||||
interesting ones. | |||||
- Skolem type constants only unify with other skolem type constants: | |||||
```haskell | |||||
unify TySkol{} TySkol{} = pure () | |||||
unify t@TySkol{} b = throwError $ SkolBinding t b | |||||
unify b t@TySkol{} = throwError $ SkolBinding t b | |||||
``` | |||||
- Type variables extend the substitution: | |||||
```haskell | |||||
unify (TyVar a) b = bind a b | |||||
unify a (TyVar b) = bind b a | |||||
``` | |||||
- Polymorphic types unify up to α-renaming: | |||||
```haskell | |||||
unify t@(TyForall vs ty) t'@(TyForall vs' ty') | |||||
| length vs /= length vs' = throwError (NotEqual t t') | |||||
| otherwise = do | |||||
fvs <- replicateM (length vs) freshTV | |||||
let subst = Map.fromList . flip zip fvs | |||||
unify (apply (subst vs) ty) (apply (subst vs') ty') | |||||
``` | |||||
When binding a variable to a concrete type, an _occurs check_ is | |||||
performed to make sure the substitution isn't going to end up containing | |||||
an infinite type. Consider binding `'a := list 'a`: If `'a` is | |||||
substituted for `list 'a` everywhere, the result would be `list (list | |||||
'a)` - but wait, `'a` appears there, so it'd be substituted again, ad | |||||
infinitum. | |||||
Extra care is also needed when binding a variable to itself, as is the | |||||
case with `'a ~ 'a`. These constraints are trivially discharged, but | |||||
adding them to the substitution would mean an infinite loop! | |||||
```haskell | |||||
occurs :: Var Typed -> Type Typed -> Bool | |||||
occurs _ (TyVar _) = False | |||||
occurs x e = x `Set.member` ftv e | |||||
``` | |||||
If the variable has already been bound, the new type is unified with the | |||||
one present in the substitution being accumulated. Otherwise, it is | |||||
added to the substitution. | |||||
```haskell | |||||
bind :: Var Typed -> Type Typed -> SolveM () | |||||
bind var ty | |||||
| occurs var ty = throwError (Occurs var ty) | |||||
| TyVar var == ty = pure () | |||||
| otherwise = do | |||||
env <- get | |||||
-- Attempt to extend the environment, otherwise | |||||
-- unify with existing type | |||||
case Map.lookup var env of | |||||
Nothing -> put (Map.singleton var (normType ty) `compose` env) | |||||
Just ty' | |||||
| ty' == ty -> pure () | |||||
| otherwise -> unify (normType ty) (normType ty') | |||||
``` | |||||
Running the solver, then, amounts to folding through the constraints in | |||||
order, applying the substitution created at each step to the remaining | |||||
constraints while also accumulating it to end up at the most general | |||||
unifier. | |||||
```haskell | |||||
solve :: Int -> Subst Typed | |||||
-> [Constraint Typed] | |||||
-> Either TypeError (Subst Typed) | |||||
solve _ s [] = pure s | |||||
solve i s (ConUnify e a t:xs) = do | |||||
case runSolve i s (unify (normType a) (normType t)) of | |||||
Left err -> Left (ArisingFrom err e) | |||||
Right (i', s') -> solve i' (s' `compose` s) (apply s' xs) | |||||
solve i s (ConSubsume e a b:xs) = | |||||
case runSolve i s (subsumes unify (normType a) (normType b)) of | |||||
Left err -> Left (ArisingFrom err e) | |||||
Right (i', s') -> solve i' (s' `compose` s) (apply s' xs) | |||||
``` | |||||
### Inferring and Checking Patterns | |||||
Amulet, being a member of the ML family, does most data processing | |||||
through _pattern matching_, and so, the patterns also need to be type | |||||
checked. | |||||
The pattern grammar is simple: it's made up of 6 constructors, while | |||||
expressions are described by over twenty constructors. | |||||
Here, the bidirectional approach to inference starts to shine. It is | |||||
possible to have different behaviours for when the type of the | |||||
pattern (or, at least, some skeleton describing that type) is known | |||||
and for when it is not, and such a type must be produced from the | |||||
pattern alone. | |||||
In a unification-based system like ours, the inference judgement can be
recovered from the checking judgement by checking against a fresh type | |||||
variable. | |||||
```haskell | |||||
inferPattern p = do | |||||
x <- freshTV | |||||
(p', binds) <- checkPattern p x | |||||
pure (p', x, binds) | |||||
``` | |||||
Inferring patterns produces three things: an annotated pattern, since | |||||
syntax trees after type checking carry their types; the type of values | |||||
that pattern matches; and a list of variables the pattern binds. | |||||
Checking omits returning the type, and yields only the annotated syntax | |||||
tree and the list of bindings. | |||||
As a special case, inferring patterns with type signatures overrides the | |||||
checking behaviour. The stated type is kind-checked (to verify its | |||||
integrity and to produce an annotated tree), then verified to be a | |||||
subtype of the inferred type for that pattern. | |||||
```haskell | |||||
inferPattern pat@(PType p t ann) = do | |||||
(p', pt, vs) <- inferPattern p | |||||
(t', _) <- resolveKind t | |||||
_ <- subsumes pat t' pt -- t' ≤ pt | |||||
case p' of | |||||
Capture v _ -> pure (PType p' t' (ann, t'), t', [(v, t')]) | |||||
_ -> pure (PType p' t' (ann, t'), t', vs) | |||||
``` | |||||
Checking patterns is where the fun actually happens. Checking `Wildcard`s | |||||
and `Capture`s is pretty much identical, except the latter actually | |||||
expands the capture list. | |||||
```haskell | |||||
checkPattern (Wildcard ann) ty = pure (Wildcard (ann, ty), []) | |||||
checkPattern (Capture v ann) ty = | |||||
pure (Capture (TvName v) (ann, ty), [(TvName v, ty)]) | |||||
``` | |||||
Checking a `Destructure` looks up the type of the constructor in the | |||||
environment, possibly instancing it, and does one of two things, | |||||
depending on whether or not the destructuring has an inner
pattern.
```haskell | |||||
checkPattern ex@(Destructure con ps ann) ty = | |||||
case ps of | |||||
``` | |||||
- If there was no inner pattern, then the looked-up type is unified with | |||||
the "goal" type - the one being checked against. | |||||
```haskell | |||||
Nothing -> do | |||||
pty <- lookupTy con | |||||
_ <- unify ex pty ty | |||||
pure (Destructure (TvName con) Nothing (ann, pty), []) | |||||
``` | |||||
- If there _was_ an inner pattern, we proceed by decomposing the type | |||||
looked up from the environment. The inner pattern is checked against the | |||||
_domain_ of the constructor's type, while the "goal" gets unified with | |||||
the _co-domain_. | |||||
```haskell | |||||
Just p -> do | |||||
(c, d) <- decompose ex _TyArr =<< lookupTy con | |||||
(ps', b) <- checkPattern p c | |||||
_ <- unify ex ty d | |||||
``` | |||||
Checking tuple patterns is a bit of a mess. This is because of a | |||||
mismatch between how they're written and how they're typed: a 3-tuple | |||||
pattern (and expression!) is written like `(a, b, c)`, but it's _typed_ | |||||
like `a * (b * c)`. There is a local helper that incrementally converts | |||||
between the representations by repeatedly decomposing the goal type. | |||||
```haskell | |||||
checkPattern pt@(PTuple elems ann) ty = | |||||
let go [x] t = (:[]) <$> checkPattern x t | |||||
go (x:xs) t = do | |||||
(left, right) <- decompose pt _TyTuple t | |||||
(:) <$> checkPattern x left <*> go xs right | |||||
go [] _ = error "malformed tuple in checkPattern" | |||||
``` | |||||
Even more fun is the `PTuple` constructor is woefully overloaded: One | |||||
with an empty list of children represents matching against `unit`{.ml}. | |||||
One with a single child is equivalent to the contained pattern; only one
with two or more contained patterns makes a proper tuple.
```haskell | |||||
in case elems of | |||||
[] -> do | |||||
_ <- unify pt ty tyUnit | |||||
pure (PTuple [] (ann, tyUnit), []) | |||||
[x] -> checkPattern x ty | |||||
xs -> do | |||||
(ps, concat -> binds) <- unzip <$> go xs ty | |||||
pure (PTuple ps (ann, ty), binds) | |||||
``` | |||||
### Inferring and Checking Expressions | |||||
Expressions are incredibly awful and the bane of my existence. There are | |||||
18 distinct cases of expression to consider, a number which only seems | |||||
to be going up with modules and the like in the pipeline; this | |||||
translates to 24 distinct cases in the type checker to account for all | |||||
of the possibilities. | |||||
As with patterns, expression checking is bidirectional; and, again, | |||||
there are a lot more checking cases than there are inference cases. So,
let's start with the latter. | |||||
#### Inferring Expressions | |||||
Inferring variable references makes use of instantiation to generate | |||||
fresh type variables for each top-level universal quantifier in the | |||||
type. These fresh variables will then be either bound to something by | |||||
the solver or universally quantified over in case they escape. | |||||
Since Amulet is desugared into a core language resembling predicative | |||||
System F, variable uses also lead to the generation of corresponding | |||||
type applications - one for each eliminated quantified variable. | |||||
```haskell | |||||
infer expr@(VarRef k a) = do | |||||
(inst, old, new) <- lookupTy' k | |||||
if Map.null inst | |||||
then pure (VarRef (TvName k) (a, new), new) | |||||
else mkTyApps expr inst old new | |||||
``` | |||||
Functions, strangely enough, have both checking _and_ inference | |||||
judgements: which is used impacts what constraints will be generated, | |||||
and that may end up making type inference more efficient (by allocating | |||||
less, or correspondingly spending less time in the solver). | |||||
The pattern inference judgement is used to compute the type and bindings | |||||
of the function's formal parameter, and the body is inferred in the | |||||
context extended with those bindings; Then, a function type is | |||||
assembled. | |||||
```haskell | |||||
infer (Fun p e an) = do | |||||
(p', dom, ms) <- inferPattern p | |||||
(e', cod) <- extendMany ms $ infer e | |||||
pure (Fun p' e' (an, TyArr dom cod), TyArr dom cod) | |||||
``` | |||||
Literals are pretty self-explanatory: Figuring their types boils down to | |||||
pattern matching. | |||||
```haskell | |||||
infer (Literal l an) = pure (Literal l (an, ty), ty) where | |||||
ty = case l of | |||||
LiInt{} -> tyInt | |||||
LiStr{} -> tyString | |||||
LiBool{} -> tyBool | |||||
LiUnit{} -> tyUnit | |||||
``` | |||||
The inference judgement for _expressions_ with type signatures is very similar | |||||
to the one for patterns with type signatures: The type is kind-checked, | |||||
then compared against the inferred type for that expression. Since | |||||
expression syntax trees also need to be annotated, they are `correct`ed | |||||
here. | |||||
```haskell | |||||
infer expr@(Ascription e ty an) = do | |||||
(ty', _) <- resolveKind ty | |||||
(e', et) <- infer e | |||||
_ <- subsumes expr ty' et | |||||
pure (Ascription (correct ty' e') ty' (an, ty'), ty') | |||||
``` | |||||
There is also a judgement for turning checking into inference, again by | |||||
making a fresh type variable. | |||||
```haskell | |||||
infer ex = do | |||||
x <- freshTV | |||||
ex' <- check ex x | |||||
pure (ex', x) | |||||
``` | |||||
#### Checking Expressions | |||||
Our rule for eliminating ∀s was adapted from the paper [Complete | |||||
and Easy Bidirectional Typechecking for Higher-Rank Polymorphism]. | |||||
Unlike in that paper, however, we do not have explicit _existential | |||||
variables_ in contexts, and so must check expressions against | |||||
deeply-skolemised types to eliminate the universal quantifiers. | |||||
[Complete and Easy Bidirectional Typechecking for Higher-Rank | |||||
Polymorphism]: https://www.cl.cam.ac.uk/~nk480/bidir.pdf | |||||
```haskell | |||||
check e ty@TyForall{} = do | |||||
e' <- check e =<< skolemise ty | |||||
pure (correct ty e') | |||||
``` | |||||
If the expression is checked against a deeply skolemised version of the | |||||
type, however, it will be tagged with that, while it needs to be tagged | |||||
with the universally-quantified type. So, it is `correct`ed. | |||||
Amulet has rudimentary support for _typed holes_, as in dependently | |||||
typed languages and, more recently, GHC. Since printing the type of | |||||
holes during type checking would be entirely uninformative due to | |||||
half-solved types, reporting them is deferred to after checking. | |||||
Of course, holes must still have checking behaviour: They take whatever | |||||
type they're checked against. | |||||
```haskell | |||||
check (Hole v a) t = pure (Hole (TvName v) (a, t)) | |||||
``` | |||||
Checking functions is as easy as inferring them: The goal type is split | |||||
between domain and codomain; the pattern is checked against the domain, | |||||
while the body is checked against the codomain, with the pattern's | |||||
bindings in scope. | |||||
```haskell | |||||
check ex@(Fun p b a) ty = do | |||||
(dom, cod) <- decompose ex _TyArr ty | |||||
(p', ms) <- checkPattern p dom | |||||
Fun p' <$> extendMany ms (check b cod) <*> pure (a, ty) | |||||
``` | |||||
Empty `begin end` blocks are an error. | |||||
```haskell
check ex@(Begin [] _) _ = throwError (EmptyBegin ex) | |||||
``` | |||||
`begin ... end` blocks with at least one expression are checked by | |||||
inferring the types of every expression but the last, and then checking | |||||
the last expression in the block against the goal type. | |||||
```haskell | |||||
check (Begin xs a) t = do | |||||
let start = init xs | |||||
end = last xs | |||||
start' <- traverse (fmap fst . infer) start | |||||
end' <- check end t | |||||
pure (Begin (start' ++ [end']) (a, t)) | |||||
``` | |||||
`let`s are pain. Since all our `let`s are recursive by nature, they must | |||||
be checked, including all the bound variables, in a context where the | |||||
types of every variable bound there are already available; To figure | |||||
this out, however, we first need to infer the type of every variable | |||||
bound there. | |||||
If that strikes you as "painfully recursive", you're right. This is | |||||
where the unification-based nature of our type system saved our butts: | |||||
Each bound variable in the `let` gets a fresh type variable, the context | |||||
is extended and the body checked against the goal. | |||||
The function responsible for inferring and solving the types of | |||||
variables is `inferLetTy`. It keeps an accumulating association list to | |||||
check the types of further bindings as they are figured out, one by one, | |||||
then uses the continuation to generalise (or not) the type. | |||||
```haskell | |||||
check (Let ns b an) t = do | |||||
ks <- for ns $ \(a, _, _) -> do | |||||
tv <- freshTV | |||||
pure (TvName a, tv) | |||||
extendMany ks $ do | |||||
(ns', ts) <- inferLetTy id ks (reverse ns) | |||||
extendMany ts $ do | |||||
b' <- check b t | |||||
pure (Let ns' b' (an, t)) | |||||
``` | |||||
We have decided to take [the advice of Vytiniotis, Peyton Jones, and | |||||
Schrijvers], and refrain from generalising lets, except at top-level. | |||||
This is why `inferLetTy` gets given `id` when checking terms. | |||||
[the advice of Vytiniotis, Peyton Jones, and Schrijvers]: https://www.microsoft.com/en-us/research/wp-content/uploads/2016/02/tldi10-vytiniotis.pdf | |||||
The judgement for checking `if` expressions is what made me stick to | |||||
bidirectional type checking instead of fixing our variant of Algorithm
W. The condition is checked against the boolean type, while both | |||||
branches are checked against the goal. | |||||
```haskell | |||||
check (If c t e an) ty = If <$> check c tyBool | |||||
<*> check t ty | |||||
<*> check e ty | |||||
<*> pure (an, ty) | |||||
``` | |||||
As it is not possible, in general, to recover the type of a function
at an application site, we infer it; The argument given is checked | |||||
against that function's domain and the codomain is unified with the | |||||
goal type. | |||||
```haskell | |||||
check ex@(App f x a) ty = do | |||||
(f', (d, c)) <- secondA (decompose ex _TyArr) =<< infer f | |||||
App f' <$> check x d <*> fmap (a,) (unify ex ty c) | |||||
``` | |||||
To check `match`, the type of what's being matched against is first | |||||
inferred because, unlike application where _some_ recovery is possible, | |||||
we can not recover the type of matchees from the type of branches _at | |||||
all_. | |||||
```haskell | |||||
check (Match t ps a) ty = do | |||||
(t', tt) <- infer t | |||||
``` | |||||
Once we have the type of the matchee in hands, patterns can be checked | |||||
against that. The branches are then each checked against the goal type. | |||||
```haskell | |||||
ps' <- for ps $ \(p, e) -> do | |||||
(p', ms) <- checkPattern p tt | |||||
(,) <$> pure p' <*> extendMany ms (check e ty) | |||||
``` | |||||
Checking binary operators is like checking function application twice. | |||||
Very boring. | |||||
```haskell | |||||
check ex@(BinOp l o r a) ty = do | |||||
(o', to) <- infer o | |||||
(el, to') <- decompose ex _TyArr to | |||||
(er, d) <- decompose ex _TyArr to' | |||||
BinOp <$> check l el <*> pure o' | |||||
<*> check r er <*> fmap (a,) (unify ex d ty) | |||||
``` | |||||
Checking records and record extension is a hack, so I'm not going to | |||||
talk about them until I've cleaned them up reasonably in the codebase. | |||||
Record access, however, is very clean: we make up a type for the | |||||
row-polymorphic bit, and check against a record type built from the goal | |||||
and the key. | |||||
```haskell | |||||
check (Access rc key a) ty = do | |||||
rho <- freshTV | |||||
Access <$> check rc (TyRows rho [(key, ty)]) | |||||
<*> pure key <*> pure (a, ty) | |||||
``` | |||||
Checking tuple expressions involves a local helper much like checking | |||||
tuple patterns. The goal type is recursively decomposed and made to line
up with the expression being checked.
```haskell | |||||
check ex@(Tuple es an) ty = Tuple <$> go es ty <*> pure (an, ty) where | |||||
go [] _ = error "not a tuple" | |||||
go [x] t = (:[]) <$> check x t | |||||
go (x:xs) t = do | |||||
(left, right) <- decompose ex _TyTuple t | |||||
(:) <$> check x left <*> go xs right | |||||
``` | |||||
And, to finish, we have a judgement for turning inference into checking. | |||||
```haskell | |||||
check e ty = do | |||||
(e', t) <- infer e | |||||
_ <- subsumes e ty t | |||||
pure e' | |||||
``` | |||||
### Conclusion | |||||
I like the new type checker: it has many things you'd expect from a | |||||
typed lambda calculus, such as η-contraction preserving typability, and | |||||
substitution of `let`{.ocaml}-bound variables being generally | |||||
admissible.
Our type system is fairly complex, what with rank-N types and higher | |||||
kinded polymorphism, so inferring programs under it is a bit of a | |||||
challenge. However, I am fairly sure the only place that demands type | |||||
annotations are higher-ranked _parameters_: uses of higher-rank | |||||
functions are checked without the need for annotations. | |||||
Check out [Amulet] the next time you're looking for a typed functional | |||||
programming language that still can't compile to actual executables. | |||||
[Amulet]: https://github.com/zardyh/amulet |
@ -0,0 +1,286 @@ | |||||
--- | |||||
title: Amulet and Language Safety | |||||
date: March 14, 2018 | |||||
--- | |||||
Ever since its inception, Amulet has strived to be a language that | |||||
_guarantees_ safety, to some extent, with its strong, static, inferred | |||||
type system. Through polymorphism we gain the concept of | |||||
_parametricity_, as explained in Philip Wadler's [Theorems for Free]: a | |||||
function's behaviour does not depend on the instantiations you perform. | |||||
However, the power-to-weight ratio of these features quickly plummets, | |||||
as every complicated type system extension makes inference rather | |||||
undecidable, which in turn mandates more and more type annotations. Of | |||||
the complex extensions I have read about, three struck me as | |||||
particularly elegant, and I have chosen to implement them all in Amulet: | |||||
- Generalised Algebraic Data Types, which this post is about; | |||||
- Row Polymorphism, which allows being precise about which structure | |||||
fields a function uses; and | |||||
- Rank-N types, which enables the implementation of many concepts | |||||
including monadic regions. | |||||
Both GADTs and rank-N types are in the "high weight" category: inference | |||||
in the presence of both is undecidable. Adding support for the latter | |||||
(which laid the foundations for the former) is what drove me to re-write | |||||
the type checker, a crusade detailed in [my last post]. | |||||
Of course, in the grand scheme of things, some languages provide way | |||||
more guarantees than Amulet: For instance, Rust, with its lifetime | |||||
system, can prove that code is memory-safe at compile time; | |||||
Dependently-typed languages such as Agda and Idris can express a lot of | |||||
invariants in their type system, but inference is completely destroyed. | |||||
Picking which features you'd like to support is a game of | |||||
tradeoffs---all of them have benefits, but some have exceedingly high | |||||
costs. | |||||
Amulet was originally based on a very traditional, HM-like type system | |||||
with support for row polymorphism. The addition of rank-N polymorphism | |||||
and GADTs instigated the move to a bidirectional system, which in turn | |||||
provided us with the ability to experiment with a lot more type system | |||||
extensions (for instance, linear types)---in pursuit of more guarantees | |||||
like parametricity. | |||||
GADTs | |||||
===== | |||||
In a sense, generalised ADTs are a "miniature" version of the inductive | |||||
families one would find in dependently-typed programming (and, indeed, | |||||
Amulet can type-check _some_ uses of length-indexed vectors, although | |||||
the lack of type-level computation is a showstopper). They allow | |||||
non-uniformity in the return types of constructors, by packaging | |||||
"coercions" along with the values; pattern matching, then, allows these | |||||
coercions to influence the solving of particular branches. | |||||
Since this is an introduction to indexed types, I am legally obligated | |||||
to present the following three examples: the type of equality witnesses | |||||
between two other types; higher-order abstract syntax, the type of | |||||
well-formed terms in some language; and _vectors_, the type of linked | |||||
lists with statically-known lengths. | |||||
#### Equality | |||||
As is tradition in intuitionistic type theory, we define equality by | |||||
postulating (that is, introducing a _constructor_ witnessing) | |||||
reflexivity: anything is equal to itself. Symmetry and transitivity can | |||||
be defined as ordinary pattern-matching functions. However, this | |||||
demonstrates the first (and main) shortcoming of our implementation: | |||||
Functions which perform pattern matching on generalised constructors | |||||
_must_ have explicitly stated types.[^1] | |||||
```ocaml | |||||
type eq 'a 'b = | |||||
| Refl : eq 'a 'a ;; | |||||
let sym (Refl : eq 'a 'b) : eq 'b 'a = Refl ;; | |||||
let trans (Refl : eq 'a 'b) (Refl : eq 'b 'c) : eq 'a 'c = Refl ;; | |||||
``` | |||||
Equality, when implemented like this, is conventionally used to | |||||
implement substitution: If there exists a proof that `a` and `b` are | |||||
equal, any `a` may be treated as a `b`. | |||||
```ocaml | |||||
let subst (Refl : eq 'a 'b) (x : 'a) : 'b = x ;; | |||||
``` | |||||
Despite `a` and `b` being distinct, _rigid_ type variables, matching on | |||||
`Refl` allows the constraint solver to treat them as equal. | |||||
#### Vectors | |||||
```ocaml | |||||
type z ;; (* the natural zero *) | |||||
type s 'k ;; (* the successor of a number *) | |||||
type vect 'n 'a = (* vectors of length n *) | |||||
| Nil : vect z 'a | |||||
| Cons : 'a * vect 'k 'a -> vect (s 'k) 'a | |||||
``` | |||||
Parametricity can tell us many useful things about functions. For | |||||
instance, all closed, non-looping inhabitants of the type `forall 'a. 'a | |||||
-> 'a` are operationally the identity function. However, expanding the | |||||
type grammar tends to _weaken_ parametricity before making it stronger. | |||||
Consider the type `forall 'a. list 'a -> list 'a`{.ocaml}---it has | |||||
several possible implementations: One could return the list unchanged, | |||||
return the empty list, duplicate every element in the list, drop some | |||||
elements around the middle, among _many_ other possible behaviours. | |||||
Indexed types are beyond the point of weakening parametricity, and start | |||||
to make it strong again. Consider a function of type `forall 'a 'n. ('a | |||||
-> 'a -> ordering) -> vect 'n 'a -> vect 'n 'a`{.ocaml}---by making the | |||||
length of the vector explicit in the type, and requiring it to be kept | |||||
the same, we have ruled out any implementations that drop or duplicate | |||||
elements. A win, for sure, but at what cost? An implementation of | |||||
insertion sort for traditional lists looks like this, when implemented | |||||
in Amulet: | |||||
```ocaml | |||||
let insert_sort cmp l = | |||||
let insert e tl = | |||||
match tl with | |||||
| Nil -> Cons (e, Nil) | |||||
| Cons (h, t) -> match cmp e h with | |||||
| Lt -> Cons (e, Cons (h, t)) | |||||
| Gt -> Cons (h, insert e t) | |||||
| Eq -> Cons (e, Cons (h, t)) | |||||
and go l = match l with | |||||
| Nil -> Nil | |||||
| Cons (x, xs) -> insert x (go xs) | |||||
in go l ;; | |||||
``` | |||||
The implementation for vectors, on the other hand, is full of _noise_: | |||||
type signatures which we would rather not write, but are forced to by | |||||
the nature of type systems. | |||||
```ocaml | |||||
let insert_sort (cmp : 'a -> 'a -> ordering) (v : vect 'n 'a) : vect 'n 'a = | |||||
let insert (e : 'a) (tl : vect 'k 'a) : vect (s 'k) 'a = | |||||
match tl with | |||||
| Nil -> Cons (e, Nil) | |||||
| Cons (h, t) -> match cmp e h with | |||||
| Lt -> Cons (e, Cons (h, t)) | |||||
| Gt -> Cons (h, insert e t) | |||||
| Eq -> Cons (e, Cons (h, t)) | |||||
and go (v : vect 'k 'a) : vect 'k 'a = match v with | |||||
| Nil -> Nil | |||||
| Cons (x, xs) -> insert x (go xs) | |||||
in go v ;; | |||||
``` | |||||
These are not quite theorems for free, but they are theorems for quite | |||||
cheap. | |||||
#### Well-Typed Terms | |||||
```ocaml | |||||
type term 'a = | |||||
| Lit : int -> term int | |||||
| Fun : ('a -> 'b) -> term ('a -> 'b) | |||||
| App : term ('a -> 'b) * term 'a -> term 'b | |||||
``` | |||||
In much the same way as the vector example, which forced us to be | |||||
correct with our functions, GADTs can also be applied in making us be | |||||
correct with our _data_. The type `term 'a` represents well typed terms: | |||||
the interpretation of such a value need not be concerned with runtime | |||||
errors at all, by leveraging the Amulet type system to make sure its | |||||
inputs are correct. | |||||
``` | |||||
let eval (x : term 'a) : 'a = | |||||
match x with | |||||
| Lit l -> l | |||||
| Fun f -> f | |||||
| App (f, x) -> (eval f) (eval x) | |||||
``` | |||||
While equalities let us bend the type system to our will, vectors and | |||||
terms let _the type system_ help us, in making incorrect implementations | |||||
compile errors. | |||||
Rank-N Types | |||||
============ | |||||
Rank-N types are quite useful, I'm sure. To be quite honest, they were | |||||
mostly implemented in preparation for GADTs, as the features have some | |||||
overlap. | |||||
A use case one might imagine if Amulet had notation for monads would be | |||||
[an implementation of the ST monad][^2], which prevents mutable state | |||||
from escaping by use of rank-N types. `St.run action` is a well-typed | |||||
program, since `action` has type `forall 's. st 's int`, but `St.run | |||||
action'` is not, since that has type `forall 's. st 's (ref 's int)`. | |||||
```ocaml | |||||
let action = | |||||
St.bind (alloc_ref 123) (fun var -> | |||||
St.bind (update_ref var (fun x -> x * 2)) (fun () -> | |||||
read_ref var)) | |||||
and action' = | |||||
St.bind (alloc_ref 123) (fun var -> | |||||
St.bind (update_ref var (fun x -> x * 2)) (fun () -> | |||||
St.pure var)) | |||||
``` | |||||
Conclusion | |||||
========== | |||||
Types are very powerful things. A powerful type system helps guide the | |||||
programmer by allowing the compiler to infer more and more of the | |||||
_program_---type class dictionaries in Haskell, and as a more extreme | |||||
example, proof search in Agda and Idris. | |||||
However, since the industry has long been dominated by painfully | |||||
first-order, very verbose type systems like those of Java and C#, it's | |||||
no surprise that many programmers have fled to dynamically typed | |||||
languages like ~~Go~~ Python---a type system needs to be fairly complex | |||||
before it gets to being expressive, and it needs to be _very_ complex to | |||||
get to the point of being useful. | |||||
Complexity and difficulty, while often present together, are not | |||||
necessarily interdependent: Take, for instance, Standard ML. The
first-order parametric types might seem restrictive when used to a | |||||
system like Haskell's (or, to some extent, Amulet's[^3]), but they
actually allow a lot of flexibility, and do not need many annotations at | |||||
all! They are a sweet spot in the design space. | |||||
If I knew more about statistics, I'd have some charts here correlating | |||||
programmer effort with implementor effort, and also programmer effort | |||||
with the extent of properties one can state as types. Of course, these | |||||
are all fuzzy metrics, and no amount of statistics would make those | |||||
charts accurate, so have my feelings in prose instead: | |||||
- Implementing a dynamic type system is _literally_ no effort. No effort | |||||
needs to be spent writing an inference engine, or a constraint solver, | |||||
or a renamer, or any other of the very complex moving parts of a type | |||||
checker. | |||||
However, the freedom they allow the implementor they take away from | |||||
the programmer, by forcing them to keep track of the types of | |||||
everything mentally. Even those that swear by dynamic types can not | |||||
refute the claim that data has shape, and having a compiler that can | |||||
make sure your shapes line up so you can focus on programming is a | |||||
definite advantage. | |||||
- On the opposite end of the spectrum, implementing a dependent type | |||||
system is a _lot_ of effort. Things quickly diverge into undecidability | |||||
before you even get to writing a solver---and higher order unification, | |||||
which has a tendency to pop up, is undecidable too. | |||||
While the implementor is subject to an endless stream of suffering, | |||||
the programmer is in some ways free and some ways constrained. They | |||||
can now express lots of invariants in the type system, from | |||||
correctness of `sort` to correctness of [an entire compiler] or an | |||||
[operating system kernel], but they must also state very precise types | |||||
for everything. | |||||
- In the middle lies a land of convenient programming without an | |||||
endlessly suffering compiler author, a land first explored by the ML | |||||
family with its polymorphic, inferred type system. | |||||
This is clearly the sweet spot. Amulet leans slightly to the
dependently-typed end of the spectrum, but can still infer the types
for many simple and complex programs without any annotations---the
programs that do not use generalised algebraic data types or rank-N | |||||
polymorphism. | |||||
[Theorems for Free]: https://people.mpi-sws.org/~dreyer/tor/papers/wadler.pdf | |||||
[my last post]: /posts/2018-02-18.html | |||||
[an implementation of the ST monad]: https://txt.amelia.how/st-monad.ml.html | |||||
[an entire compiler]: http://compcert.inria.fr/ | |||||
[operating system kernel]: https://sel4.systems/ | |||||
[^1]: In reality, the details are fuzzier. To be precise, pattern | |||||
matching on GADTs only introduces an implication constraint when the | |||||
type checker is applying a checking judgement. In practice, this means | |||||
that at least the return type must be explicitly annotated. | |||||
[^2]: Be warned that the example does not compile unless you remove the | |||||
modules, since our renamer is currently a bit daft. | |||||
[^3]: This is _my_ blog, and I'm allowed to brag about my projects, damn | |||||
it. |
@ -0,0 +1,247 @@ | |||||
--- | |||||
title: GADTs and Amulet | |||||
date: March 27, 2018 | |||||
maths: true | |||||
--- | |||||
Dependent types are a very useful feature - the gold standard of | |||||
enforcing invariants at compile time. However, they are still very much | |||||
not practical, especially considering inference for unrestricted | |||||
dependent types is equivalent to higher-order unification, which was | |||||
proven to be undecidable. | |||||
Fortunately, many of the benefits that dependent types bring aren't | |||||
because of dependent products themselves, but instead because of | |||||
associated features commonly present in those programming languages. One | |||||
of these, which also happens to be especially easy to mimic, are | |||||
_inductive families_, a generalisation of inductive data types: instead | |||||
of defining a single type inductively, one defines an entire _family_ of | |||||
related types. | |||||
Many use cases for inductive families are actually instances of a rather | |||||
less general concept, that of generalised algebraic data types, or | |||||
GADTs: Contrary to the indexed data types of full dependently typed | |||||
languages, these can and are implemented in several languages with | |||||
extensive inference, such as Haskell, OCaml and, now, Amulet. | |||||
Before I can talk about their implementation, I am legally obligated to | |||||
present the example of _length indexed vectors_, linked structures whose | |||||
size is known at compile time---instead of carrying around an integer | |||||
representing the number of elements, it is represented in the type-level | |||||
by a Peano[^1] natural number, as an _index_ to the vector type. By | |||||
universally quantifying over the index, we can guarantee by | |||||
parametricity[^2] that functions operating on these don't do inappropriate | |||||
things to the sizes of vectors. | |||||
```ocaml | |||||
type z ;; | |||||
type s 'k ;; | |||||
type vect 'n 'a = | |||||
| Nil : vect z 'a | |||||
| Cons : 'a * vect 'k 'a -> vect (s 'k) 'a | |||||
``` | |||||
Since the argument `'n` to `vect` (its length) varies with the constructor one | |||||
chooses, we call it an _index_; On the other hand, `'a`, being uniform over all | |||||
constructors, is called a _parameter_ (because the type is _parametric_ over | |||||
the choice of `'a`). These definitions bake the measure of length into | |||||
the type of vectors: an empty vector has length 0, and adding an element | |||||
to the front of some other vector increases the length by 1. | |||||
Matching on a vector reveals its index: in the `Nil` case, it's possible | |||||
to (locally) conclude that it had length `z`. Meanwhile, the `Cons` case | |||||
lets us infer that the length was the successor of some other natural | |||||
number, `s 'k`, and that the tail itself has length `'k`. | |||||
If one were to write a function to `map` a function over a `vect`or, | |||||
they would be bound by the type system to write a correct implementation | |||||
- well, either that or going out of their way to make a bogus one. It | |||||
would be possible to enforce total correctness of a function such as | |||||
this one, by adding linear types and making the vector parameter linear. | |||||
```ocaml | |||||
let map (f : 'a -> 'b) (xs : vect 'n 'a) : vect 'n 'b = | |||||
match xs with | |||||
| Nil -> Nil | |||||
| Cons (x, xs) -> Cons (f x, map f xs) ;; | |||||
``` | |||||
If we were to, say, duplicate every element in the list, an error would | |||||
be reported. Unlike some others, this one is not very clear, and it | |||||
definitely could be improved. | |||||
``` | |||||
Occurs check: The type variable jx | |||||
occurs in the type s 'jx | |||||
· Arising from use of the expression | |||||
Cons (f x, Cons (f x, map f xs)) | |||||
│ | |||||
33 │ | Cons (x, xs) -> Cons (f x, Cons (f x, map f xs)) ;; | |||||
│ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |||||
``` | |||||
This highlights the essence of GADTs: pattern matching on them reveals | |||||
equalities about types that the solver can later exploit. This is what | |||||
allows the programmer to write functions that vary their return types | |||||
based on their inputs - a very limited form of type-term dependency, | |||||
which brings us ever closer to the Calculus of Constructions corner of | |||||
Barendregt's lambda cube[^3]. | |||||
The addition of generalised algebraic data types has been in planning | |||||
for over two years---it was in the original design document. In a | |||||
mission that not even my collaborator noticed, all of the recently-added | |||||
type system and IR features were directed towards enabling the GADT | |||||
work: bidirectional type checking, rank-N polymorphism and coercions. | |||||
All of these features had cover stories: higher-ranked polymorphism was | |||||
motivated by monadic regions; bidirectional type checking was motivated | |||||
by the aforementioned polymorphism; and coercions were motivated by | |||||
newtype optimisation. But, in reality, it was a conspiracy to make GADTs | |||||
possible: having support for these features simplified implementing our | |||||
most recent form of fancy types, and while adding all of these in one go | |||||
would be possible, doing it incrementally was a lot saner. | |||||
While neither higher-ranked types nor GADTs technically demand a | |||||
bidirectional type system, implementing them with such a specification | |||||
is considerably easier, removing the need for workarounds such as boxy | |||||
types and a distinction between rigid/wobbly type variables. Our | |||||
algorithm for GADT inference rather resembles Richard Eisenberg's | |||||
[Bake]{.textsc}[^4], in that it only uses local equalities in _checking_ | |||||
mode. | |||||
Adding GADTs also led directly to a rewrite of the solver, which now
has to work with _implication constraints_, of the form `(Q₁, ..., Qₙ) | |||||
=> Q`, which should be read as "Assuming `Q₁` through `Qₙ`, conclude | |||||
`Q`." Pattern matching on generalised constructors, in checking mode, | |||||
captures every constraint generated by checking the right-hand side of a | |||||
clause and packages that as an implication constraint, with all the
constructor-bound equalities as assumptions. As an example, this lets us | |||||
write a type-safe cast function: | |||||
```ocaml | |||||
type eq 'a 'b = Refl : eq 'a 'a | |||||
(* an inhabitant of eq 'a 'b is a proof that 'a and 'b are equal *) | |||||
let subst (Refl : eq 'a 'b) (x : 'a) : 'b = x ;; | |||||
``` | |||||
Unfortunately, to keep inference decidable, many functions that depend | |||||
on generalised pattern matching need explicit type annotations, to guide | |||||
the type checker. | |||||
When _checking_ the body of the function, namely the variable reference | |||||
`x`, the solver is working under an assumption `'a ~ 'b` (i.e., `'a` and | |||||
`'b` stand for the same type), which lets us unify the stated type of | |||||
`x`, namely `'a`, with the return type of the function, `'b`. | |||||
If we remove the local assumption, say, by not matching on | |||||
`Refl`{.haskell}, the solver will not be allowed to unify the two type | |||||
variables `'a` and `'b`, and an error message will be reported[^6]: | |||||
``` | |||||
examples/gadt/equality.ml[11:43 ..11:43]: error | |||||
Can not unify rigid type variable b with the rigid type variable a | |||||
· Note: the variable b was rigidified because of a type ascription | |||||
against the type forall 'a 'b. t 'a 'b -> 'a -> 'b | |||||
and is represented by the constant bq | |||||
· Note: the rigid type variable a, in turn, | |||||
was rigidified because of a type ascription | |||||
against the type forall 'a 'b. t 'a 'b -> 'a -> 'b | |||||
· Arising from use of the expression | |||||
x | |||||
│ | |||||
11 │ let subst (_ : t 'a 'b) (x : 'a) : 'b = x ;; | |||||
│ ~ | |||||
``` | |||||
Our intermediate language was also extended, from a straightforward | |||||
System F-like lambda calculus with type abstractions and applications, | |||||
to a System F<sub>C</sub>-like system with _coercions_, _casts_, and | |||||
_coercion abstraction_. Coercions are the evidence, produced by the | |||||
solver, that an expression is usable as a given type---GADT patterns | |||||
bind coercions like these, which are the "reification" of an implication | |||||
constraint. This lets us make type-checking on the intermediate language | |||||
fast and decidable[^5], as a useful sanity check. | |||||
The two new judgements for GADT inference correspond directly to new | |||||
cases in the `infer` and `check` functions, the latter of which I | |||||
present here for completeness. The simplicity of this change serves as | |||||
concrete evidence of the claim that bidirectional systems extend readily | |||||
to new, complex features, producing maintainable and readable code. | |||||
```haskell | |||||
check (Match t ps a) ty = do | |||||
(t, tt) <- infer t | |||||
ps <- for ps $ \(p, e) -> do | |||||
(p', ms, cs) <- checkPattern p tt | |||||
let tvs = Set.map unTvName (boundTvs p' ms) | |||||
(p',) <$> implies (Arm p e) tt cs | |||||
(local (typeVars %~ Set.union tvs) | |||||
(extendMany ms (check e ty))) | |||||
pure (Match t ps (a, ty)) | |||||
``` | |||||
This corresponds to the checking judgement for matches, presented below. | |||||
Note that in my (rather informal) theoretical presentation of Amulet | |||||
typing judgements, we present implication constraints as a lexical scope | |||||
of equalities conjoined with the scope of variables; Inference | |||||
judgements (with double right arrows, $\Rightarrow$) correspond to uses of | |||||
`infer`, pattern checking judgements ($\Leftarrow_\text{pat}$) | |||||
correspond to `checkPattern`, which also doubles as $\mathtt{binds}$ and | |||||
$\mathtt{cons}$, and the main checking judgement $\Leftarrow$ is the | |||||
function `check`. | |||||
$$ | |||||
\frac{\Gamma; \mathscr{Q} \vdash e \Rightarrow \tau | |||||
\quad \Gamma \vdash p_i \Leftarrow_\text{pat} \tau | |||||
\quad \Gamma, \mathtt{binds}(p_i); \mathscr{Q}, \mathtt{cons}(p_i) | |||||
\vdash e_i \Leftarrow \sigma} | |||||
{\Gamma; \mathscr{Q} \vdash \mathtt{match}\ e\ \mathtt{with}\ \{p_i \to | |||||
e_i\} \Leftarrow \sigma} | |||||
$$ | |||||
Our implementation of the type checker is a bit more complex, because it | |||||
also does (some) elaboration and bookkeeping: tagging terms with types, | |||||
blaming type errors correctly, etc. | |||||
--- | |||||
This new, complicated feature was a lot harder to implement than | |||||
originally expected, but in the end it worked out. GADTs let us make the | |||||
type system _stronger_, while maintaining the decidable inference that | |||||
the non-fancy subset of the language enjoys. | |||||
The example presented here was the most boring one possible, mostly | |||||
because [two weeks ago] I wrote about their impact on the language's | |||||
ability to make things safer. | |||||
[^1]: Peano naturals are one particular formulation of the natural | |||||
numbers, which postulates that zero (denoted `z` above) is a natural | |||||
number, and any natural number's successor (denoted `s 'k` above) is | |||||
itself natural. | |||||
[^2]: This is one application of Philip Wadler's [Theorems for Free] | |||||
technique: given a (polymorphic) type of some function, we can derive | |||||
much of its behaviour. | |||||
[^3]: Amulet is currently somewhere on the edge between λ2 - the second | |||||
order lambda calculus, System F, and λP2, a system that allows | |||||
quantification over types and terms using the dependent product form, | |||||
which subsumes both the ∀ binder and the → arrow. Our lack of type | |||||
functions currently leaves us very far from the CoC. | |||||
[^4]: See [his thesis]. Our algorithm, of course, has the huge | |||||
simplification of not having to deal with full dependent types. | |||||
[^5]: Even if we don't do it yet---work is still ongoing to make the | |||||
type checker and solver sane. | |||||
[^6]: And quite a good one, if I do say so! The compiler | |||||
syntax highlights and pretty-prints both terms and types relevant to the | |||||
error, as you can see [here]. | |||||
[Theorems for Free]: http://homepages.inf.ed.ac.uk/wadler/topics/parametricity.html | |||||
[his thesis]: https://repository.brynmawr.edu/cgi/viewcontent.cgi?article=1074&context=compsci_pubs | |||||
[two weeks ago]: /posts/2018-03-14.html | |||||
[here]: https://i.amelia.how/68c4d.png |
@ -0,0 +1,304 @@ | |||||
--- | |||||
title: Amulet updates | |||||
date: August 11, 2018 | |||||
maths: true | |||||
--- | |||||
Jesus, it's been a while. Though my last post was almost 6 months ago | |||||
(give or take a few), I've been busy working on | |||||
[Amulet](https://github.com/tmpim/amulet), which continues to grow, | |||||
almost an eldritch abomination you try desperately, but fail, to kill. | |||||
Since my last post, Amulet has changed a ton, in noticeable and | |||||
not-so-noticeable ways. Here are the major changes to the compiler since | |||||
then. | |||||
Parser improvements | |||||
=================== | |||||
No language is good to use if it's inconvenient. So, in an effort to | |||||
make writing code more convenient, we've removed the need for `;;` after | |||||
top-level declarations, and added a _bunch_ of indentation sensitivity, | |||||
thus making several keywords optional: `begin`{.ocaml} and `end`{.ocaml} | |||||
are implicit in the body of a `fun`{.ocaml}, `match`{.ocaml}, or | |||||
`let`{.ocaml}, which has made those keywords almost entirely obsolete. | |||||
The body of a `let`{.ocaml} also need not be preceded by `in`{.ocaml} if | |||||
meaning is clear from indentation. | |||||
To demonstrate, where you would have | |||||
```ocaml | |||||
let foo = | |||||
let bar = fun x -> begin | |||||
a; | |||||
b; | |||||
c | |||||
end in begin | |||||
bar (); | |||||
bar 1; | |||||
end ;; | |||||
``` | |||||
One can now write | |||||
```ocaml | |||||
let foo = | |||||
let bar = fun x -> | |||||
a | |||||
b | |||||
c | |||||
bar () | |||||
bar 1 | |||||
``` | |||||
Moreover, we've added shorthand syntax for building and destructuring | |||||
records: `{ x, y, z }`{.ocaml} is equivalent to `{ x = x, y = y, z = z | |||||
}`{.ocaml} in both pattern and expression position. | |||||
Changes to record typing | |||||
======================== | |||||
Whereas `{ x with a = b }` would extend the record `x` to contain a new | |||||
field `a` (with value `b`), it's now _monomorphic update_ of the record | |||||
`x`. That is: `x` must _already_ contain a field called `a`, with the | |||||
same type as `b`. | |||||
This lets you write a function for updating a field in a record, such as | |||||
the one below, which would previously be impossible. Supporting | |||||
polymorphic update is not a priority, but it'd be nice to have. The way | |||||
PureScript, another language with row-polymorphic records, implements | |||||
polymorphic update does not fit in with our constraint based type | |||||
system. A new type of constraint would have to be introduced | |||||
specifically for this, which while not impossible, is certainly | |||||
annoying. | |||||
```ocaml | |||||
let foo : forall 'row. { 'row | x : int } -> { 'row | x : int } = | |||||
fun r -> { r with x = r.x + 1 } | |||||
``` | |||||
The impossibility of supporting polymorphic update with regular | |||||
subsumption constraints $a \le b$ stems from the fact that, when faced | |||||
with such a constraint, the compiler must produce a coercion function | |||||
that turns _any_ $a$ into a $b$ _given the types alone_. This is | |||||
possible for, say, field narrowing---just pick out the fields you want | |||||
out of a bigger record---but not for update, since the compiler has no | |||||
way of turning, for instance, an `int`{.ocaml} into a `string`{.ocaml}. | |||||
Stronger support for Rank-N Types | |||||
================================= | |||||
Changes to how we handle subsumption have made it possible to store | |||||
polymorphic values in not only tuples, but also general records. For | |||||
instance: | |||||
```ocaml | |||||
let foo = { | |||||
apply : forall 'a 'b. ('a -> 'b) -> 'a -> 'b = | |||||
fun x -> x | |||||
} (* : { apply : forall 'a 'b. ('a -> 'b) -> 'a -> 'b } *) | |||||
``` | |||||
`foo`{.ocaml} is a record containing a single polymorphic application | |||||
function. It can be used like so: | |||||
```ocaml | |||||
let () = | |||||
let { apply } = foo | |||||
apply (+ 1) 1 | |||||
apply (fun x -> x) () | |||||
``` | |||||
Pattern-matching Let | |||||
==================== | |||||
A feature I've desired for a while now, `let` expressions (and | |||||
declarations!) can have a pattern as their left-hand sides, as | |||||
demonstrated above. These can be used for any ol' type, including for | |||||
cases where pattern matching would be refutable. I haven't gotten around | |||||
to actually implementing this yet, but in the future, pattern matching | |||||
in `let`s will be restricted to (arbitrary) product types only. | |||||
```ocaml | |||||
type option 'a = Some of 'a | None | |||||
type foo = Foo of { x : int } | |||||
let _ = | |||||
let Some x = ... (* forbidden *) | |||||
let Foo { x } = ... (* allowed *) | |||||
``` | |||||
Even more "in-the-future", if we ever get around to adding attributes | |||||
like OCaml's, the check for this could be bypassed by annotating the | |||||
declaration with (say) a `[@partial]`{.ocaml} attribute. | |||||
Unfortunately, since Amulet _is_ a strict language, these are a bit | |||||
limited: They can not be recursive in _any_ way, neither directly nor | |||||
indirectly. | |||||
```ocaml | |||||
(* type error *) | |||||
let (a, b) = (b, a) | |||||
(* similarly *) | |||||
let (a, b) = x | |||||
and x = (a, b) | |||||
``` | |||||
Cycle detection and warnings | |||||
============================ | |||||
A verification pass is run over the AST if type-checking succeeds, to | |||||
forbid illegal uses of recursion (strict language) and, as an additional | |||||
convenience, warn when local variables go unused. | |||||
For instance, this is [forbidden](/static/verify_error.png): | |||||
```ocaml | |||||
let f = (fun x -> x) g | |||||
and g = (fun x -> x) f | |||||
``` | |||||
And this gives a [warning](/static/verify_warn.png): | |||||
```ocaml | |||||
let _ = | |||||
let { a, b } = { a = 1, b = 2 } | |||||
() | |||||
``` | |||||
Plans for this include termination (and/or productivity) (as a | |||||
warning) and exhaustiveness checks (as an error). | |||||
No more `main` | |||||
============ | |||||
Since pattern-matching `let`{.ocaml}s are allowed at top-level, there's no more | |||||
need for `main`. Instead of | |||||
```ocaml | |||||
let main () = | |||||
... | |||||
``` | |||||
Just match on `()`{.ocaml} at top-level: | |||||
```ocaml | |||||
let () = | |||||
... | |||||
``` | |||||
This gets rid of the (not so) subtle unsoundness introduced by the code | |||||
generator having to figure out how to invoke `main`, and the type | |||||
checker not checking that `main` has type `unit -> 'a`{.ocaml}, and also | |||||
allows us to remove much of the silly special-casing around variables | |||||
called `main`. | |||||
```ocaml | |||||
let main x = x + 1 | |||||
(* attempt to perform arithmetic on a nil value *) | |||||
``` | |||||
Implicit Parameters | |||||
=================== | |||||
A bit like Scala's, these allow marking a function's parameter as | |||||
implicit and having the type checker find out what argument you meant | |||||
automatically. Their design is based on a bit of reading other compiler | |||||
code, and also the paper on modular implicits for OCaml. However, we do | |||||
not have a ML-style module system at all (much to my dismay, but it's | |||||
being worked on), much less first class modules. | |||||
Implicit parameters allow ad-hoc overloading based on dictionary passing | |||||
(like type classes, but with less inference). | |||||
```ocaml | |||||
type show 'a = Show of 'a -> string | |||||
let show ?(Show f) = f | |||||
let implicit show_string = | |||||
Show (fun x -> x) | |||||
let "foo" = show "foo" | |||||
``` | |||||
Here, unification makes it known that `show` is looking for an implicit | |||||
argument of type `show string`{.ocaml}, and the only possibility is | |||||
`show_string`{.ocaml}, which is what gets used. | |||||
Implicit laziness | |||||
================= | |||||
There is a built-in type `lazy : type -> type`{.ocaml}, a function | |||||
`force : forall 'a. lazy 'a -> 'a` for turning a thunk back into a | |||||
value, and a keyword `lazy` that makes a thunk out of any expression. | |||||
`lazy 'a`{.ocaml} and `'a` are mutual subtypes of each other, and the
compiler inserts thunks/`force`{.ocaml}s where appropriate to make code | |||||
type-check. | |||||
```ocaml | |||||
let x && y = if x then force y else false | |||||
let () = | |||||
false && launch_the_missiles () | |||||
(* no missiles were launched in the execution of this program *) | |||||
``` | |||||
General refactoring | |||||
=================== | |||||
- Literal patterns are allowed for all types, and they're tested for
equality using `(==)`.
- Amulet only has one type constructor in its AST for all its kinds of | |||||
functions: `forall 'a. 'a -> int`{.ocaml}, `int -> string`{.ocaml} and `show string => | |||||
unit`{.ocaml} are all represented the same internally and disambiguated | |||||
by dependency/visibility flags. | |||||
- Polymorphic recursion is checked using "outline types", computed | |||||
before fully-fledged inference kicks in based solely on the shape of | |||||
values. This lets us handle the function below without an annotation on | |||||
its return type by computing that `{ count = 1 }` _must_ have type `{ | |||||
count : int }` beforehand. | |||||
Combined with the annotation on `x`, this gives us a "full" type | |||||
signature, which lets us use checking for `size`, allowing polymorphic | |||||
recursion to happen. | |||||
~~~{.ocaml} | |||||
type nested 'a = Nested of nested ('a * 'a) * nested ('a * 'a) | One of 'a | |||||
let size (x : nested 'a) = | |||||
match x with | |||||
| One _ -> { count = 1 } | |||||
| Nested (a, _) -> { count = 2 * (size a).count } | |||||
~~~ | |||||
- The newtype elimination pass was rewritten once and, unfortunately, | |||||
disabled, since it was broken with some touchy code. | |||||
- Operator declarations like `let 2 + 2 = 5 in 2 + 2` are admissible. | |||||
- Sanity of optimisations is checked at runtime by running a type | |||||
checker over the intermediate language programs after all optimisations | |||||
- Local opens are allowed, with two syntaxes: Either | |||||
`M.( x + y)`{.ocaml} (or `M.{ a, b }`{.ocaml}) or `let open M in x + | |||||
y`{.ocaml}. | |||||
- Amulet is inconsistent in some more ways, such as `type : type` | |||||
holding. | |||||
- There are no more kinds. | |||||
Conclusion | |||||
========== | |||||
This post was a bit short, and also a bit hurried. Some of the features | |||||
here deserve better explanations, but I felt like giving them an | |||||
_outline_ (haha) would be better than leaving the blag to rot (yet | |||||
again). | |||||
Watch out for a blog post regarding (at _least_) implicit parameters, | |||||
which will definitely involve the changes to subtyping involving | |||||
records/tuples. |
@ -0,0 +1,329 @@ | |||||
--- | |||||
title: Compositional Typing for ML | |||||
date: January 28, 2019 | |||||
maths: true | |||||
--- | |||||
\long\def\ignore#1{} | |||||
Compositional type-checking is a neat technique that I first saw in a | |||||
paper by Olaf Chitil[^1]. He introduces a system of principal _typings_, | |||||
as opposed to a system of principal _types_, as a way to address the bad | |||||
type errors that many functional programming languages with type systems | |||||
based on Hindley-Milner suffer from. | |||||
Today I want to present a small type checker for a core ML (with, | |||||
notably, no data types or modules) based roughly on the ideas from that | |||||
paper. This post is _almost_ literate Haskell, but it's not a complete | |||||
program: it only implements the type checker. If you actually want to | |||||
play with the language, grab the unabridged code | |||||
[here](https://github.com/zardyh/mld). | |||||
\ignore{ | |||||
\begin{code} | |||||
{-# LANGUAGE GeneralizedNewtypeDeriving, DerivingStrategies #-} | |||||
\end{code} | |||||
} | |||||
--- | |||||
\begin{code} | |||||
module Typings where | |||||
import qualified Data.Map.Merge.Strict as Map | |||||
import qualified Data.Map.Strict as Map | |||||
import qualified Data.Set as Set | |||||
import Data.Foldable | |||||
import Data.List | |||||
import Data.Char | |||||
import Control.Monad.Except | |||||
\end{code} | |||||
We'll begin, like always, by defining data structures for the language. | |||||
Now, this is a bit against my style, but this system (which I | |||||
shall call ML<sub>$\Delta$</sub> - but only because it sounds cool) is not | |||||
presented as a pure type system - there are separate grammars for terms | |||||
and types. Assume that `Var`{.haskell} is a suitable member of all the | |||||
appropriate type classes. | |||||
\begin{code} | |||||
data Exp | |||||
= Lam Var Exp | |||||
| App Exp Exp | |||||
| Use Var | |||||
| Let (Var, Exp) Exp | |||||
| Num Integer | |||||
deriving (Eq, Show, Ord) | |||||
data Type | |||||
= TyVar Var | |||||
| TyFun Type Type | |||||
| TyCon Var | |||||
deriving (Eq, Show, Ord) | |||||
\end{code} | |||||
ML<sub>$\Delta$</sub> is _painfully_ simple: It's a lambda calculus | |||||
extended with `Let`{.haskell} since there needs to be a demonstration of | |||||
recursion and polymorphism, and numbers so there can be a base type. It | |||||
has no unusual features - in fact, it doesn't have many features at all: | |||||
no rank-N types, GADTs, type classes, row-polymorphic records, tuples or | |||||
even algebraic data types. | |||||
I believe that a fully-featured programming language along the lines of | |||||
Haskell could be shaped out of a type system like this, however I am not | |||||
smart enough and could not find any prior literature on the topic. | |||||
Sadly, it seems that compositional typings aren't a very active area of | |||||
research at all. | |||||
The novelty starts to show up when we define data to represent the | |||||
different kinds of scopes that crop up. There are monomorphic | |||||
$\Delta$-contexts, which assign _types_ to names, and also polymorphic | |||||
$\Gamma$-contexts, that assign _typings_ to names instead. While we're | |||||
defining `newtype`{.haskell}s over `Map`{.haskell}s, let's also get | |||||
substitutions out of the way. | |||||
\begin{code} | |||||
newtype Delta = Delta (Map.Map Var Type) | |||||
deriving (Eq, Ord, Semigroup, Monoid) | |||||
newtype Subst = Subst (Map.Map Var Type) | |||||
deriving (Eq, Show, Ord, Monoid) | |||||
newtype Gamma = Gamma (Map.Map Var Typing) | |||||
deriving (Eq, Show, Ord, Semigroup, Monoid) | |||||
\end{code} | |||||
The star of the show, of course, are the typings themselves. A typing is | |||||
a pair of a (monomorphic) type $\tau$ and a $\Delta$-context, and in a | |||||
way it packages both the type of an expression and the variables it'll | |||||
use from the scope. | |||||
\begin{code} | |||||
data Typing = Typing Delta Type | |||||
deriving (Eq, Show, Ord) | |||||
\end{code} | |||||
With this, we're ready to look at how inference proceeds for | |||||
ML<sub>$\Delta$</sub>. I make no effort at relating the rules | |||||
implemented in code to anything except a vague idea of the rules in the | |||||
paper: Those are complicated, especially since they deal with a language | |||||
much more complicated than this humble calculus. In an effort not to | |||||
embarrass myself, I'll also not present anything "formal". | |||||
--- | |||||
\begin{code} | |||||
infer :: Exp -- The expression we're computing a typing for | |||||
-> Gamma -- The Γ context | |||||
-> [Var] -- A supply of fresh variables | |||||
-> Subst -- The ambient substitution | |||||
-> Either TypeError ( Typing -- The typing | |||||
, [Var] -- New variables | |||||
, Subst -- New substitution | |||||
) | |||||
\end{code} | |||||
There are two cases when dealing with variables. Either a typing is | |||||
present in the environment $\Gamma$, in which case we just use that | |||||
with some retouching to make sure type variables aren't repeated - this | |||||
takes the place of instantiating type schemes in Hindley-Milner. | |||||
However, a variable can also _not_ be in the environment $\Gamma$, in | |||||
which case we invent a fresh type variable $\alpha$[^2] for it and insist on | |||||
the monomorphic typing $\{ v :: \alpha \} \vdash \alpha$. | |||||
\begin{code} | |||||
infer (Use v) (Gamma env) (new:xs) sub = | |||||
case Map.lookup v env of | |||||
Just ty -> -- Use the typing that was looked up | |||||
pure ((\(a, b) -> (a, b, sub)) (refresh ty xs)) | |||||
Nothing -> -- Make a new one! | |||||
let new_delta = Delta (Map.singleton v new_ty) | |||||
new_ty = TyVar new | |||||
in pure (Typing new_delta new_ty, xs, sub) | |||||
\end{code} | |||||
Interestingly, this allows for (principal!) typings to be given even to | |||||
code containing free variables. The typing for the expression `x`, for | |||||
instance, is reported to be $\{ x :: \alpha \} \vdash \alpha$. Since | |||||
this isn't meant to be a compiler, there's no handling for variables | |||||
being out of scope, so the full inferred typings are printed on the | |||||
REPL- err, RETL? A read-eval-type-loop! | |||||
``` | |||||
> x | |||||
{ x :: a } ⊢ a | |||||
``` | |||||
Moreover, this system does not have type schemes: Typings subsume those | |||||
as well. Typings explicitly carry information regarding which type | |||||
variables are polymorphic and which are constrained by something in the | |||||
environment, avoiding a HM-like generalisation step. | |||||
\begin{code} | |||||
where | |||||
refresh :: Typing -> [Var] -> (Typing, [Var]) | |||||
refresh (Typing (Delta delta) tau) xs = | |||||
let tau_fv = Set.toList (ftv tau `Set.difference` foldMap ftv delta) | |||||
(used, xs') = splitAt (length tau_fv) xs | |||||
sub = Subst (Map.fromList (zip tau_fv (map TyVar used))) | |||||
in (Typing (applyDelta sub delta) (apply sub tau), xs') | |||||
\end{code} | |||||
`refresh`{.haskell} is responsible for ML<sub>$\Delta$</sub>'s analogue of | |||||
instantiation: New, fresh type variables are invented for each type | |||||
variable free in the type $\tau$ that is not also free in the context | |||||
$\Delta$. Whether or not this is better than $\forall$ quantifiers is up | |||||
for debate, but it is jolly neat. | |||||
The case for application might be the most interesting. We infer two | |||||
typings $\Delta \vdash \tau$ and $\Delta' \vdash \sigma$ for the | |||||
function and the argument respectively, then unify $\tau$ with $\sigma | |||||
\to \alpha$ with $\alpha$ fresh. | |||||
\begin{code} | |||||
infer (App f a) env (alpha:xs) sub = do | |||||
(Typing delta_f type_f, xs, sub) <- infer f env xs sub | |||||
(Typing delta_a type_a, xs, sub) <- infer a env xs sub | |||||
mgu <- unify (TyFun type_a (TyVar alpha)) type_f | |||||
\end{code} | |||||
This is enough to make sure that the expressions involved are | |||||
compatible, but it does not ensure that the _contexts_ attached are also | |||||
compatible. So, the substitution is applied to both contexts and they | |||||
are merged - variables present in one but not in the other are kept, and | |||||
variables present in both have their types unified. | |||||
\begin{code} | |||||
let delta_f' = applyDelta mgu delta_f | |||||
delta_a' = applyDelta mgu delta_a | |||||
delta_fa <- mergeDelta delta_f' delta_a' | |||||
pure (Typing delta_fa (apply mgu (TyVar alpha)), xs, sub <> mgu) | |||||
\end{code} | |||||
If a variable `x` has, say, type `Bool` in the function's context but `Int` | |||||
in the argument's context - that's a type error, one which that can be | |||||
very precisely reported as an inconsistency in the types `x` is used at | |||||
when trying to type some function application. This is _much_ better than | |||||
the HM approach, which would just claim the latter usage is wrong. | |||||
There are three spans of interest, not one. | |||||
Inference for $\lambda$ abstractions is simple: We invent a fresh | |||||
monomorphic typing for the bound variable, add it to the context when | |||||
inferring a type for the body, then remove that one specifically from | |||||
the typing of the body when creating one for the overall abstraction. | |||||
\begin{code} | |||||
infer (Lam v b) (Gamma env) (alpha:xs) sub = do | |||||
let ty = TyVar alpha | |||||
mono_typing = Typing (Delta (Map.singleton v ty)) ty | |||||
new_env = Gamma (Map.insert v mono_typing env) | |||||
(Typing (Delta body_delta) body_ty, xs, sub) <- infer b new_env xs sub | |||||
let delta' = Delta (Map.delete v body_delta) | |||||
pure (Typing delta' (apply sub (TyFun ty body_ty)), xs, sub) | |||||
\end{code} | |||||
Care is taken to apply the ambient substitution to the type of the | |||||
abstraction so that details learned about the bound variable inside the | |||||
body will be reflected in the type. This could also be extracted from | |||||
the typing of the body, I suppose, but _eh_. | |||||
`let`{.haskell}s are very easy, especially since generalisation is
implicit in the structure of typings. We simply compute a typing from
the bound expression, _reduce_ it with respect to the let-bound
variable, add it to the environment and infer a typing for the body.
\begin{code} | |||||
infer (Let (var, exp) body) gamma@(Gamma env) xs sub = do | |||||
(exp_t, xs, sub) <- infer exp gamma xs sub | |||||
let exp_s = reduceTyping var exp_t | |||||
gamma' = Gamma (Map.insert var exp_s env) | |||||
infer body gamma' xs sub | |||||
\end{code} | |||||
Reduction w.r.t. a variable `x` is a very simple operation that makes | |||||
typings as polymorphic as possible, by deleting entries whose free type | |||||
variables are disjoint with the overall type along with the entry for | |||||
`x`. | |||||
\begin{code} | |||||
reduceTyping :: Var -> Typing -> Typing | |||||
reduceTyping x (Typing (Delta delta) tau) = | |||||
let tau_fv = ftv tau | |||||
delta' = Map.filter keep (Map.delete x delta) | |||||
keep sigma = not $ Set.null (ftv sigma `Set.intersection` tau_fv) | |||||
in Typing (Delta delta') tau | |||||
\end{code} | |||||
--- | |||||
Parsing, error reporting and user interaction do not have interesting | |||||
implementations, so I have chosen not to include them here. | |||||
Compositional typing is a very promising approach for languages with | |||||
simple polymorphic type systems, in my opinion, because it presents a | |||||
very cheap way of providing very accurate error messages much better | |||||
than those of Haskell, OCaml and even Elm, a language for which good | |||||
error messages are an explicit goal. | |||||
As an example of this, consider the expression `fun x -> if x (add x 0) | |||||
1`{.ocaml} (or, in Haskell, `\x -> if x then (x + (0 :: Int)) else (1 :: | |||||
Int)`{.haskell} - the type annotations are to emulate | |||||
ML<sub>$\Delta$</sub>'s insistence on monomorphic numbers). | |||||
Types Bool and Int aren't compatible | |||||
When checking that all uses of 'x' agree | |||||
When that checking 'if x' (of type e -> e -> e) | |||||
can be applied to 'add x 0' (of type Int) | |||||
Typing conflicts: | |||||
· x : Bool vs. Int | |||||
The error message generated here is much better than the one GHC | |||||
reports, if you ask me. It points out not that x has some "actual" type | |||||
distinct from its "expected" type, as HM would conclude from its | |||||
left-to-right bias, but rather that two uses of `x` aren't compatible. | |||||
<interactive>:4:18: error: | |||||
• Couldn't match expected type ‘Int’ with actual type ‘Bool’ | |||||
• In the expression: (x + 0 :: Int) | |||||
In the expression: if x then (x + 0 :: Int) else 0 | |||||
In the expression: \ x -> if x then (x + 0 :: Int) else 0 | |||||
Of course, the prototype doesn't care for positions, so the error | |||||
message is still not as good as it could be. | |||||
Perhaps it should be further investigated whether this approach scales | |||||
to at least type classes (since a form of ad-hoc polymorphism is | |||||
absolutely needed) and polymorphic records, so that it can be used in a | |||||
real language. I have my doubts as to if a system like this could | |||||
reasonably be extended to support rank-N types, since it does not have | |||||
$\forall$ quantifiers. | |||||
**UPDATE**: I found out that extending a compositional typing system to | |||||
support type classes is not only possible, it was also [Gergő Érdi's MSc. | |||||
thesis](https://gergo.erdi.hu/projects/tandoori/)! | |||||
**UPDATE**: Again! This is new. Anyway, I've cleaned up the code and | |||||
[thrown it up on GitHub](https://github.com/zardyh/mld). | |||||
Again, a full program implementing ML<sub>$\Delta$</sub> is available | |||||
[here](https://github.com/zardyh/mld). | |||||
Thank you for reading! | |||||
[^1]: Olaf Chitil. 2001. Compositional explanation of types and | |||||
algorithmic debugging of type errors. In Proceedings of the sixth ACM | |||||
SIGPLAN international conference on Functional programming (ICFP '01). | |||||
ACM, New York, NY, USA, 193-204. | |||||
[DOI](http://dx.doi.org/10.1145/507635.507659). | |||||
[^2]: Since I couldn't be arsed to set up monad transformers and all, | |||||
we're doing this the lazy way (ba dum tss): an infinite list of | |||||
variables, and hand-rolled reader/state monads. |
@ -0,0 +1,191 @@ | |||||
--- | |||||
title: "A Quickie: Manipulating Records in Amulet" | |||||
date: September 22, 2019 | |||||
maths: true | |||||
--- | |||||
Amulet, unlike some [other languages], has records figured out. Much like | |||||
in ML (and PureScript), they are their own, first-class entities in the | |||||
language as opposed to being syntax sugar for defining a product | |||||
constructor and projection functions. | |||||
### Records are good | |||||
Being entities in the language, it's logical to characterize them by | |||||
their introduction and elimination judgements[^1]. | |||||
Records are introduced with record literals: | |||||
$$ | |||||
\frac{ | |||||
\Gamma \vdash \overline{e \downarrow \tau} | |||||
}{ | |||||
\Gamma \vdash \{ \overline{\mathtt{x} = e} \} \downarrow \{ \overline{\mathtt{x} : \tau} \} | |||||
} | |||||
$$ | |||||
And eliminated by projecting a single field: | |||||
$$ | |||||
\frac{ | |||||
\Gamma \vdash r \downarrow \{ \alpha | \mathtt{x} : \tau \} | |||||
}{ | |||||
\Gamma \vdash r.\mathtt{x} \uparrow \tau | |||||
} | |||||
$$ | |||||
Records also support monomorphic update: | |||||
$$ | |||||
\frac{ | |||||
\Gamma \vdash r \downarrow \{ \alpha | \mathtt{x} : \tau \} | |||||
\quad \Gamma \vdash e \downarrow \tau | |||||
}{ | |||||
\Gamma \vdash \{ r\ \mathtt{with\ x} = e \} \downarrow \{ \alpha | \mathtt{x} : \tau \} | |||||
} | |||||
$$ | |||||
### Records are.. kinda bad? | |||||
Unfortunately, the rather minimalistic vocabulary for talking about | |||||
records makes them slightly worthless. There's no way to extend a | |||||
record, or to remove a key; Changing the type of a key is also | |||||
forbidden, with the only workaround being enumerating all of the keys | |||||
you _don't_ want to change. | |||||
And, rather amusingly, given the trash-talking I pulled in the first | |||||
paragraph, updating nested records is still a nightmare. | |||||
```amulet | |||||
> let my_record = { x = 1, y = { z = 3 } } | |||||
my_record : { x : int, y : { z : int } } | |||||
> { my_record with y = { my_record.y with z = 4 } } | |||||
_ = { x = 1, y = { z = 4 } } | |||||
``` | |||||
Yikes. Can we do better? | |||||
### An aside: Functional Dependencies | |||||
Amulet recently learned how to cope with [functional dependencies]. | |||||
Functional dependencies extend multi-param type classes by allowing the | |||||
programmer to restrict the relationships between parameters. To | |||||
summarize it rather terribly: | |||||
```amulet | |||||
(* an arbitrary relationship between types *) | |||||
class r 'a 'b | |||||
(* a function between types *) | |||||
class f 'a 'b | 'a -> 'b | |||||
(* a one-to-one mapping *) | |||||
class o 'a 'b | 'a -> 'b, 'b -> 'a | |||||
``` | |||||
### Never mind, records are good | |||||
As of [today], Amulet knows the magic `row_cons` type class, inspired by | |||||
[PureScript's class of the same name]. | |||||
```amulet | |||||
class | |||||
row_cons 'record ('key : string) 'type 'new | |||||
| 'record 'key 'type -> 'new (* 1 *) | |||||
, 'new 'key -> 'record 'type (* 2 *) | |||||
begin | |||||
val extend_row : forall 'key -> 'type -> 'record -> 'new | |||||
val restrict_row : forall 'key -> 'new -> 'type * 'record | |||||
end | |||||
``` | |||||
This class has built-in solving rules corresponding to the two | |||||
functional dependencies: | |||||
1. If the original `record`, the `key` to be inserted, and its | |||||
`type` are all known, then the `new` record can be solved for; | |||||
2. If both the `key` that was inserted and the `new` record are known,
it is possible to solve for the old `record` and the `type` of the `key`.
Note that rule 2 almost lets `row_cons` be solved for in reverse. Indeed, this is expressed by the type of `restrict_row`, which discovers both the `type` and the original `record`. | |||||
Using the `row_cons` class and its magical methods... | |||||
1. Records can be extended: | |||||
```amulet | |||||
> Amc.extend_row @"foo" true { x = 1 } | |||||
_ : { foo : bool, x : int } = | |||||
{ foo = true, x = 1 } | |||||
``` | |||||
2. Records can be restricted: | |||||
```amulet | |||||
> Amc.restrict_row @"x" { x = 1 } | |||||
_ : int * { } = (1, { })
``` | |||||
And, given [a suitable framework of optics], records can be updated | |||||
nicely: | |||||
```amulet | |||||
> { x = { y = 2 } } |> (r @"x" <<< r @"y") ^~ succ | |||||
_ : { x : { y : int } } = | |||||
{ x = { y = 3 } } | |||||
``` | |||||
### God, those are some ugly types | |||||
It's worth pointing out that making an optic that works for all fields, | |||||
parametrised by a type-level string, is not easy or pretty, but it is | |||||
work that only needs to be done once. | |||||
```ocaml | |||||
type optic 'p 'a 's <- 'p 'a 'a -> 'p 's 's | |||||
class | |||||
Amc.row_cons 'r 'k 't 'n | |||||
=> has_lens 'r 'k 't 'n | |||||
| 'k 'n -> 'r 't | |||||
begin | |||||
val rlens : strong 'p => proxy 'k -> optic 'p 't 'n | |||||
end | |||||
instance | |||||
Amc.known_string 'key | |||||
* Amc.row_cons 'record 'key 'type 'new | |||||
=> has_lens 'record 'key 'type 'new | |||||
begin | |||||
let rlens _ = | |||||
let view r = | |||||
let (x, _) = Amc.restrict_row @'key r | |||||
x | |||||
let set x r = | |||||
let (_, r') = Amc.restrict_row @'key r | |||||
Amc.extend_row @'key x r' | |||||
lens view set | |||||
end | |||||
let r | |||||
: forall 'key -> forall 'record 'type 'new 'p. | |||||
Amc.known_string 'key | |||||
* has_lens 'record 'key 'type 'new | |||||
* strong 'p | |||||
=> optic 'p 'type 'new = | |||||
fun x -> rlens @'record (Proxy : proxy 'key) x | |||||
``` | |||||
--- | |||||
Sorry for the short post, but that's it for today. | |||||
--- | |||||
[^1]: Record fields $\mathtt{x}$ are typeset in monospaced font to make | |||||
it apparent that they are unfortunately not first-class in the language, | |||||
but rather part of the syntax. Since Amulet's type system is inherently | |||||
bidirectional, the judgement $\Gamma \vdash e \uparrow \tau$ represents | |||||
type inference while $\Gamma \vdash e \downarrow \tau$ stands for type | |||||
checking. | |||||
[functional dependencies]: https://web.cecs.pdx.edu/~mpj/pubs/fundeps.html | |||||
[other languages]: https://haskell.org | |||||
[today]: https://github.com/tmpim/amulet/pull/168 | |||||
[PureScript's class of the same name]: https://pursuit.purescript.org/builtins/docs/Prim.Row#t:Cons | |||||
[a suitable framework of optics]: /static/profunctors.ml.html |
@ -0,0 +1,136 @@ | |||||
--- | |||||
title: "Announcement: amc-prove" | |||||
date: September 25, 2019 | |||||
maths: true | |||||
--- | |||||
`amc-prove` is a smallish tool to automatically prove (some) sentences | |||||
of constructive quantifier-free[^1] first-order logic using the Amulet | |||||
compiler's capability to suggest replacements for typed holes. | |||||
In addition to printing whether or not it could determine the truthiness | |||||
of the sentence, `amc-prove` will also report the smallest proof term it | |||||
could compute of that type. | |||||
### What works right now | |||||
* Function types `P -> Q`{.amcprove}, corresponding to $P \to Q$ in the logic. | |||||
* Product types `P * Q`{.amcprove}, corresponding to $P \land Q$ in the logic. | |||||
* Sum types `P + Q`{.amcprove}, corresponding to $P \lor Q$ in the logic | |||||
* `tt`{.amcprove} and `ff`{.amcprove} correspond to $\top$ and $\bot$ respectively | |||||
* The propositional bi-implication type `P <-> Q`{.amcprove} stands for $P \iff Q$ | |||||
and is interpreted as $P \to Q \land Q \to P$ | |||||
### What is fiddly right now | |||||
Amulet will not attempt to pattern match on a sum type nested inside a | |||||
product type. Concretely, this means having to replace $(P \lor Q) \land | |||||
R \to S$ by $(P \lor Q) \to R \to S$ (currying). | |||||
`amc-prove`'s support for negation and quantifiers is incredibly fiddly. | |||||
There is a canonical empty type, `ff`{.amcprove}, but the negation | |||||
connective `not P`{.amcprove} expands to `P -> forall 'a. | |||||
'a`{.amcprove}, since empty types aren't properly supported. As a | |||||
concrete example, take the double-negation of the law of excluded middle | |||||
$\neg\neg(P \lor \neg{}P)$, which holds constructively. | |||||
If you enter the direct translation of that sentence as a type, | |||||
`amc-prove` will report that it couldn't find a solution. However, by | |||||
using `P -> ff`{.amcprove} instead of `not P`{.amcprove}, a solution is
found. | |||||
```amc-prove | |||||
? not (not (P + not P)) | |||||
probably not. | |||||
? ((P + (P -> forall 'a. 'a)) -> forall 'a. 'a) -> forall 'a. 'a | |||||
probably not. | |||||
? ((P + (P -> ff)) -> ff) -> ff | |||||
yes. | |||||
fun f -> f (R (fun b -> f (L b))) | |||||
``` | |||||
### How to get it | |||||
`amc-prove` is bundled with the rest of the Amulet compiler [on Github]. | |||||
You'll need [Stack] to build. I recommend building with `stack build | |||||
--fast` since the compiler is rather large and `amc-prove` does not | |||||
benefit much from GHC's optimisations. | |||||
``` | |||||
% git clone https://github.com/tmpim/amc-prove.git | |||||
% cd amc-prove | |||||
% stack build --fast | |||||
% stack run amc-prove | |||||
Welcome to amc-prove. | |||||
? | |||||
``` | |||||
### Usage sample | |||||
Here's a small demonstration of everything that works. | |||||
```amc-prove | |||||
? P -> P | |||||
yes. | |||||
fun b -> b | |||||
? P -> Q -> P | |||||
yes. | |||||
fun a b -> a | |||||
? Q -> P -> P | |||||
yes. | |||||
fun a b -> b | |||||
? (P -> Q) * P -> Q | |||||
yes. | |||||
fun (h, x) -> h x | |||||
? P * Q -> P | |||||
yes. | |||||
fun (z, a) -> z | |||||
? P * Q -> Q | |||||
yes. | |||||
fun (z, a) -> a | |||||
? P -> Q -> P * Q | |||||
yes. | |||||
fun b c -> (b, c) | |||||
? P -> P + Q | |||||
yes. | |||||
fun y -> L y | |||||
? Q -> P + Q | |||||
yes. | |||||
fun y -> R y | |||||
? (P -> R) -> (Q -> R) -> P + Q -> R | |||||
yes. | |||||
fun g f -> function | |||||
| (L y) -> g y | |||||
| (R c) -> f c | |||||
? not (P * not P) | |||||
yes. | |||||
Not (fun (a, (Not h)) -> h a) | |||||
(* Note: Only one implication of DeMorgan's second law holds | |||||
constructively *) | |||||
? not (P + Q) <-> (not P) * (not Q) | |||||
yes. | |||||
(* Note: I have a marvellous term to prove this proposition, | |||||
but unfortunately it is too large to fit in this margin. *) | |||||
? (not P) + (not Q) -> not (P * Q) | |||||
yes. | |||||
function | |||||
| (L (Not f)) -> | |||||
Not (fun (a, b) -> f a) | |||||
| (R (Not g)) -> | |||||
Not (fun (y, z) -> g z) | |||||
``` | |||||
[^1]: Technically, amc-prove "supports" the entire Amulet type system, | |||||
which includes things like type-classes and rank-N types (it's equal in | |||||
expressive power to System F). However, the hole-filling logic is meant | |||||
to aid the programmer while she codes, not exhaustively search for a | |||||
solution, so it was written to fail early and fail fast instead of | |||||
spending unbounded time searching for a solution that might not be | |||||
there. | |||||
You can find the proof term I redacted from DeMorgan's first law [here]. | |||||
[on Github]: https://github.com/tmpim/amulet | |||||
[Stack]: https://haskellstack.org | |||||
[here]: /static/demorgan-1.ml.html |
@ -0,0 +1,202 @@ | |||||
--- | |||||
title: Interactive amc-prove | |||||
date: September 29, 2019 | |||||
--- | |||||
Following my [last post announcing amc-prove](/posts/2019-09-25.html), I | |||||
decided I'd make it more accessible to people who wouldn't like to spend | |||||
almost 10 minutes compiling Haskell code just to play with a fiddly | |||||
prover. | |||||
So I made a web interface for the thing: Play with it below. | |||||
Text immediately following <span style='color:red'>> </span> is editable. | |||||
<noscript> | |||||
Sorry, this post isn't for you. | |||||
</noscript> | |||||
<div id='prover-container'> | |||||
<div id='prover'> | |||||
<pre id='prover-output'> | |||||
<span style='color: red' id=prover-prompt>> </span><span id='prover-input' contenteditable='true'>(not P) + (not Q) -> not (P * Q)</span> | |||||
</pre> | |||||
</div> | |||||
</div> | |||||
<style> | |||||
div#prover-container { | |||||
width: 100%; | |||||
height: 600px; | |||||
border: 1px solid #c0c0c0; | |||||
background: #d6d6d6; | |||||
box-shadow: 1px 1px #c0c0c0; | |||||
border-radius: 0.3em; | |||||
} | |||||
div#prover { | |||||
width: 100%; | |||||
height: 100%; | |||||
overflow-y: scroll; | |||||
} | |||||
pre#prover-output, pre#prover-output > * { | |||||
white-space: pre-wrap; | |||||
} | |||||
span#prover-input { | |||||
border: 1px solid #c0c0c0; | |||||
font-family: monospace; | |||||
} | |||||
</style> | |||||
<script async> | |||||
// DOM handles for the interactive prover REPL embedded in this post.
let input = document.getElementById('prover-input');
let output = document.getElementById('prover-output');
let prompt = document.getElementById('prover-prompt');
// Matches the location line of an amc-prove error report, e.g.
// "<input>[1:1 ..1:5]: error" (groups capture the source span).
let ERROR_REGEX = /^\<input\>\[(\d+):(\d+) \.\.(\d+):(\d+)\]: error$/;
// Exact reply amc-prove gives when it fails to find a proof.
let NEGATIVE_REGEX = /^probably not\.$/;
// POST a sentence to the /prove endpoint and normalise the reply into
// one of three shapes:
//   { error: true,  error_msg: [..] }            -- network/server/prover error
//   { error: false, proven: false }              -- prover said "probably not."
//   { error: false, proven: true, proof: [..] }  -- proof found; lines follow
let prove = async (sentence) => {
  let response;
  try {
    // NOTE(review): the original request set `Host: '/prove'` as a header.
    // Host is a forbidden request header (and that value was a path, not a
    // host), so browsers silently dropped it -- removed.
    response = await fetch('/prove', {
      method: 'POST',
      body: sentence
    });
  } catch (e) {
    return { error: true, error_msg: ['Server responded with an error'] }
  }
  // The prover replies line-by-line: the first non-blank line says whether
  // it succeeded, the rest carry the proof term or the error report.
  const prover_response = (await response.text()).split('\n').map(x => x.trimEnd()).filter(x => x !== '');
  if (response.status !== 200 || prover_response.length === 0) {
    // An empty body previously crashed on prover_response[0].match below;
    // treat it as a server error instead.
    return { error: true, error_msg: ['Server responded with an error'] }
  }
  const result_line = prover_response[0];
  if (result_line.match(ERROR_REGEX) !== null) {
    return { error: true, error_msg: prover_response.slice(1) }
  } else if (result_line.match(NEGATIVE_REGEX) !== null) {
    return { error: false, proven: false }
  } else {
    return { error: false, proven: true, proof: prover_response.slice(1) };
  }
};
// Lexer rules for the toy Amulet highlighter, tried in order.  Each entry
// is [regex anchored at the start of the input, CSS class for the token]:
// kw = keyword, va = variable/punctuation, dt = data constructor.
const LEX_RULES = [
  [/^(fun(ction)?|match|not)/, 'kw'],
  [/^[a-z][a-z0-9]*/, 'va'],
  [/^[A-Z][A-Za-z0-9]*/, 'dt'],
  [/^[\(\)\[\]<->\+\*\-,|]/, 'va']
];
// Split `code` into { t, c } tokens (text, CSS class).  Leading whitespace
// is folded into the following token so the original spacing survives the
// round-trip into spans.  Anything no rule matches is flushed verbatim as
// a single 'va' token and lexing stops.
let tokenise = (code) => {
  let tokens = [];
  let exit = false;
  while (code !== '' && !exit) {
    let matched_this_loop = false;
    // Deliberately try every rule per pass with no break: each successful
    // match consumes its token and later rules see the shortened input.
    // (The original used .map purely for this side effect.)
    for (const [regex, clss] of LEX_RULES) {
      let had_spc = code.match(/^\s*/)[0];
      let match = code.trimStart().match(regex);
      if (match !== null) {
        let matched = match[0];
        code = code.trimStart().slice(matched.length);
        tokens.push({ t: had_spc + matched, c: clss });
        matched_this_loop = true;
      }
    }
    if (!matched_this_loop) {
      // No rule fired: emit the remainder untouched and bail out.
      exit = true;
      tokens.push({ t: code, c: 'va' });
    }
  }
  return tokens;
};
// Render `code` (an array of source lines) as a <span> of highlighted
// token <span>s, with a <br> after every line.  Token classes come from
// tokenise/LEX_RULES and are styled by the page's CSS.
// (Original used .map purely for side effects and bound an unused
// `elems` variable; rewritten with for...of.)
let syntax_highlight = (code) => {
  let container = document.createElement('span');
  for (const line of code) {
    for (const token of tokenise(line)) {
      let span = document.createElement('span');
      span.innerText = token.t;
      span.classList.add(token.c);
      container.appendChild(span);
    }
    container.appendChild(document.createElement('br'));
  }
  return container;
}
// Submitted-query history for the prompt.  history_index is the entry
// currently shown while scrolling with Up/Down, or -1 when not scrolling
// (it is reset to -1 on every submit; the initial 0 is never read before
// the first Enter sets it).
let history = [];
let history_index = 0;
// REPL driver.  Enter submits the current line to the prover and echoes
// the highlighted query plus the reply above the prompt; Shift+Enter is
// left alone (newline); Up/Down scroll through previously submitted
// queries.  (Fixed loose == comparisons and removed a leftover debug
// console.log; logic otherwise unchanged.)
input.onkeydown = async (e) => {
  // e.which is a legacy numeric fallback for browsers without e.key; a
  // number never matches the string names below, so it only matters for
  // the (unlikely) case of e.key being absent.
  let key = e.key || e.which;
  if (key === 'Enter' && !e.shiftKey) {
    e.preventDefault();
    let text = input.innerText;
    history_index = -1; // leave history-scrolling mode
    // Avoid recording consecutive duplicate entries.
    if (text !== history[history.length - 1]) {
      history.push(text);
    }
    let result = await prove(text);
    let old_input = syntax_highlight(['> ' + text]);
    let out_block = document.createElement('span');
    if (result.error) {
      out_block.style = 'color: red';
      out_block.innerText = 'error:\n';
      // Only the first line of the error report is shown.
      result.error_msg.slice(0, 1).map(e => {
        let span = document.createElement('span');
        span.style = 'color: red';
        span.innerText = '  ' + e + '\n';
        out_block.appendChild(span);
      });
    } else if (result.proven) {
      out_block.classList.add('kw');
      out_block.innerText = 'yes:\n';
      out_block.appendChild(syntax_highlight(result.proof));
    } else {
      out_block.classList.add('kw');
      out_block.innerText = 'not proven\n';
    }
    // Move the finished interaction above the prompt and reset the input.
    input.innerText = '';
    output.insertBefore(old_input, prompt);
    output.insertBefore(out_block, prompt);
    input.scrollIntoView();
  } else if (key === 'Up' || key === 'ArrowUp') {
    e.preventDefault();
    if (history_index > 0) {
      history_index--;
    } else if (history_index < 0 && history.length > 0) {
      // Not currently scrolling: start from the most recent entry.
      history_index = history.length - 1;
    } else {
      return; // already at the oldest entry, or history is empty
    }
    input.innerText = history[history_index];
    input.focus();
  } else if (key === 'Down' || key === 'ArrowDown') {
    e.preventDefault();
    if (history_index >= 0) {
      // Step towards newer entries; past the newest, back to a blank line.
      history_index = history_index < history.length - 1 ? history_index + 1 : -1;
    } else {
      return;
    }
    input.innerText = history[history_index] || '';
  }
}
</script> |
@ -0,0 +1,369 @@ | |||||
--- | |||||
title: Typed Type-Level Computation in Amulet | |||||
date: October 04, 2019 | |||||
maths: true | |||||
--- | |||||
Amulet, as a programming language, has a focus on strong static typing. This has led us to adopt | |||||
many features inspired by dependently-typed languages, the most prominent of which being typed holes | |||||
and GADTs, the latter being an imitation of indexed families. | |||||
However, Amulet was up until recently sorely lacking in a way to express computational content in | |||||
types: It was possible to index datatypes by other, regular datatypes ("datatype promotion", in the | |||||
Haskell lingo) since the type and kind levels are one and the same, but writing functions on those | |||||
indices was entirely impossible. | |||||
As of this week, the language supports two complementary mechanisms for typed type-level programming: | |||||
_type classes with functional dependencies_, a form of logic programming, and _type functions_, which | |||||
permit functional programming on the type level. | |||||
I'll introduce them in that order; This post is meant to serve as an introduction to type-level | |||||
programming using either technique in general, but it'll also present some concepts formally and with | |||||
some technical depth. | |||||
### Type Classes are Relations: Programming with Fundeps | |||||
In set theory[^1] a _relation_ $R$ over a family of sets $A, B, C, \dots$ is a subset of the | |||||
cartesian product $A \times B \times C \times \dots$. If $(a, b, c, \dots) \in R_{A,B,C,\dots}$ we | |||||
say that $a$, $b$ and $c$ are _related_ by $R$. | |||||
In this context, a _functional dependency_ is a term $X \leadsto Y$ | |||||
where $X$ and $Y$ are both sets of natural numbers. A relation is said | |||||
to satisfy a functional dependency $X \leadsto Y$ when, for any tuple in | |||||
the relation, the values at $X$ uniquely determine the values at $Y$. | |||||
For instance, the relations $R_{A,B}$ satisfying $\{0\} \leadsto \{1\}$ are partial functions $A \to | |||||
B$, and if it were additionally to satisfy $\{1\} \leadsto \{0\}$ it would be a partial one-to-one | |||||
mapping. | |||||
One might wonder what all of this abstract nonsense[^2] has to do with type classes. The thing is, a | |||||
type class `class foo : A -> B -> constraint`{.amulet} is a relation $\text{Foo}_{A,B}$! With this in | |||||
mind, it becomes easy to understand what it might mean for a type class to satisfy a functional | |||||
relation, and indeed the expressive power that they bring. | |||||
To make it concrete: | |||||
```amulet | |||||
class r 'a 'b (* an arbitrary relation between a and b *) | |||||
class f 'a 'b | 'a -> 'b (* a function from a to b *) | |||||
class i 'a 'b | 'a -> 'b, 'b -> 'a (* a one-to-one mapping between a and b *) | |||||
``` | |||||
#### The Classic Example: Collections | |||||
In Mark P. Jones' paper introducing functional dependencies, he presents as an example the class | |||||
`collects : type -> type -> constraint`{.amulet}, where `'e`{.amulet} is the type of elements in the | |||||
collection type `'ce`{.amulet}. This class can be used for all the standard, polymorphic collections | |||||
(of kind `type -> type`{.amulet}), but it also admits instances for monomorphic collections, like a | |||||
`bitset`. | |||||
```amulet | |||||
class collects 'e 'ce begin | |||||
val empty : 'ce | |||||
val insert : 'e -> 'ce -> 'ce | |||||
val member : 'e -> 'ce -> bool | |||||
end | |||||
``` | |||||
Omitting the standard implementation details, this class admits instances like: | |||||
```amulet | |||||
class eq 'a => collects 'a (list 'a) | |||||
class eq 'a => collects 'a ('a -> bool) | |||||
instance collects char string (* amulet strings are not list char *) | |||||
``` | |||||
However, Jones points out this class, as written, has a variety of problems. For starters, `empty`{.amulet} has | |||||
an ambiguous type, `forall 'e 'ce. collects 'e 'ce => 'ce`{.amulet}. This type is ambiguous because the type | |||||
variable `e`{.amulet} is $\forall$-bound, and appears in the constraint `collects 'e 'ce`{.amulet}, but doesn't
appear to the right of the `=>`{.amulet}; Thus, we can't solve it using unification, and the program | |||||
would have undefined semantics. | |||||
Moreover, this class leads to poor inferred types. Consider the two functions `f`{.amulet} and `g`, below. | |||||
These have the types `(collects 'a 'c * collects 'b 'c) => 'a -> 'b -> 'c -> 'c`{.amulet} and | |||||
`(collects bool 'c * collects int 'c) => 'c -> 'c`{.amulet} respectively. | |||||
```amulet | |||||
let f x y coll = insert x (insert y coll) | |||||
let g coll = f true 1 coll | |||||
``` | |||||
The problem with the type of `f`{.amulet} is that it is too general, if we wish to model homogeneous | |||||
collections only; This leads to the type of `g`, which really ought to be a type error, but isn't; The | |||||
programming error in its definition won't be reported here, but at the use site, which might be in a | |||||
different module entirely. This problem of poor type inference and bad error locality motivates us to | |||||
refine the class `collects`, adding a functional dependency: | |||||
```amulet | |||||
(* Read: 'ce determines 'e *) | |||||
class collects 'e 'ce | 'ce -> 'e begin | |||||
val empty : 'ce | |||||
val insert : 'e -> 'ce -> 'ce | |||||
val member : 'e -> 'ce -> bool | |||||
end | |||||
``` | |||||
This class admits all the same instances as before, but now the functional dependency lets Amulet | |||||
infer an improved type for `f`{.amulet} and report the type error at `g`{.amulet}. | |||||
```amulet | |||||
val f : collects 'a 'b => 'a -> 'a -> 'b -> 'b | |||||
``` | |||||
``` | |||||
│ | |||||
2 │ let g coll = f true 1 coll | |||||
│ ^ | |||||
Couldn't match actual type int | |||||
with the type expected by the context, bool | |||||
``` | |||||
One can see from the type of `f`{.amulet} that Amulet can simplify the conjunction of constraints | |||||
`collects 'a 'c * collects 'b 'c`{.amulet} into `collects 'a 'c`{.amulet} and substitute `'b`{.amulet} | |||||
for `'a`{.amulet} in the rest of the type. This is because the second parameter of `collects`{.amulet} | |||||
is enough to determine the first parameter; Since `'c`{.amulet} is obviously equal to itself, | |||||
`'a`{.amulet} must be equal to `'b`. | |||||
We can observe improvement within the language using a pair of data types, `(:-) : constraint -> | |||||
constraint -> type`{.amulet} and `dict : constraint -> type`{.amulet}, which serve as witnesses of | |||||
implication between constraints and a single constraint respectively. | |||||
```amulet | |||||
type dict 'c = Dict : 'c => dict 'c | |||||
type 'p :- 'q = Sub of ('p => unit -> dict 'q) | |||||
let improve : forall 'a 'b 'c. (collects 'a 'c * collects 'b 'c) :- ('a ~ 'b) = | |||||
Sub (fun _ -> Dict) | |||||
``` | |||||
Because this program type-checks, we can be sure that `collects 'a 'c * collects 'b 'c`{.amulet} | |||||
implies `'a`{.amulet} is equal to `'b`{.amulet}. Neat! | |||||
### Computing with Fundeps: Natural Numbers and Vectors | |||||
If you saw this coming, pat yourself on the back. | |||||
I'm required by law to talk about vectors in every post about types. No, really; It's true. | |||||
I'm sure everyone's seen this by now, but vectors are cons-lists indexed by their type as a Peano | |||||
natural. | |||||
```amulet | |||||
type nat = Z | S of nat | |||||
type vect 'n 'a = | |||||
| Nil : vect Z 'a | |||||
| Cons : 'a * vect 'n 'a -> vect (S 'n) 'a | |||||
``` | |||||
Our running objective for this post will be to write a function to append two vectors, such that the | |||||
length of the result is the sum of the lengths of the arguments.[^3] But, how do we even write the | |||||
type of such a function? | |||||
Here we can use a type class with functional dependencies witnessing the fact that $a + b = c$, for | |||||
some $a$, $b$, $c$ all in $\mathbb{N}$. Obviously, knowing $a$ and $b$ is enough to know $c$, and the | |||||
functional dependency expresses that. Due to the way we're going to be implementing `add`, the other | |||||
two functional dependencies aren't admissible. | |||||
```amulet | |||||
class add 'a 'b 'c | 'a 'b -> 'c begin end | |||||
``` | |||||
Adding zero to something just results in that something, and if $a + b = c$ then $(1 + a) + b = 1 + c$. | |||||
```amulet | |||||
instance add Z 'a 'a begin end | |||||
instance add 'a 'b 'c => add (S 'a) 'b (S 'c) begin end | |||||
``` | |||||
With this in hand, we can write a function to append vectors.
```amulet | |||||
let append : forall 'n 'k 'm 'a. add 'n 'k 'm | |||||
=> vect 'n 'a -> vect 'k 'a -> vect 'm 'a = | |||||
fun xs ys -> | |||||
match xs with | |||||
| Nil -> ys | |||||
| Cons (x, xs) -> Cons (x, append xs ys) | |||||
``` | |||||
Success! | |||||
... or maybe not. Amulet's complaining about our definition of `append` even though it's correct; What | |||||
gives? | |||||
The problem is that while functional dependencies let us conclude equalities from pairs of instances, | |||||
it doesn't do us any good if there's a single instance. So we need a way to reflect the equalities in | |||||
a way that can be pattern-matched on. If your GADT senses are going off, that's a good thing. | |||||
#### Computing with Evidence | |||||
This is terribly boring to do and what motivated me to add type functions to Amulet in the first | |||||
place, but the solution here is to have a GADT that mirrors the structure of the class instances, and | |||||
make the instances compute that. Then, in our append function, we can match on this evidence to reveal | |||||
equalities to the type checker. | |||||
```amulet | |||||
type add_ev 'k 'n 'm = | |||||
| AddZ : add_ev Z 'a 'a | |||||
| AddS : add_ev 'a 'b 'c -> add_ev (S 'a) 'b (S 'c) | |||||
class add 'a 'b 'c | 'a 'b -> 'c begin | |||||
val ev : add_ev 'a 'b 'c | |||||
end | |||||
instance add Z 'a 'a begin | |||||
let ev = AddZ | |||||
end | |||||
instance add 'a 'b 'c => add (S 'a) 'b (S 'c) begin | |||||
let ev = AddS ev | |||||
end | |||||
``` | |||||
Now we can write vector `append` using the `add_ev` type. | |||||
```amulet | |||||
let append' (ev : add_ev 'n 'm 'k) | |||||
(xs : vect 'n 'a) | |||||
(ys : vect 'm 'a) | |||||
: vect 'k 'a = | |||||
match ev, xs with | |||||
| AddZ, Nil -> ys | |||||
| AddS p, Cons (x, xs) -> Cons (x, append' p xs ys) | |||||
and append xs ys = append' ev xs ys | |||||
``` | |||||
This type-checks and we're done. | |||||
### Functions on Types: Programming with Closed Type Functions | |||||
Look, duplicating the structure of a type class at the value level just so the compiler can figure out | |||||
equalities is stupid. Can't we make it do that work instead? Enter _closed type functions_. | |||||
```amulet | |||||
type function (+) 'n 'm begin | |||||
Z + 'n = 'n | |||||
(S 'k) + 'n = S ('k + 'n) | |||||
end | |||||
``` | |||||
This declaration introduces the type constructor `(+)`{.amulet} (usually written infix) and two rules | |||||
for reducing types involving saturated applications of `(+)`{.amulet}. Type functions, unlike type | |||||
classes which are defined like Prolog clauses, are defined in a pattern-matching style reminiscent of | |||||
Haskell. | |||||
Each type function has a set of (potentially overlapping) _equations_, and the compiler will reduce an | |||||
application using an equation as soon as it's sure that equation is the only possible equation based | |||||
on the currently-known arguments. | |||||
Using the type function `(+)`{.amulet} we can use our original implementation of `append` and have it | |||||
type-check: | |||||
```amulet | |||||
let append (xs : vect 'n 'a) (ys : vect 'k 'a) : vect ('n + 'k) 'a = | |||||
match xs with | |||||
| Nil -> ys | |||||
| Cons (x, xs) -> Cons (x, append xs ys) | |||||
let ys = append (Cons (1, Nil)) (Cons (2, Cons (3, Nil))) | |||||
``` | |||||
Now, a bit of a strange thing is that Amulet reduces type family applications as lazily as possible, | |||||
so that `ys` above has type `vect (S Z + S (S Z)) int`{.amulet}. In practice, this isn't an issue, as | |||||
a simple ascription shows that this type is equal to the more orthodox `vect (S (S (S Z))) | |||||
int`{.amulet}. | |||||
```amulet | |||||
let zs : vect (S (S (S Z))) int = ys | |||||
``` | |||||
Internally, type functions do pretty much the same thing as the functional dependency + evidence | |||||
approach we used earlier. Each equation gives rise to an equality _axiom_, represented as a | |||||
constructor because our intermediate language pretty much lets constructors return whatever they damn | |||||
want. | |||||
```amulet | |||||
type + '(n : nat) '(m : nat) = | |||||
| awp : forall 'n 'm 'r. 'n ~ Z -> 'm ~ 'n -> ('n + 'm) ~ 'n | |||||
| awq : forall 'n 'k 'm 'l. 'n ~ (S 'k) -> 'm ~ 'l | |||||
-> ('n + 'm) ~ (S ('k + 'l)) | |||||
``` | |||||
These symbols have ugly autogenerated names because they're internal to the compiler and should never | |||||
appear to users, but you can see that `awp` and `awq` correspond to each clause of the `(+)`{.amulet} | |||||
type function, with a bit more freedom in renaming type variables. | |||||
### Custom Type Errors: Typing Better | |||||
Sometimes - I mean, pretty often - you have better domain knowledge than Amulet. For instance, you | |||||
might know that it's impossible to `show` a function. The `type_error` type family lets you tell the | |||||
type checker this: | |||||
```amulet | |||||
instance | |||||
  type_error (String "Can't show functional type:" :<>: ShowType ('a -> 'b))
=> show ('a -> 'b) | |||||
begin | |||||
let show _ = "" | |||||
end | |||||
``` | |||||
Now trying to use `show` on a function value will give you a nice error message: | |||||
```amulet | |||||
let _ = show (fun x -> x + 1) | |||||
``` | |||||
``` | |||||
│ | |||||
1 │ let _ = show (fun x -> x + 1) | |||||
│ ^^^^^^^^^^^^^^^^^^^^^ | |||||
Can't show functional type: int -> int | |||||
``` | |||||
### Type Families can Overlap | |||||
Type families can tell when two types are equal or not: | |||||
```amulet | |||||
type function equal 'a 'b begin | |||||
  equal 'a 'a = True
  equal 'a 'b = False
end | |||||
``` | |||||
But overlapping equations need to agree: | |||||
```amulet | |||||
type function overlap_not_ok 'a begin | |||||
overlap_not_ok int = string | |||||
overlap_not_ok int = int | |||||
end | |||||
``` | |||||
``` | |||||
Overlapping equations for overlap_not_ok int | |||||
• Note: first defined here, | |||||
│ | |||||
2 │ overlap_not_ok int = string | |||||
│ ^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |||||
but also defined here | |||||
│ | |||||
3 │ overlap_not_ok int = int | |||||
│ ^^^^^^^^^^^^^^^^^^^^^^^^ | |||||
``` | |||||
### Conclusion | |||||
Type families and type classes with functional dependencies are both ways to introduce computation in | |||||
the type system. They both have their strengths and weaknesses: Fundeps allow improvement to inferred | |||||
types, but type families interact better with GADTs (since they generate more equalities). Both are | |||||
important in a language with a focus on type safety, in my opinion.
[^1]: This is not actually the definition of a relation with full generality; Set theorists are | |||||
concerned with arbitrary families of sets indexed by some $i \in I$, where $I$ is a set of indices; | |||||
Here, we've set $I = \mathbb{N}$ and restrict ourselves to the case where relations are tuples. | |||||
[^2]: At least it's not category theory. | |||||
[^3]: In the shower today I actually realised that the `append` function on vectors is a witness to | |||||
the algebraic identity $a^n * a^m = a^{n + m}$. Think about it: the `vect 'n`{.amulet} functor is | |||||
representable by `fin 'n`{.amulet}, i.e. it is isomorphic to functions `fin 'n -> 'a`{.amulet}. By | |||||
definition, `fin 'n`{.amulet} is the type with `'n`{.amulet} elements, and arrow types `'a -> | |||||
'b`{.amulet} have $\text{size}(b)^{\text{size}(a)}$ elements, which leads us to conclude `vect 'n | |||||
'a` has size $\text{size}(a)^n$ elements. |
@ -0,0 +1,126 @@ | |||||
--- | |||||
title: "A Quickie: A Use Case for Impredicative Polymorphism" | |||||
path: impredicative-polymorphism | |||||
date: October 19, 2019 | |||||
--- | |||||
Amulet now (as of the 18th of October) has support for impredicative | |||||
polymorphism based on [Quick Look impredicativity], an algorithm first | |||||
proposed for GHC that treats inference of applications as a two-step | |||||
process to enable inferring impredicative types. | |||||
As a refresher, impredicative types (in Amulet) are types in which a | |||||
`forall`{.amulet} appears under a type constructor (that is not
`(->)`{.amulet} or `(*)`{.amulet}, since those have special variance in | |||||
the compiler). | |||||
Quick Look impredicativity works by doing type checking of applications | |||||
in two phases: the _quick look_, which is called so because it's faster | |||||
than regular type inference, and the regular type-checking of | |||||
arguments. | |||||
Given a `n`-ary application | |||||
<code>f x<sub>1</sub> ... x<sub>n</sub></code>: | |||||
<ol type="I"> | |||||
<li> | |||||
The _quick look_ proceeds by inferring the type of the function to | |||||
expose the first `n` quantifiers, based on the form of the arguments. | |||||
For a regular term argument `e`, we expect a `'t ->`{.amulet} quantifier; For | |||||
visible type arguments, we expect either `forall 'a.`{.amulet} or | |||||
`forall 'a ->`{.amulet}. | |||||
After we have each of the quantifiers, we quickly infer a type for each | |||||
of the _simple_ arguments in | |||||
<code>x<sub>1</sub> ... x<sub>n</sub></code>. | |||||
Here, simple means either a | |||||
variable, literal, application or an expression annotated with a type | |||||
`x : t`{.amulet}. With this type in hands, we unify it with the type | |||||
expected by the quantifier, to collect a partial substitution (in which
unification failures are ignored), used to discover impredicative | |||||
instantiation. | |||||
For example, say `f : 'a -> list 'a -> list 'a`{.amulet} (the cons | |||||
function)[^1], and we want to infer the application `f (fun x -> x) (Nil | |||||
@(forall 'xx. 'xx -> 'xx))`{.amulet}. Here, the quick look will inspect | |||||
each argument in turn, coming up with a `list 'a ~ list (forall 'xx. 'xx | |||||
-> 'xx)`{.amulet} equality by looking at the second argument. Since the | |||||
first argument is not simple, it tells us nothing. Thus, the second | |||||
phase starts with the substitution `'a := forall 'xx. 'xx -> | |||||
'xx`{.amulet}. | |||||
</li> | |||||
<li> | |||||
The second phase is traditional type-checking of each argument in turn, | |||||
against its respective quantifier. Here we use Amulet's type-checking | |||||
function `check` instead of applying type-inference then constraining | |||||
with subsumption since that results in more precise results.
However, instead of taking the quantifiers directly from the function's | |||||
inferred type, we first apply the substitution generated by the | |||||
quick-look. Thus, keeping with the example, we check the function `(fun | |||||
x -> x)`{.amulet} against the type `forall 'xx. 'xx -> 'xx`{.amulet}, | |||||
instead of checking it against the type variable `'a`{.amulet}. | |||||
This is important because checking against a type variable degrades to | |||||
inference + subsumption, which we wanted to avoid in the first place! | |||||
Thus, if we had no quick look, the function `(fun x -> x)`{.amulet} | |||||
would be given monomorphic type `'t1 -> 't2`{.amulet} (where | |||||
`'t1'`{.amulet}, `'t2`{.amulet} are fresh unification variables), and | |||||
we'd try to unify `list ('t1 -> 't2) ~ list (forall 'xx. 'xx -> | |||||
'xx)`{.amulet} - No dice! | |||||
</li> | |||||
</ol> | |||||
### Why does this matter? | |||||
Most papers discussing impredicative polymorphism focus on the boring, | |||||
useless example of stuffing a list with identity functions. Indeed, this | |||||
is what I demonstrated above. | |||||
However, a much more useful example is putting _lenses_ in lists (or | |||||
`optional`{.amulet}, `either`{.amulet}, or what have you). Recall the | |||||
van Laarhoven encoding of lenses: | |||||
```amulet | |||||
type lens 's 't 'a 'b <- forall 'f. functor 'f => ('a -> 'f 'b) -> 's -> 'f 't | |||||
``` | |||||
If you're not a fan of that, consider also the profunctor encoding of | |||||
lenses: | |||||
```amulet | |||||
type lens 's 't 'a 'b <- forall 'p. strong 'p => 'p 'a 'b -> 'p 's 't | |||||
``` | |||||
These types are _both_ polymorphic, which means we can't normally have a | |||||
`list (lens _ _ _ _)`{.amulet}. This is an issue! The Haskell `lens` | |||||
library works around this by providing a `LensLike`{.haskell} type, | |||||
which is not polymorphic and takes the functor `f` by means of an | |||||
additional parameter. However, consider the difference in denotation | |||||
between | |||||
```haskell | |||||
foo :: [Lens a a Int Int] -> a -> (Int, a) | |||||
bar :: Functor f => [LensLike f a a Int Int] -> a -> (Int, a) | |||||
``` | |||||
The first function takes a list of lenses; It can then use these lenses | |||||
in any way it pleases. The second, however, takes a list of lens-like | |||||
values _that all use the same functor_. Thus, you can't `view` using the | |||||
head of the list and `over` using the second element! (Recall that | |||||
`view` uses the `Const`{.haskell} functor and `over`{.amulet} the | |||||
`Identity`{.amulet} functor). Indeed, the second function can't use the | |||||
lenses at all, since it must work for an arbitrary functor and not | |||||
`Const`{.haskell}/`Identity`{.haskell}. | |||||
Of course, [Amulet lets you put lenses in lists]: See `lens_list` and | |||||
`xs` at the bottom of the file. | |||||
[^1]: Assume that the `'a`{.amulet} variable is bound by a `forall | |||||
'a.`{.amulet} quantifier. Since we don't use visible type application in | |||||
the following example, I just skipped mentioning it. | |||||
[Quick Look impredicativity]: https://github.com/serras/ghc-proposals/blob/quick-look/proposals/0000-quick-look-impredicativity.md | |||||
[Amulet lets you put lenses in lists]: /static/profunctor-impredicative.ml.html |
@ -0,0 +1,380 @@ | |||||
--- | |||||
title: The Semantics of Evaluation & Continuations | |||||
date: July 12th, 2020 | |||||
maths: true | |||||
--- | |||||
Continuations are a criminally underappreciated language feature. Very few languages (off the top of my head: most Scheme dialects, some Standard ML implementations, and Ruby) support the---already very expressive---undelimited continuations---the kind introduced by `call/cc`{.scheme}---and even fewer implement the more expressive delimited continuations. Continuations (and tail recursion) can, in a first-class, functional way, express _all_ local and non-local control features, and are an integral part of efficient implementations of algebraic effects systems, both as language features and (most importantly) as libraries. | |||||
<div class="text-image"> | |||||
<div> | |||||
Continuations, however, are notoriously hard to understand. In an informal way, we can say that a continuation is a first-class representation of the "future" of a computation. By this I do not mean a future in the sense of an asynchronous computation, but in a temporal sense. While descriptions like this are "correct" in some sense, they're also not useful. What does it mean to store the "future" of a computation in a value? | |||||
Operationally, we can model continuations as just a segment of control stack. This model is perhaps better suited for comprehension by programmers familiar with the implementation details of imperative programming languages, which is decidedly not my target audience. Regardless, this is how continuations (especially the delimited kind) are generally presented. | |||||
With no offense to the authors of these articles, some of which I greatly respect as programming language designers and implementers, I do not think that this approach is very productive in a functional programming context. This reductionist, imperative view would almost be like explaining the _concept_ of a proper tail call by using a trampoline, rather than presenting trampolines as one particular implementation strategy for the tail-call-ly challenged. | |||||
</div> | |||||
<figure> | |||||
<img class="tikzpicture" src="/diagrams/cc/delimcc.svg" /> | |||||
_Fig 1. A facsimile of a diagram you'd find attached to an explanation of delimited continuations. Newer frames on top._ | |||||
</figure> | |||||
</div> | |||||
In a more functional context, then, I'd like to present the concept of continuation as a _reification_ of an _evaluation context_. I stress that this presentation is not novel, though it is, perhaps, uncommon outside of the academic literature on continuations. Reification, here, is the normal English word. It's a posh way of saying "to make into a thing". For example, an `(eval)`{.scheme} procedure is a _reification_ of the language implementation---it's an interpreter made into a thing, a thing that looks, walks and quacks like a procedure. | |||||
The idea of evaluation contexts, however, seems to have remained stuck in the ivory towers that the mainstream so often accuse us of inhabiting. | |||||
Evaluation Contexts | |||||
------------------- | |||||
What _is_ an evaluation context? Unhelpfully, the answer depends on the language we're talking about. Since the language family you're most likely to encounter continuations in is Scheme, we'll use a Scheme-_like_ language (not corresponding to any of the R<sup>K</sup>RS standards). Our language has the standard set of things you'd find in a minimalist functional language used for an academic text: lambda expressions, written `(lambda arg exp)`{.scheme} that close over variables in lexical scope; applications, also n-ary, written `(func arg ...)`{.scheme}, `if`{.scheme}-expressions written `(if condition then else)`{.scheme}, with the `else` expression being optional and defaulting to the false value `#f`{.scheme}, integers, integer operations, and, of course, variables.
As an abbreviation one can write `(lambda (a . args) body)` to mean `(lambda a (lambda args body))` (recursively), and similarly for applications (associating to the left), but the language itself only has unary application and currying. This is in deviation from actual Scheme implementations which have complex parameter passing schemes, including variadic arguments (collecting any overflow in a list), keyword and optional arguments, etc. While all of these features are important for day-to-day programming, they do nothing but cloud the presentation here with needless verbosity.
```scheme | |||||
e ::= (lambda arg expr) ; function definitions | |||||
| (if expr expr expr) ; if expression | |||||
| (expr expr) ; function applications | |||||
| var ; variables | |||||
| #t | #f ; scheme programmers spell booleans funny | |||||
| 1 | 2 | 3 | 4 ... ; integers | |||||
``` | |||||
The set of values in this miniScheme language includes lambda expressions, the booleans `#t`{.scheme} and `#f`{.scheme}, and the integers; Every other expression can potentially take a step. Here, taking a step means applying a _reduction rule_ in the language's semantics, or finding a _congruence_ rule that allows some sub-expression to suffer a reduction rule.
An example of reduction rule is β-reduction, which happens when the function being applied is a λ-abstraction and all of its arguments have been reduced to values. The rule, which says that an application of a lambda to a value can be reduced to its body in one step, is generally written in the notation of sequent calculus as below. | |||||
<details> | |||||
<summary>Fancy reduction rule type-set in TeX</summary> | |||||
<noscript>Sorry, I use KaTeX for my mathematics. Read on!</noscript> | |||||
$$\frac{}{(\lambda x.e) v \longrightarrow e\{v/x\}}$$ | |||||
</details> | |||||
However, let's use the Lisp notation above and write reduction rules as if they were code. The notation I'm using here is meant to evoke [Redex], a tool for defining programming semantics implemented as a Racket language. Redex is really neat and I highly recommend it for anyone interested in studying programming language semantics formally. | |||||
[Redex]: https://docs.racket-lang.org/redex/ | |||||
<!-- reduction rules --> | |||||
<div style="display: flex; flex-direction: column; justify-content: space-around"> | |||||
<h4 style="text-align: center">Reduction rules</h4> | |||||
<div class="mathpar"> | |||||
<div> | |||||
```scheme | |||||
(--> ((lambda x e) v) | |||||
(subst x v e)) | |||||
``` | |||||
</div> | |||||
<div> | |||||
```scheme | |||||
(--> (if #t e_1 e_2) | |||||
     e_1)
``` | |||||
</div> | |||||
<div> | |||||
```scheme | |||||
(--> (if #f e_1 e_2) | |||||
     e_2)
``` | |||||
</div> | |||||
</div> | |||||
</div> | |||||
These rules, standard though they may be, have a serious problem. Which, you might ask? They only apply when the expressions of interest are already fully evaluated. No rule matches for when the condition of an `if`{.scheme} expression is a function application, or another conditional; The application rule, also, only applies when the argument has already been evaluated (it's _call-by-value_, or _strict_). What can we do? Well, a simple and obvious solution is to specify _congruence_ rules that let us reduce in places of interest. | |||||
<!-- congruence rules --> | |||||
<div style="display: flex; flex-direction: column; justify-content: space-around"> | |||||
<h4 style="text-align: center">Congruence rules</h4> | |||||
<div class="mathpar"> | |||||
<div> | |||||
```scheme | |||||
[ (--> e_1 v) | |||||
-------------------------- | |||||
(--> (e_1 e_2) (v e_2))] | |||||
``` | |||||
Evaluating in function position | |||||
</div> | |||||
<div> | |||||
```scheme | |||||
[ (--> e_2 v) | |||||
-------------------------- | |||||
(--> (e_1 e_2) (e_1 v))] | |||||
``` | |||||
Evaluating in argument position | |||||
</div> | |||||
<div> | |||||
```scheme | |||||
[ (--> e_1 v) | |||||
----------------------- | |||||
(--> (if e_1 e_2 e_3) | |||||
(if v e_2 e_3))] | |||||
``` | |||||
Evaluating in scrutinee position | |||||
</div> | |||||
</div> | |||||
</div> | |||||
Hopefully the problem should be clear. If it isn't, consider adding binary operators for the field operations: Each of the 4 (addition, subtraction, multiplication, division) needs 2 congruence rules, one for reducing either argument, even though they each have a single reduction rule. In general, an N-ary operator will have N congruence rules, one for each of its N operands, but only one reduction rule! | |||||
The solution to this problem comes in the form of _evaluation contexts_. We can define a grammar of "expressions with holes", generally written as <noscript>E[·]</noscript><span class='script'>$\operatorname{E}[\cdot]$</span>, where the <noscript>·</noscript><span class='script'>$\cdot$</span> stands for an arbitrary expression. In code, we'll denote the hole with `<>`, perhaps in evocation of the macros `cut` and `cute` from SRFI 26[^1]. | |||||
Our grammar of evaluation contexts, which we'll call `E` in accordance with tradition, looks like this: | |||||
```scheme | |||||
E ::= <> ; any expression can be evaluated | |||||
| (E e) ; evaluate the function | |||||
| (v E) ; evaluate the argument | |||||
| (if E e e) ; evaluate the condition | |||||
``` | |||||
Now we can write all our congruence rules by appealing to a much simpler, and most importantly, singular, _context rule_, that says reduction is legal anywhere in an evaluation context. | |||||
<div style="text-align: center;"> | |||||
<div style="display: inline-block; text-align: left;"> | |||||
```scheme | |||||
[(--> e v) | |||||
---------------------------------- | |||||
 (--> (in-hole E e) (in-hole E v))]
``` | |||||
</div> | |||||
_Redex uses the notation `(in-hole E e)` to mean an evaluation context `E` with `e` "plugging" the hole `<>`._ | |||||
</div> | |||||
What do evaluation contexts actually have to do with programming language implementation, however? Well, if you squint a bit, and maybe introduce some parameters, evaluation contexts look a lot like what you'd find attached to an operation in... | |||||
Continuation-passing Style | |||||
-------------------------- | |||||
I'm not good at structuring blog posts, please bear with me. | |||||
Continuation-passing style, also known as <span title="Child protective service- sike!" style="text-decoration: underline dotted">CPS</span>, is a popular intermediate representation for functional language compilers. The goal of CPS is to make evaluation order explicit, and to implement complex control operations like loops, early returns, exceptions, coroutines and more in terms of only lambda abstraction. To achieve this, the language is stratified into two kinds of expressions, "complex" and "atomic". | |||||
Atomic expressions, or atoms, are not radioactive or explosive: In fact, they're quite the opposite! The Atoms of CPS are the values of our direct-style language. Forms like `#t`{.scheme}, `#f`{.scheme}, numbers and lambda expressions will not undergo any more evaluation, and thus may appear anywhere. Complex expressions are those that _do_ cause evaluation, such as conditionals and procedure application. | |||||
To make sure that evaluation order is explicit, every complex expression has a _continuation_ attached, which here boils down to a function which receives the return value of the expression. Procedures, instead of returning to a caller, will instead tail-call their continuation. | |||||
The grammar of our mini Scheme after it has gone CPS transformation is as follows: | |||||
<div style="text-align: center;"> | |||||
<div style="display: inline-block; text-align: left;"> | |||||
```scheme | |||||
atom ::= (lambda (arg kont) expr) ; continuation argument | |||||
| var ; these remain as they were. | |||||
| #t | #f | |||||
| 1 | 2 | 3 | 4 | |||||
expr ::= (atom atom atom) ; function, argument, continuation | |||||
| (if atom expr expr) ; conditional, then_c, else_c | |||||
| atom ; atoms are also valid expressions | |||||
``` | |||||
</div> | |||||
</div> | |||||
Note that function application now has _three_ components, but all of them are atoms. Valid expressions include things like `(f x halt)`, which means "apply `f` to `x` such that it returns to `halt`", but do _not_ include `(f (g x y) (h y z))`, which have an ambiguous reduction order. Instead, we must write a λ-abstraction to give a name to the result of each intermediate computation. | |||||
For example, the (surface) language application `(e_1 e_2)`, where both are complex expressions, has to be rewritten as either of the following expressions, which correspond respectively to evaluating the function first or the argument first. `(e_1 (lambda r1 (e_2 (lambda r2 (r1 r2 k)))))`{.scheme}
<div class="mathpar"> | |||||
<div> | |||||
```scheme | |||||
(e_1 (lambda r_1 | |||||
(e_2 (lambda r_2 | |||||
(r_1 r_2 k))))) | |||||
``` | |||||
Evaluating the function first | |||||
</div> | |||||
<div> | |||||
```scheme | |||||
(e_2 (lambda r_2
  (e_1 (lambda r_1
    (r_1 r_2 k)))))
``` | |||||
Evaluating the argument first | |||||
</div> | |||||
</div> | |||||
If you have, at any point while reading the previous 2 or so paragraphs, squinted, then you already know where I'm going with this. If not, do it now. | |||||
The continuation of an expression corresponds to its evaluation context. The `v`s in our discussion of semantics are the `atom`s of CPS, and most importantly, contexts `E` get closed over with a lambda expression `(lambda x E[x])`{.scheme}, replacing the hole `<>` with a bound variable `x`. Evaluating an expression `(f x)`{.scheme} in a context `E`, say `(<> v)` corresponds to `(f x (lambda x (x v)))`{.scheme}. | |||||
If a language implementation uses the same representation for both user procedures and lambda expressions---which is **very** inefficient, let me stress---then we get first-class _control_ for "free". First-class in the sense that control operations, like `return`, can be stored in variables, or lists, passed as arguments to procedures, etc. The fundamental first-class control operator for undelimited continuations is called `call-with-current-continuation`{.scheme}, generally abbreviated to `call/cc`{.scheme}. | |||||
<div style="text-align: center;"> | |||||
<div style="display: inline-block; text-align: left;"> | |||||
```scheme | |||||
(define call/cc (lambda (f cc) (f cc cc))) | |||||
``` | |||||
</div> | |||||
</div> | |||||
Using `call/cc`{.scheme} and a mutable cell holding a list we can implement cooperative threading. The `(yield)` operator has the effect of capturing the current continuation and adding it to (the end) of the list, dequeueing a potentially _different_ saved continuation from the list and jumping there instead. | |||||
<div style="text-align: center;"> | |||||
<div style="display: inline-block; text-align: left;"> | |||||
```scheme | |||||
(define threads '()) | |||||
; Jump to the next thread (read: continuation) or exit the program | |||||
; if there are no more threads to schedule | |||||
(define exit | |||||
(let ((exit exit)) | |||||
(lambda () | |||||
(if (null? threads) ; are we out of threads to switch to? | |||||
(exit) ; if so, exit the program | |||||
(let ((thr (car threads))) ; select the first thread | |||||
(set! threads (cdr threads)) ; dequeue it | |||||
          (thr))))))      ; jump there
; Add a function to the list of threads. After finishing its work, | |||||
; the function needs to (exit) so another thread can take over. | |||||
(define (fork f) | |||||
(set! threads (append threads | |||||
(list (lambda () (f) (exit)))))) | |||||
; Capture the current continuation, enqueue it, and switch to | |||||
; a different thread of execution. | |||||
(define (yield) | |||||
(call/cc (lambda (cc) | |||||
(set! threads (append threads (list cc))) | |||||
(exit)))) | |||||
``` | |||||
</div> | |||||
</div> | |||||
That's a cooperative threading implementation in 25 lines of Scheme! If whichever implementation you are using has a performant `call/cc`{.scheme}, this will correspond roughly to the normal stack switching that a cooperative threading implementation has to do.
That last paragraph is a bit of a weasel, though. What's a "performant call/cc" look like? Well, `call/cc`{.scheme} continuations have _abortive_ behaviour[^2], which means they have the effect of _replacing_ the current thread of control when invoked, instead of prepending a segment of stack---which is known as "functional continuations". That is, it's basically a spiffed up `longjmp`{.c}, which in addition to saving the state of the registers, copies the call stack along with it. | |||||
However, `call/cc`{.scheme} is a bit overkill for applications such as threads, and even then, it's not the most powerful control abstraction. For one `call/cc`{.scheme} _always_ copies the entire continuation, with no way to *ahem* delimit it. Because of this, abstractions built on `call-with-current-continuation`{.scheme} [do not compose]. | |||||
[do not compose]: http://okmij.org/ftp/continuations/against-callcc.html | |||||
We can fix all of these problems, ironically enough, by adding _more_ power. We instead introduce a _pair_ of operators, `prompt` and `control`, which...
Delimit your Continuations | |||||
-------------------------- | |||||
This shtick again? | |||||
Delimited continuations are one of those rare ideas that happen once in a lifetime and revolutionise a field---maybe I'm exaggerating a bit. They, unfortunately, have not seen very widespread adoption. But they do have all the characteristics of one of those revolutionary ideas: they're simple to explain, simple to implement, and very powerful. | |||||
The idea is obvious from the name, so much that it feels insulting to repeat it: instead of capturing the continuation, have a marker that _delimits_ what's going to be captured. What does this look like in our reduction semantics? | |||||
The syntax of evaluation contexts does not change, only the operations. We gain operations `(prompt e)`{.scheme} and `(control k e)`{.scheme}, with the idea being that when `(control k e)` is invoked inside of a `(prompt)` form, the evaluation context from the `control` to the nearest outermost `prompt` is reified as a function and bound to `k`. | |||||
<div class="mathpar"> | |||||
<div> | |||||
```scheme | |||||
[------------------- | |||||
(--> (prompt v) v)] | |||||
``` | |||||
Delimiting a value does nothing | |||||
</div> | |||||
<div> | |||||
```scheme | |||||
[---------------------------------------------------------- | |||||
(--> (prompt (in-hole E (control k body))) | |||||
(prompt ((lambda k body) (lambda x (in-hole E x)))))] | |||||
``` | |||||
Capture the continuation, bind it to the variable `k`, and keep going. | |||||
</div> | |||||
</div> | |||||
By not adding `(prompt E)` to the grammar of evaluation contexts, we ensure that `E` is devoid of any prompts by construction. This captures the intended semantics of "innermost enclosing prompt"---if `E` were modified to include prompts, the second rule would instead capture to the _outermost_ enclosing prompt, and we're back to undelimited call/cc. | |||||
Note that `prompt` and `control` are not the only pair of delimited control operators! There's also `shift` and `reset` (and `prompt0`/`control0`). `reset` is basically the same thing as `prompt`, but `shift` is different from `control` in that the captured continuation has the prompt—uh, the delimiter—reinstated, so that it cannot "escape". | |||||
<div style="text-align: center;"> | |||||
<div style="display: inline-block; text-align: left;"> | |||||
```scheme | |||||
[----------------------------------------------------------------- | |||||
(--> (reset (in-hole E (shift k body))) | |||||
(reset ((lambda k body) (lambda x (reset (in-hole E x))))))] | |||||
``` | |||||
Reinstate the prompt when the captured continuation is applied. | |||||
</div> | |||||
</div> | |||||
Yet another pair, which I personally prefer, is what you'd find in Guile's `(ice-9 control)`, namely `(call-with-prompt tag thunk handler)` and `(abort-to-prompt tag value)`. These are significantly more complex than bare `shift` and `reset` since they implement _multi-prompt_ delimited continuations. They're more like exception handlers than anything, with the added power that your "exception handler" could restart the code after you `throw`{.java}.
<div style="text-align: center;"> | |||||
<div style="display: inline-block; text-align: left;"> | |||||
```scheme | |||||
(define-syntax reset | |||||
(syntax-rules () | |||||
((reset . body) | |||||
(call-with-prompt (default-prompt-tag) | |||||
(lambda () . body) | |||||
(lambda (cont f) (f cont)))))) | |||||
(define-syntax shift | |||||
(syntax-rules () | |||||
((shift k . body) | |||||
(abort-to-prompt (default-prompt-tag) | |||||
(lambda (cont) | |||||
((lambda (k) (reset . body))
 (lambda vals (reset (apply cont vals)))))))))
``` | |||||
`call-with-prompt` and `abort-to-prompt` subsume `shift` and `reset`. | |||||
Taken from [the Guile Scheme implementation](https://fossies.org/linux/guile/module/ice-9/control.scm). | |||||
</div> | |||||
</div> | |||||
The operators `call-with-prompt` and `abort-to-prompt` are very convenient for the implementation of many control structures, like generators: | |||||
```scheme | |||||
(define-syntax for/generator | |||||
(syntax-rules () | |||||
((_ name gen . body) | |||||
(begin | |||||
(define (work cont) | |||||
(call-with-prompt | |||||
'generator-tag | |||||
cont | |||||
(lambda (cont name) (begin . body) | |||||
(work cont)))) | |||||
(work gen))))) | |||||
(define (yield x) (abort-to-prompt 'generator-tag x)) | |||||
(for/generator x (lambda () | |||||
(yield 1) | |||||
(yield 2) | |||||
(yield 3)) | |||||
(display x) (display #\newline)) | |||||
``` | |||||
Exception handlers and threading in terms of `shift` and `reset` are left as an exercise to the reader. | |||||
But... why? | |||||
----------- | |||||
Control abstractions appeal to our---or at least mine---sense of beauty. Being able to implement control flow operations as part of the language is often touted as one of the superpowers that Haskell gets from its laziness and purity, and while that certainly is true, control operators let us model many, _many_ more control flow abstractions, namely those involving non-local exits and entries. | |||||
Of course, all of these can be implemented in the language directly—JavaScript, for example, has `async`/`await`, stackless generators, _and_ exceptions. However, this is not an advantage. These significantly complicate the implementation of the language (as opposed to having a single pair of operators that's not much more complicated to implement than regular exception handlers) while also significantly _diminishing_ its expressive power! For example, using our definition of generators above, the Scheme code on the left does what you expect, but the JavaScript code on the right only yields `20`.
<div class="mathpar"> | |||||
<div> | |||||
```scheme | |||||
((lambda () | |||||
((lambda () | |||||
(yield 10))) | |||||
(yield 20))) | |||||
``` | |||||
</div> | |||||
<div> | |||||
```javascript | |||||
(function*() { | |||||
(function*() { | |||||
yield 10; | |||||
}) | |||||
yield 20; | |||||
})(); | |||||
``` | |||||
</div> | |||||
</div> | |||||
Delimited continuations can also be used to power the implementation of algebraic effects systems, such as those present in the language Koka, which much lower overhead (both in terms of code size and speed) than the type-driven local CPS transformation that Koka presently uses. | |||||
Language implementations which provide delimited control operators can also be extended with effect system support post-hoc, in a library, with an example being the [Eff] library for Haskell and the associated [GHC proposal] to add delimited control operators (`prompt` and `control`). | |||||
[Eff]: https://github.com/hasura/eff | |||||
[GHC proposal]: https://github.com/ghc-proposals/ghc-proposals/pull/313/ | |||||
[^1]: In reality it's because I'm lazy and type setting so that it works properly both with KaTeX and with scripting disabled takes far too many keystrokes. Typing some code is easier. | |||||
[^2]: People have pointed out to me that "abortive continuation" is a bit oxymoronic, but I guess it's been brought to you by the same folks who brought you "clopen set". |
@ -0,0 +1,264 @@ | |||||
--- | |||||
title: Reflections on Equality | |||||
date: November 1st, 2020 | |||||
--- | |||||
When shopping for a dependent type theory, many factors should be taken into consideration: how inductive data is represented (inductive schemas vs W-types), how inductive data _computes_ (eliminators vs case trees), how types of types are represented (universes à la Tarski vs à la Russell). However, the most important is their treatment of equality. | |||||
Conor McBride, a prominent figure in type theory research, noted in [a reddit comment] that you should never trust a type theorist who has not changed their mind about equality (I'm paraphrasing). Recently, I've embarked on a journey to improve my credibility (or, at least, get out of the "instantly discarded opinion" pool): I've changed my mind about equality. | |||||
# What's the fuss about? | |||||
Equality is very prevalent when using dependently typed languages, whether as frameworks for writing mathematical proofs or for writing verified computer programs. Most properties of mathematical operators are expressed as equalities which they should respect. For example, a semigroup is a set $S$ with an _associative_ $\times$ operator. The property of associativity just means the operator respects the equality $a \times (b \times c) \equiv (a \times b) \times c$! | |||||
However, the equality relation which can be expressed with proofs is not the only equality which a dependently-typed system needs to consider. There's also the _judgemental_ equality, that is, which terms are always identified independently of their semantics. For example, $a + b$ and $b + a$ are _propositionally_ equal, which can be shown by writing an inductive proof of this fact, but they're _judgmentally_ different, because they have different term structure. | |||||
There are two 'main' traditions of Martin-Löf dependent type theory: the intensional type theories are as in the paragraph above, but the extensional type theories make me a liar. Extensional type theory is obtained by adding a rule of _equality reflection_, which collapses the distinction between propositional and judgemental equality: whenever there exists a proof of an equality between terms $x$ and $y$, they're considered judgmentally equal. | |||||
Adding equality reflection makes a type system more expressive with respect to equality: for example, in extensional type theories, one can derive the rule of _function extensionality_, which says two functions are equal when they are equal pointwise. However, equality reflection also makes a type system _less_ expressive with respect to equality: there is only one way for two things to be equal! | |||||
Moreover, equality reflection complicates _type checking_ to an unacceptable degree. Rather than being able to check the validity of a proof by comparing a term against a known type, an entire typing derivation is necessary as input to the type checker. To see why, consider the following derivation: | |||||
<figure> | |||||
<div> | |||||
$$ | |||||
\cfrac{ | |||||
\cfrac{ | |||||
\cfrac{}{\mathop{\mathop{\vdash}} (+) \mathop{:} \mathbb{N} \to \mathbb{N} \to \mathbb{N}} | |||||
\quad | |||||
\cfrac{ | |||||
\text{no} \mathop{:} \bot\ \mathop{\mathop{\vdash}} \mathtt{"foo"} \mathop{:} \text{Str} | |||||
\quad | |||||
\cfrac{ | |||||
\cfrac{ | |||||
\text{no} \mathop{:} \bot\ \mathop{\mathop{\vdash}} \text{no} \mathop{:} \bot | |||||
}{ | |||||
\text{no} \mathop{:} \bot\ \mathop{\mathop{\vdash}} \mathrm{absurd}(\text{no}) \mathop{:} \text{Str} \equiv \mathbb{N} | |||||
} | |||||
}{ \text{no} \mathop{:} \bot\ \mathop{\mathop{\vdash}} \text{Str} = \mathbb{N} } | |||||
}{ \text{no} \mathop{:} \bot\ \mathop{\mathop{\vdash}} \mathtt{"foo"} \mathop{:} \mathbb{N} } | |||||
\quad | |||||
\cfrac{}{\mathop{\mathop{\vdash}} 2 \mathop{:} \mathbb{N}} | |||||
}{ \text{no} \mathop{:} \bot \mathop{\mathop{\vdash}} (2 + \mathtt{"foo"}) \mathop{:} \mathbb{N} } | |||||
}{ \mathop{\mathop{\vdash}} \lambda \text{no} \to (2 + \mathtt{"foo"}) : \bot \to \mathbb{N} } | |||||
$$ | |||||
</div> | |||||
<figcaption>If the context contains an element of the empty type, every term needs to be accepted</figcaption> | |||||
</figure> | |||||
Here, the context contains an element of the empty type, written $\bot$. It's _comparatively_ easy for the type checker to see that in this case, the context we're working under is absurd, and any equality should be accepted. However, there are many such empty types: $\mathrm{Fin}(0)$, for example. Consider the type family $\mathrm{SAT}$, indexed by a Boolean clause, such that $\mathrm{SAT}(c)$ reduces to $\top$ when $c$ is satisfiable and $\bot$ otherwise. How would you type-check the program $\lambda x \to 2 + \mathtt{"foo"}$ at type $\mathrm{SAT}(c) \to \mathbb{N}$, where $c$ is some adversarially-chosen, arbitrarily complex expression? How would you type check it at type $\mathrm{Halts}(m) \to \mathbb{N}$? | |||||
In contrast, Intensional Type Theory, or ITT for short, treats equality as if it were any other type. In ITT, equality is inductively generated by the constructor $\mathrm{refl}_x$ for any element $x$ of a type $A$, which leads to an induction principle saying that, if | |||||
- $A$ is a type, and | |||||
- $C$ is a proposition indexed by $x, y : A$ and $p : x \equiv_{A} y$, and | |||||
- $p_{\mathrm{refl}}$ is a proof of $C(x, x, \mathrm{refl_{x}})$, then | |||||
we can deduce, for all $x, y : A$ and $p : x \equiv_{A} y$, that $C(x, y, p)$ holds. | |||||
Given this operator, generally called axiom J (since "J" is the letter after **I**dentity, another word for "equality"), we can derive many of the properties of equality: transitivity (given $x \equiv y$ and $y \equiv z$, get $x \equiv z$) and symmetry (given $x \equiv y$, get $y \equiv x$) make $\equiv$ an _equivalence relation_, and substitutivity (assuming $P(x)$ and $x \equiv y$, get $P(y)$) justifies calling it "equality". | |||||
However, axiom J is both weaker _and_ stronger than equality reflection: for one, it doesn't let us prove that functions are equal when they are pointwise equal, which leads to several complications. However, it also doesn't let us prove that all equalities are equal to $\mathrm{refl}$, which lets us strengthen equality by postulating, for example, | |||||
## The univalence axiom of Voevodsky | |||||
The "tagline" for the univalence axiom, which lies at the center of Homotopy Type Theory (HoTT), is that "equality is equivalent to equivalence". More specifically, given a function $f : A \to B$, together with a proof that $f$ has left and right inverses, univalence gives us an equality $\mathrm{ua}(f)$ such that transporting "along" this path is the same as applying $f$. | |||||
For example, we could define the "usual" unary naturals $\mathbb{N}$ (which are easy to use for proving but terrible computationally) and _binary_ naturals $\mathbb{N}_2$ (which have more efficient computational behaviour at the cost of more complicated structure), demonstrate an equivalence $\mathrm{peano2Binary} : \mathbb{N} \cong \mathbb{N}_2$, then *transport* proofs about $\mathbb{N}$ to proofs about $\mathbb{N}_2$! | |||||
A [recent paper] by Tabareau et al explores the consequences of strengthening a type theory with univalence together with _parametricity_, unlocking efficient and _automated_ (given the equivalence) transport between algebraic structures of types. | |||||
<details> | |||||
<summary> A note on terminology </summary> | |||||
The HoTT interpretation of "types as topological spaces" leads us to interpret the type of equalities as the type of _paths_ in a space. From this, we get some terminology: instead of saying "cast $x$ with the equality $e$", we can equivalently say "transport $x$ along the path $p$". | |||||
Using the "path" terminology in a context with unicity of equality proofs is a bit misleading, but the terminology does not break down (a set-truncated system is just one where all paths are loops). Because of this, I'll use "path" and "equality" interchangeably. Sorry if this causes any confusion. | |||||
</details> | |||||
The undecidability of type checking ETT and the comparative weakness of ITT has led _many_ researchers to consider the following question: | |||||
> Can we have: | |||||
> | |||||
> * Decidable type checking | |||||
> * A "more extensional" equality | |||||
> * Good computational behaviour | |||||
> | |||||
> All at the same time? | |||||
Turns out, the answer is yes! | |||||
# Observational Equality | |||||
One early positive answer is that of Observational Type Theory, presented in [a 2007 paper] by Altenkirch, McBride, and Swierstra. The basic idea is that, instead of defining the type $a \equiv_{A} b$ as an inductive family of types, we define it so that equality _computes_ on the structure of $A$, $a$ and $b$ to _reduce_ to tractable forms. Observational type theory (OTT, from now on) has two _universes_ (types whose elements are types), $\mathrm{Prop}$ and $\mathrm{Set}$, such that $\mathrm{Prop} : \mathrm{Set}$ and $\mathrm{Set} : \mathrm{Set}$.[^1]
The elements of $\mathrm{Prop}$ are taken to be _propositions_, not in the sense of propositions-as-types, but in the sense of HoTT. Propositions are the types $T$ for which, given $x, y : T$, one has $x \equiv y$: they're types with at most one element. Some propositions are $\top$, the trivially true proposition, and $\bot$, the empty proposition. | |||||
Given $A : u$, and some $B : v$ with one variable $x : A$ free (where $u$ and $v$ are possibly distinct universes), we can form the type $\prod_{x : A} B$ of _dependent products_ from $A$ to $B$. If $v$ is in $\mathrm{Prop}$, then the dependent product also lives in $\mathrm{Prop}$. Otherwise, it lives in $\mathrm{Set}$. Moreover, we can also form the type $\sum_{x : A} B$ of _dependent sums_ of $A$ and $B$, which always lives in $\mathrm{Set}$. | |||||
Given a type $A$ and two elements $a, b : A$, we can form the _proposition_ $a \equiv_{A} b$. Note the emphasis on the word proposition here! Since equality is a proposition, we have uniqueness of equality proofs by definition: there's at most one way for things to be equal, conflicting with univalence. So we get _some_ extensionality, namely of functions, but not for arbitrary types. Given types <span class=together>$A$ and $B$,</span> a proof $p : A \equiv B$ and $x : A$, we have the term $\mathrm{coe}(A, B, p, x) : B$, which represents the *coe*rcion of $x$ along the path $p$.
Here is where my presentation of observational equality starts to diverge from the paper's: McBride et al use _heterogeneous_ equality, i.e. a 4-place relation $(x : A) \equiv (y : B)$, where $A$ and $B$ are potentially distinct types. But! Their system only allows you to _use_ an equality when $A \equiv B$. The main motivation for heterogeneous equality is to "bunch up" as many equalities as possible to be eliminated all in one go, since coercion in their system does not compute. However, if coercion computes normally, then we don't need to (and, in fact, can't) do this "bunching": one just uses coercion normally.
The key idea of OTT is to identify as "equal" objects which support the same _observations_: for functions, observation is _application_; for pairs, it's projection, etc. This is achieved by making the definition of equality act as a "pattern matching function" on the structure of terms and types. For example, there is a rule which says an equality between functions is a function that returns equalities:
<figure> | |||||
<div> | |||||
$$ | |||||
\cfrac{}{f \equiv_{(x : A) \to B(x)} g \longrightarrow (x : A) \to (f\ x) \equiv_{B(x)} (g\ x)} | |||||
$$ | |||||
</div> | |||||
<figcaption>Equality of functions is extensional by definition</figcaption> | |||||
</figure> | |||||
So, not only do we have a term `funext` of type `((x : A) → f x == g x) → f == g` but one with a stronger type, namely `((x : A) → f x == g x) == (f == g)`, and that term is.. `refl`! | |||||
OTT is appropriate, in my opinion, for doing set-level mathematics: where types have no "interesting" equality structure. However, it breaks down at higher _h_-levels, where there is interesting structure to be found in the equalities between elements of types. This is because OTT, by placing the $\equiv$ type in its universe of propositions, validates the principle of uniqueness of identity proofs, which says any two proofs of the same equality are themselves equal. UIP conflicts with the univalence axiom of Homotopy Type Theory, by (to use the HoTT terminology) saying that all types are sets. | |||||
<span class="theorem paragraph-marker">**Theorem**</span>. Suppose there exists one universe, $\mathscr{U}$, which contains the type of Booleans. Assuming univalence, the type $a \equiv_{\mathscr{U}} b$ is not a proposition. | |||||
<span class="paragraph-marker">**Proof**</span>. The function $\mathrm{not} : 2 \to 2$, which maps $\mathtt{tt}$ to $\mathtt{ff}$ and vice-versa, is an equivalence, being its own left and right inverses. Thus, by univalence, we have a path $\mathrm{ua}(\mathrm{not}) : 2 \equiv_{\mathscr{U}} 2$. | |||||
To see that this path is different from $\mathrm{refl}_2$, consider its behaviour with respect to transport: $\mathrm{transp}(\mathrm{refl}_2, \mathtt{tt}) \equiv tt$ but $\mathrm{transp}(\mathrm{ua}(\mathrm{not}), \mathtt{tt}) \equiv \mathtt{ff}$. Since $\mathtt{tt}$ is different from $\mathtt{ff}$, it follows that $\mathrm{ua}(\mathrm{not})$ is different from $\mathrm{refl}_2$. <span class=qed>$\blacksquare$</span> | |||||
So, if having univalence is desirable, OTT is off the table. However, the previous example of transporting proofs between equivalent types of natural numbers might not have convinced you that HoTT is indeed an interesting field of study, and univalence might seem mostly like a novelty, a shiny thing to pursue for its own sake (it certainly did to me, at first). So why _is_ HoTT interesting? | |||||
# HoTT | |||||
Between 2012 and 2013, a special year of research took place at the Institute for Advanced Study to develop a type theory that can be used as a foundation for mathematics "at large". Their result: the book *Homotopy Type Theory: Univalent Foundations for Mathematics*. The IAS Special Year on Univalent Foundations would have been interesting even if it hadn't started a new branch of type theory, just from the participants: Thierry Coquand, Thorsten Altenkirch, Andrej Bauer, Per Martin-Löf and, of course, the late Vladimir Voevodsky.
HoTT's main contribution to the field, in my (lay) opinion, is the interpretation of _types as spaces_: by interpreting types as homotopy theoretical spaces, a semantics for types with more interesting "equality structure", so to speak, arises. In the "classical" intensional type theory of Agda and friends, and indeed the theory of OTT commented on above, equality is a _proposition_. Agda (without the `--without-K` option) accepts the following proof: | |||||
```agda | |||||
uip : (A : Set) (x y : A) (p q : x ≡ y) → p ≡ q | |||||
uip A x .x refl refl = refl | |||||
``` | |||||
Apart from ruling out univalence, UIP rules out another very interesting class of types which can be found in HoTT: the _higher inductive_ types, which contain constructors for _equalities_ as well as for values. | |||||
The simplest higher inductive type is the interval, which has two "endpoints" (`i0` and `i1`) and a path between them: | |||||
```agda | |||||
data I : Type where | |||||
i0 i1 : I | |||||
seg : i0 ≡ i1 | |||||
``` | |||||
The names $i_0$, $i_1$ and $\mathrm{seg}$ were chosen to remind the reader of a line *seg*ment between a pair of points. Therefore, we may represent the type $\mathbb{I}$ as a diagram, with discrete points representing $i_0$ and $i_1$, and a line, labelled $\mathrm{seg}$, connecting them. | |||||
<figure> | |||||
<img style="height: 90px" src="/diagrams/eq/interval.svg" /> | |||||
<figcaption>A diagrammatic representation of the type `I`</figcaption> | |||||
</figure> | |||||
The real power of the `I` type comes from its induction principle, which says that, given a proposition $P : \mathbb{I} \to \mathrm{Type}$, if: | |||||
* There exists a pair of proofs $pi_0 : P(i_0)$ and $pi_1 : P(i_1)$, and | |||||
* $pi_0$ and $pi_1$ are equal "with respect to" the path $\mathrm{seg}$, then | |||||
$P$ holds for every element of the interval. However, that second constraint, which relates the two proofs, needs a bit of explaining. What does it mean for two elements to be equal with respect _to a path_? Well, the "obvious" interpretation would be that we require a proof $p\mathrm{seg} : pi_0 \equiv pi_1$. However, this isn't well-typed! The type of $pi_0$ is $P(i_0)$ and the type of $pi_1$ is $P(i_1)$, so we need a way to make them equal. | |||||
This is where the path $\mathrm{seg}$ comes in to save the day. Since it states $i_0$ and $i_1$ are equal, we can transport the proof $pi_0$ _along_ the path $\mathrm{seg}$ to get an inhabitant of the type $P(i_1)$, which makes our desired equality between $pi_0$ and $pi_1$ _with respect to_ $\mathrm{seg}$ come out as $\mathrm{transp}(P, \mathrm{seg}, pi_0) \equiv pi_1$, which _is_ well-typed.
That's fine, I hear you say, but there is a question: how is the interval type _useful_? It certainly looks as though it were useless, considering it's just got one element pretending to be two! However, using the interval higher inductive type, we can actually _prove_ functional extensionality, the principle that says two functions are equal when they're equal pointwise, everywhere. The proof below can be found, in a more presentable manner, in [the HoTT book], section 6.3.
<span class="theorem paragraph-marker">**Theorem**</span>. If $f$ and $g$ are two functions between $A$ and $B$ such that $f(x) \equiv g(x)$ for all elements $x : A$, then $f \equiv g$. | |||||
<span class="paragraph-marker">**Proof**</span>. Call the proof we were given $p : \prod_{(x : A)} f(x) \equiv g(x)$. We define, for all $x : A$, the function $p^\prime_{x} : \mathbb{I} \to B$ by induction on $\mathbb{I}$. Let $p^\prime_{x}(i_0) = f(x)$, $p^\prime_{x}(i_1) = g(x)$. The equality between these terms is given by $p^\prime_{x}(\mathrm{seg}) = p(x)$.
Now, define $q : \mathbb{I} \to A \to B$ by $q(i) = \lambda x. p^\prime_x(i)$. We have that $q(i_0)$ is the function $\lambda x. p^\prime_{x}(i_0)$, which is defined to be $\lambda x. f(x)$, which is $\eta$-equal to $f$. Similarly, $q(i_1)$ is equal to $g$, and thus, $q(\mathrm{seg}) : f \equiv g$. <span class="qed">$\blacksquare$</span> | |||||
Isn't that _cool_? By augmenting our inductive types with the ability to additionally specify equalities between elements, we get a proof of function extensionality! I think that's pretty cool. Of course, if HITs were limited to structures like the interval, spheres, and other abstract mathematical things, they wouldn't be very interesting for programmers. However, the ability to endow types with additional equalities is _also_ useful when doing down-to-earth programming! A [2017 paper] by Basold et al explores three applications of HITs to programming, in addition to containing an accessible introduction to HoTT in general. | |||||
Another very general higher inductive type, one that might be more obviously useful, is the general type of _quotients_. Whenever $A$ is a type and $R$ is a binary, _propositional_ relation between members of $A$, we can form the quotient type $A/R$, which is given by the following constructors[^2]: | |||||
* $\mathrm{intro}$ says that, for each $x : A$, we can make an element of $A/R$, and | |||||
* $\mathrm{quot}$ which gives, for each $x, y : A$ which are related by $R$, an equality between $\mathrm{intro}(x)$ and $\mathrm{intro}(y)$. | |||||
The induction principle for quotients, which is far too complicated to include here (but can be derived mechanically from the specification given above and the knowledge of "equal with respect to some path" from the section on the interval), says roughly that we can pattern-match on $A/R$ if and only if the function we're defining does not distinguish between elements related by $R$.
This type is very general! For example, given a type of naturals $\mathbb{N}$ and a two-place relation $\mathrm{mod2}$ which holds for numbers congruent modulo 2, we can form the quotient type $\mathbb{N}/\mathrm{mod2}$ of naturals mod 2. Functions (say, $f$) defined on this type must then respect the relation that, whenever $x \equiv y \mod 2$, $f(x) \equiv f(y)$. | |||||
All of this talk about HoTT, and in fact the book itself, though, neglected to mention one thing. What is the _computational content_ of the univalence axiom? What are the reduction rules for matching on higher inductive types? How do we take a proof, written in the language of HoTT, and run it? The Book does not address this, and in fact, it's still a half-open problem, and has been since 2013. Computing in the presence of all these paths, paths between paths, ad infinitum is mighty complicated. | |||||
# Cubical TT | |||||
The challenge of making a computational HoTT did not stop Cohen et al, who in [a 2016 paper] presented _Cubical Type Theory_, which, after three years of active research, provides a way to compute in the presence of univalence. There's just one problem, though: cubical type theory is _hella complicated_. | |||||
The core idea is simple enough: we extend type theory with a set of names $\mathbb{I}$, with points $0$ and $1$ and operations $\vee$, $\wedge$, and $1 - r$, which behave like a de Morgan algebra. To represent equalities, the type $\mathrm{Path}\ A\ t\ u$ is introduced, together with a "name abstraction" operation $\langle i \rangle\ t$ and "path application" $t\ r$, where $r$ is an element of the interval. | |||||
<div class="mathpar"> | |||||
<figure> | |||||
<div> | |||||
$$ | |||||
\frac{\Gamma, i : \mathbb{I} \vdash t : A}{\Gamma \vdash \langle i \rangle\ t : \mathrm{Path}\ A\ t[0/i]\ t[1/i]} | |||||
$$ | |||||
</div> | |||||
<figcaption>Path formation</figcaption> | |||||
</figure> | |||||
<figure> | |||||
<div> | |||||
$$ | |||||
\frac{\Gamma \vdash t : \mathrm{Path}\ A\ a\ b\quad\Gamma \vdash r : \mathbb{I}}{\Gamma \vdash t\ r : A}
$$ | |||||
</div> | |||||
<figcaption>Path elimination</figcaption> | |||||
</figure> | |||||
</div> | |||||
The intuition, the authors say, is that a term with $n$ variables of $\mathbb{I}$-type free corresponds to an $n$-dimensional cube. | |||||
<table> | |||||
<tr> | |||||
<td>$\cdot \vdash A : \mathrm{Type}$</td> | |||||
<td class=image><img style="height: 30px;" alt="0 dimensional cube" src="/diagrams/eq/0cube.svg" /></td> | |||||
</tr> | |||||
<tr> | |||||
<td>$i : \mathbb{I} \vdash A : \mathrm{Type}$</td> | |||||
<td class=image><img style="height: 32px;" alt="1 dimensional cube" src="/diagrams/eq/1cube.svg" /></td> | |||||
</tr> | |||||
<tr> | |||||
<td>$i, j : \mathbb{I} \vdash A : \mathrm{Type}$</td> | |||||
<td class=image><img style="height: 120px;" alt="2 dimensional cube" src="/diagrams/eq/2cube.svg" /></td> | |||||
</tr> | |||||
</table> | |||||
This is about where anyone's "intuition" for Cubical Type Theory, especially my own, flies out the window. Specifically, using abstraction and the de Morgan algebra on names, we can define operations such as reflexivity (introduced with $\langle i \rangle a : \mathrm{Path}\ A\ a\ a$), symmetry ($\lambda p. \langle i \rangle p\ (1 - i) : \mathrm{Path}\ A\ a\ b \to \mathrm{Path}\ A\ b\ a$), congruence, and even function extensionality, which has a delightfully simple proof: $\lambda p. \langle i \rangle\ \lambda x. p\ x\ i$. | |||||
However, to transport along these paths, the paper defines a "face lattice", which consists of constraints on elements of the interval, uses that to define "systems", which are arbitrary restrictions of cubes. From systems, one can define "composition", which computes the lid of an open box (yeah, I don't get it either), "Kan filling", and finally, transport. Since the authors give a semantics of Cubical Type Theory in a previously well-established model of cubical sets, I'll just.. take their word on this.
The [Cubical Agda documentation] has a section explaining a generalised transport operation `transp` and the regular transport operation `transport`. I recommend that you go check _that_ out, since explaining each is beyond my powers. However, this transport operation _does_ let us prove that the J induction principle for equalities also holds for these cubical paths, and from that we can define all of the other nice operations! | |||||
Cubical Agda, a proof assistant based on Cubical Type Theory, exports a library which provides all of HoTT's primitive notions (The identity type, transport, the J rule, function extensionality, univalence), that compute properly. Furthermore, it supports higher inductive types! However, as I understand it, these can not be compiled into an executable program yet. This is because the `transp` operation, fundamental to computation in the presence of path types, is defined on the structure of the type we're transporting over, and type structure is not preserved when compiling. | |||||
# So What? | |||||
Even after all this explanation of fancy equality relations, you might still be unconvinced. I certainly was, for a while. But I'd argue that, if you care about typed programming enough to be interested in dependent types _at all_, you should be interested in, at least, the quotient inductive types of OTT, if not the more general higher inductive types of HoTT. | |||||
The reason for this is simple: inductive types let you restrict how elements of a type are _formed_. Quotient inductive types let you restrict, in a principled way, how elements of a type are _used_. Whereas in languages without quotients, like Haskell or even non-cubical Agda, one is forced to use the module system to hide the inductive structure of a type if they wish to prevent unauthorized fiddling with structure, in a language with quotients, we can have the type checker enforce, internally, that these invariants are maintained. | |||||
Just like quantitative type theory gives programmers a way to reason about resource usage, and efficiently implement mutable structures in a referentially-transparent way, I strongly believe that quotient types, and even univalent parametricity (when we figure out how to compile that) are the "next step forward" in writing reliable software using types as a verification method. | |||||
However, dependent types are not the only verification method! Indeed, there are a number of usability problems to solve with dependent types for them to be adopted by the mainstream. A grim fact that everyone who wants reliable software has to face every day is that most programmers are out there using _JavaScript_, and that the most popular typed language released recently is _Go_.[^3] | |||||
So I leave you, dear reader, with this question: what can we do, as researchers and implementors, to make dependently typed languages more user-friendly? If you are not a researcher, and you have tried a dependently typed language, what are pain points you have encountered? And if you haven't used a dependently typed language, why not? | |||||
<div class="special-thanks"> | |||||
With special thanks to | |||||
* [My friend V for helping with my subpar English](https://anomalous.eu/)
* [My friend shikhin for proof-reading the mathematics.](https://twitter.com/idraumr) | |||||
</div> | |||||
[^1]: This is a simplified presentation that uses a single, inconsistent universe (Girard's paradox applies). The actual presentation uses a stratified, predicative hierarchy of $\mathrm{Set}_i$ universes to avoid this paradox. | |||||
[^2]: The presentation of quotients in the HoTT book also contains a _0-truncation_ constructor, which has to do with limiting quotients to work on sets only. The details are, IMO, out of scope for this post; So check out section 6.10 of the book to get all of them. | |||||
[^3]: Please read the italics on this paragraph as derision. | |||||
[a 2007 paper]: http://www.cs.nott.ac.uk/~psztxa/publ/obseqnow.pdf | |||||
[2017 paper]: https://www.researchgate.net/publication/315794623_Higher_Inductive_Types_in_Programming | |||||
[recent paper]: https://arxiv.org/abs/1909.05027 | |||||
[a 2016 paper]: https://arxiv.org/abs/1611.02108 | |||||
[a reddit comment]: https://www.reddit.com/r/haskell/comments/y8kca/generalizednewtypederiving_is_very_very_unsafe/c5tawm8/ | |||||
[the HoTT book]: https://homotopytypetheory.org/book/ | |||||
[Cubical Agda documentation]: https://agda.readthedocs.io/en/v2.6.0/language/cubical.html |
@ -0,0 +1,638 @@ | |||||
--- | |||||
title: On Induction | |||||
date: January 15th, 2021 | |||||
--- | |||||
<!-- | |||||
battle plan: | |||||
- motivate induction via lists | |||||
- close enough to naturals that we can appeal to people's intuition about those | |||||
- a general schema for normal induction (easy enough) | |||||
- handwave about induction-induction (some syntax?) and induction-recursion (a universe?) | |||||
- handwave quotient & higher induction-induction | |||||
--> | |||||
[Last time] on this... thing... I update _very occasionally_, I talked about possible choices for representing equality in type theory. Equality is very important, since many properties of programs and mathematical operators are stated as equalities (e.g., in the definition of a group). However, expressing properties is useless if we can't prove them, and this is where inductive types come in. | |||||
[Last time]: /posts/reflections-on-equality.html | |||||
Inductive types allow users to extend their type theories with new types, generated by _constructors_, which respect an _induction principle_. Constructors are conceptually simple: they provide a way of *construct*ing elements of that type, like the name implies. The induction principle, though, requires a bit more thought. | |||||
The only inductive type | |||||
----------------------- | |||||
Since this is a blog post about type theory, I am required, by law, to start this discussion about inductive types with the most boring one: the set of all natural numbers, $\mathbb{N}$. The set $\mathbb{N}$ (to be read, and written, `Nat`, from now on) is both uninteresting enough that the discussion can focus on the induction aspect, but also non-trivial enough for some complications to arise. | |||||
The type `Nat`{.dt} is generated by two constructors, one for `zero`{.dt}, and one for the `suc`{.dt}cessor of another number. We can represent this in Agda as in the code block below. Taken together, the signature for the type former (`Nat : Set`{.agda}) and the signatures of all the constructors make up the complete specification of the inductive type, called (confusingly) a signature. Looking at only these we can figure out its fundamental property—its induction principle. | |||||
```agda | |||||
data Nat : Set where | |||||
zero : Nat | |||||
suc : Nat → Nat | |||||
``` | |||||
Before we consider the full induction principle, it's useful to consider a simply-typed restriction, called the _recursor_. Every inductive type has a recursor, but this name makes the most sense when talking about natural numbers, because it operationally models primitive recursion. The recursor for the natural numbers is a (dependent) function with the following parameters: | |||||
- A _motive_, the type we eliminate into;
- A _method_ for the constructor `zero`, that is, an element of `A`, and | |||||
- A _method_ for the constructor `suc`, which is a function from `A → A`. | |||||
To put it into code: | |||||
```agda | |||||
foldNat : (A : Type) → A → (A → A) → Nat → A | |||||
``` | |||||
Furthermore, the recursor has, as anything in type theory, computational behaviour. That is, when applied to all of its arguments, `foldNat` evaluates. This evaluation is recursive, but pay attention to the structure: we only ever apply `foldNat` to values "smaller" (contained in) the data we started with. | |||||
```agda | |||||
foldNat : (A : Type) → A → (A → A) → Nat → A | |||||
foldNat A z s zero = z | |||||
foldNat A z s (suc x) = s (foldNat A z s x) | |||||
``` | |||||
To see how one derives the recursor from the signature, we would have to take a detour through category theory. While a complete semantic argument is preferable, for now, some handwaving is in order. To cite my sources: this presentation of algebras was given in [Kaposi 2020], a very well-written paper which you should check out if you want to understand how algebras work for higher inductive types. | |||||
[Kaposi 2020]: https://arxiv.org/pdf/1902.00297.pdf | |||||
Take a **`Nat`{.dt}-algebra** to be a dependent triple `(C : Set) × C × (C → C)`{.agda}. The type `C` is known as the _carrier_ of the algebra. From this type of algebras we can define a type family of _`Nat`{.dt}-algebra homomorphisms_ between two algebras, which are structure-preserving maps between them. This definition is a bit complicated, so it's in a box: | |||||
```agda | |||||
Alg : Set₁ | |||||
Alg = Σ Set₀ λ Nat → Σ Nat λ _ → (Nat → Nat) | |||||
Morphism : Alg → Alg → Set | |||||
Morphism (N0 , z0 , s0) (N1 , z1 , s1) = | |||||
Σ (N0 → N1) λ NM → | |||||
Σ (NM z0 ≡ z1) λ _ → | |||||
((n : N0) → NM (s0 n) ≡ s1 (NM n)) | |||||
``` | |||||
A morphism between algebras $(N_0, z_0, s_0)$ and $(N_1, z_1, s_1)$ consists of a map $N^M : N_0 \to N_1$ between their carriers together with data proving that $N^M$ maps the objects in $N_0$ to the "right" objects in $N_1$. | |||||
Suppose for a second that we have a "special" `Nat`{.dt}-algebra, denoted `Nat*`{.dt}. The statement that there exists a recursion principle for the `Nat`{.dt}ural numbers is then a function with the type boxed below. | |||||
```agda | |||||
recursion : (α : Alg) → Morphism Nat* α | |||||
``` | |||||
To borrow some more terminology from our category theorist friends, it's stating that `Nat*`{.dt} is _weakly initial_ in the category of `Nat`{.dt}-algebras: For every other algebra `α` we have a homomorphism `Nat* → α`. It turns out that, if our type theory has inductive types, we can implement this function! The initial algebra is given by `(Nat, zero, suc)` and the `recursion` principle by the function below: | |||||
```agda | |||||
Nat* : Alg | |||||
Nat* = (Nat, zero, suc) | |||||
recursion : (α : Alg) → Morphism Nat* α | |||||
recursion (N0 , z0 , s0) = map , refl , λ n → refl where | |||||
map : Nat → N0 | |||||
map zero = z0 | |||||
map (suc x) = s0 (map x) | |||||
``` | |||||
In particular, to be very needlessly formal, "every inductive type is the _initial algebra_ of some strictly-positive endofunctor." It's no surprise, then, that we can give such an initial algebra for the natural numbers in Agda, a theory with inductive types! The "shape" of this functor depends on the constructors of the inductive type, but the general principle is the same: Data representing the type (the carrier) together with data for every constructor, with the inductive type "replaced" by the carrier. | |||||
And then some more | |||||
------------------ | |||||
A trivial inductive type we can discuss is the unit type, or top type: | |||||
```agda | |||||
data Unit : Set where | |||||
point : Unit | |||||
``` | |||||
Let's speedrun through the definition of `Unit`{.dt}-algebras, `Unit`{.dt}-algebra homomorphisms, and the initial `Unit`{.dt}-algebra. The name `point`{.dt} was picked for the constructor to be indicative of these structures: `Unit`{.dt} is the _initial pointed type_. A pointed type `T` is a type for which we have a point `p : T`—oops, there's our algebra type! It's `(T : Set) × T`. | |||||
_Wait, what? Really?_, I hear you ask. You proceed: _An algebra for the `Unit` type is just a pointed type?_ Yup, my dear reader: the only data we need to eliminate from `Unit` is the type we're eliminating into (the carrier `C`) and a value in `C`, that we map our `point` to! | |||||
The family of types between unit types can also be specified in Agda, but I'm absolutely sure you can work it out: It's a function $U^M : U_0 \to U_1$ such that $U^M p_0 \equiv p_1$. | |||||
```agda | |||||
Unit-Alg : Set₁ | |||||
Unit-Alg = Σ Set λ Unit → Unit | |||||
Unit-Morphism : Unit-Alg → Unit-Alg → Set | |||||
Unit-Morphism (U0 , p0) (U1 , p1) = Σ (U0 → U1) λ UM → UM p0 ≡ p1 | |||||
``` | |||||
Our definition for Unit-recursion is laughable. It's so trivial that I'm going to go a bit further, and instead of proving _weak_ initiality as we did for the naturals, prove _initiality_: There exists a **unique** map from `(Unit, point)` to any other algebra. For this, we need a notion of uniqueness. Contractibility will do: A type is contractible iff it's pointed and, for every other element, there exists an equality between the point and that element. Check out the box: | |||||
```agda | |||||
isContr : Set → Set | |||||
isContr T = Σ T λ center → (other : T) → other ≡ center | |||||
``` | |||||
Or, this is where I _would_ put a nice box together with an Agda-checked proof that `(Unit , point)` is initial... if I had one! Agda does not have a strong enough notion of equality for us to prove this. In particular, Agda out-of-the-box has no _function extensionality_ principle (see my post on equality). However, we _can_ postulate one, and after writing some equality-manipulation functions, write that proof. | |||||
<details> | |||||
<summary>Equality stuff</summary> | |||||
```agda | |||||
postulate | |||||
funext : {ℓ ℓ' : _} | |||||
{A : Set ℓ} {B : A → Set ℓ'} | |||||
(f g : (x : A) → B x) | |||||
→ ((x : A) → f x ≡ g x) → f ≡ g | |||||
subst : {ℓ ℓ' : _} | |||||
{A : Set ℓ} (B : A → Set ℓ') | |||||
{x y : A} | |||||
→ x ≡ y → B x → B y | |||||
subst B refl x = x | |||||
pairPath : {ℓ ℓ' : _} | |||||
{A : Set ℓ} | |||||
{B : A → Set ℓ'} | |||||
{a b : Σ A B} | |||||
(p : (a ₁) ≡ (b ₁)) | |||||
→ subst B p (a ₂) ≡ (b ₂) | |||||
→ a ≡ b | |||||
pairPath {_} {_} {A} {B} {(a , b)} {(a' , b')} refl refl = refl | |||||
_∘_ : {ℓ : _} {A : Set ℓ} {x y z : A} → y ≡ z → x ≡ y → x ≡ z | |||||
refl ∘ refl = refl | |||||
UIP : {ℓ : _} {A : Set ℓ} {x y : A} (p q : x ≡ y) → p ≡ q | |||||
UIP refl refl = refl | |||||
``` | |||||
</details> | |||||
I'll be the first to admit that all this equality stuff, _especially_ the postulate, is a bit gross. However, it's all justifiable: In particular, the setoid model of type theory validates all of MLTT + funext + UIP, so we're in the clear. The `pairPath` function has a very complicated type: it defines the setoid (or groupoid!) structure of the $\sum$-types. | |||||
If you can intuit that an equality between `(a, b) ≡ (c, d)` consists of a pair of equalities `(a ≡ c) × (b ≡ d)`, `pairPath` is that, but dependent. Since the second components of the pairs have types depending on the first components, we need to coerce `a ₂` along the path `p : (a ₁) ≡ (b ₁)` to get an element `subst B p (a ₂) : B (b ₁)` that we can compare with `b ₂`.
```agda | |||||
Unit-initiality : (α : Unit-Alg) | |||||
→ isContr (Unit-Morphism (Unit , point) α) | |||||
Unit-initiality (U0 , p0) = (map , refl) , contract where | |||||
map : Unit → U0 | |||||
map point = p0 | |||||
contract : (other : Unit-Morphism (Unit , point) (U0 , p0)) | |||||
→ other ≡ (map , refl) | |||||
contract ( fun , p ) = | |||||
pairPath | |||||
(funext fun map (λ { point → p })) | |||||
(UIP _ _) | |||||
``` | |||||
With funext, we can also prove `Nat-initiality`, the equivalent statement about natural numbers. Left as an exercise to the reader, though! | |||||
Upgrading your recursion: displayed algebras | |||||
-------------------------------------------- | |||||
Just like we can go from regular pairs to dependent pairs by complicating things, we can complicate the recursor to get an _induction principle_, which uniquely characterises each inductive type. From an algebra (the input to the recursor), we build a **displayed algebra**, which is a logical predicate over the algebra's carrier together with proofs for each of its methods. Let's go back to the natural numbers for a second, since those are more interesting than the unit type. | |||||
Given a $\mathbb{N}$-algebra $\alpha = (N, z, s)$, we define the type of displayed $\mathbb{N}$-algebras over $\alpha$ to be $\sum(N^D : N \to \mathrm{Set}) \sum(z^D : N^D z) (\prod(n : N) N^D n \to N^D (s\ n))$. That is, we "upgrade" the carrier $N$ to a predicate on carriers $N \to \mathrm{Set}$, $z$ gets upgraded to a proof of $N^D z$, and $s$ gets upgraded to a proof that $N^D (s\ n)$ follows from $N^D n$. Check out the box: | |||||
```agda | |||||
Nat-Displayed : (α : Nat-Alg) → Set₁ | |||||
Nat-Displayed (N , z , s) = | |||||
Σ (N → Set) λ Nd → | |||||
Σ (Nd z) λ _ -> | |||||
(n : N) → Nd n → Nd (s n) | |||||
``` | |||||
Now we can finally connect the idea of "inductive types" back to the idea of "induction" that some of us got familiar with in a high school proofs class, namely over the natural numbers: | |||||
> Suppose we have a property of the natural numbers, $P$. If: | |||||
> | |||||
> - $P$ holds for $0$, and | |||||
> - Assuming $P$ holds for $n$, $P$ holds for $n + 1$, then
> | |||||
> $P$ holds for every natural number. | |||||
This is the same thing we have encoded in the displayed algebras over $(N, z, s)$! Since a predicate is, type-theoretically, a family of types, we interpret our $P$ as $N^D : N \to \mathrm{Set}$. A predicate holds for a value iff you have an inhabitant of the predicate applied to that value, so our proofs become terms of the right type. The universal quantifier for $n$ becomes a dependent function, and the implication a regular function. | |||||
The final piece of our complicated terminology puzzle is the idea of a **section** of a displayed algebra over an algebra, a sentence so utterly convoluted that I wish I had never written it. Just like displayed algebras complicate the idea of algebras, sections complicate algebra homomorphisms. Just like a homomorphism has a map between the algebras' carriers, a section has a dependent map from algebra's carrier to the predicate of the algebra displayed over that one. The dependency structure doesn't make a lot of sense written out, so check out the code: | |||||
```agda | |||||
Nat-Section : (α : Nat-Alg) → Nat-Displayed α → Set | |||||
Nat-Section (N , z , s ) (Nd , zd , sd ) = | |||||
Σ ((n : N) → Nd n) λ Ns → | |||||
Σ (Ns z ≡ zd) λ _ → | |||||
((n : N) → Ns (s n) ≡ sd n (Ns n)) | |||||
``` | |||||
The first component of a section is the induced function generated by the motives packaged together by the displayed algebra $(N^D, z^D, s^D)$. The second two components are _reduction rules_, packaged as propositional equalities! The second component, for instance, specifies that $N^S$ (the function) applied to the $z$ero of our algebra returns the method $z^D$. | |||||
The existence of an induction principle is packaged up by saying that any algebra displayed over the initial one has a section. In code, it's a dependent map `Induction : (M : Nat-Displayed Nat*) → Nat-Section Nat* M`{.agda}. We can implement that in Agda like in the box below, but because the `map` obeys the computation rules automatically, none of the proof components are incredibly interesting—they're both trivial proofs.
```agda | |||||
Nat-induction : (M : Nat-Displayed Nat*) → Nat-Section Nat* M | |||||
Nat-induction (Nd , zd , sd) = map , refl , λ s → refl where | |||||
map : (n : Nat) → Nd n | |||||
map zero = zd | |||||
map (suc x) = sd x (map x) | |||||
``` | |||||
As an _incredibly_ convoluted example we can use all our algebra machinery to implement.. drumroll, please... addition! | |||||
```agda | |||||
add : Nat → Nat → Nat | |||||
add = (Nat-induction displayed) ₁ where | |||||
displayed : Nat-Displayed Nat* | |||||
displayed = (λ n → Nat → Nat) | |||||
, (λ z → z) | |||||
, λ _ k z → suc (k z) | |||||
_ : add (suc (suc zero)) (suc (suc zero)) | |||||
≡ suc (suc (suc (suc zero))) | |||||
_ = refl | |||||
``` | |||||
Again using our trivial example as a bit more practice before we move on, let's talk displayed algebras and sections for the Unit type. They're both quite easy. Assuming $(U, p)$ is our algebra, the displayed algebra ends up with a predicate $U^D : U → \mathrm{Set}$ together with a proof $p^D : U^D\ p$. Sections are also quite easy, since there's only one "propositional reduction rule" to give.
```agda | |||||
Unit-Displayed : Unit-Alg → Set₁ | |||||
Unit-Displayed (U , p) = Σ (U → Set) λ UD → UD p | |||||
Unit-Section : (α : Unit-Alg) → Unit-Displayed α → Set | |||||
Unit-Section (U , p) (UD , pD) = | |||||
Σ ((x : U) → UD x) λ US → | |||||
US p ≡ pD | |||||
``` | |||||
Agda once more makes the proofs for writing `Unit-Induction` trivial, so I'll leave both its type and implementation as an exercise for the reader. | |||||
You saw it coming: Vectors | |||||
-------------------------- | |||||
Both of the exceedingly interesting types we looked at above were simple, since they both just exist in a universe, `Set₀`. Now we up the difficulty considerably by adding both _parameters_ and _indices_. Both are arguments to the type itself, but they differ in how they can be used in the constructors. Again, since I am writing about dependent types, I'm obligated by law to mention this example.. fixed-length lists, or vectors. These are _parametrised_ by a type $A$ of elements and _indexed_ by a number $n : \mathbb{N}$, the length. | |||||
```agda | |||||
data Vec (A : Set₀) : Nat → Set₀ where | |||||
nil : Vec A zero | |||||
cons : (n : Nat) → A → Vec A n → Vec A (suc n) | |||||
``` | |||||
Parameters, at least in Agda, are introduced before the `:` in the type signature. Indices come later, but both can be dependent. Parameters have to be the same for every constructor (we only ever mention `Vec A`), but the `Nat`ural can vary between the constructors. One introduces a vector of length zero, and one increments the length of a smaller vector. | |||||
The type of algebras for vectors is a tad more interesting, so let's take a look. Since `Vec` is parametrised by `A`, so is `Vec-Alg`. We don't speak of general "vector algebras", only "vector-of-A algebras", so to speak. Since the natural number is an index, it becomes an argument to our carrier type. Both of the "constructor" data are applied to their indices, and since the `A` is bound as an argument to the algebra function, we can refer to it in the type of `cons` (the third component). | |||||
```agda | |||||
Vec-Alg : Set₀ → Set₁ | |||||
Vec-Alg A = | |||||
Σ (Nat → Set₀) λ Vec → | |||||
Σ (Vec zero) λ nil → | |||||
{- cons : -} (n : Nat) → A → Vec n → Vec (suc n) | |||||
``` | |||||
The type of homomorphisms for `Vec A`-algebras is mechanically derived from the type above by starting from a map `(n : Nat) → V0 n → V1 n` (where `V0`, `V1` are the carriers of the "source" and "target" algebras) and imposing the necessary conditions. | |||||
```agda | |||||
Vec-Morphism : (A : Set₀) → Vec-Alg A → Vec-Alg A → Set₀ | |||||
Vec-Morphism A (V0 , n0 , c0) (V1 , n1 , c1) = | |||||
Σ ((n : Nat) → V0 n → V1 n) λ VM → | |||||
Σ (VM zero n0 ≡ n1) λ _ → | |||||
((n : Nat) (a : A) (xs : V0 n) | |||||
→ VM (suc n) (c0 n a xs) ≡ c1 n a (VM n xs)) | |||||
``` | |||||
Just like in the Nat-algebra morphisms, we have two cases, one of which requires exchanging our map with a constructor—the successor/cons case. It's mostly the same, except for the `A` parameter we have to thread everywhere, and all the indices we have to respect. And, just like back then, assuming funext, we have an initial `Vec A`-algebra given by `(Vec A, nil, cons)`. The proof is full of fiddly details[^1], but it's again justified by the setoid model, with only `funext` needing to be postulated. | |||||
```agda | |||||
Vec-initiality : (A : Set) (α : Vec-Alg A) | |||||
→ isContr (Vec-Morphism A (Vec A , nil , cons) α) | |||||
Vec-initiality A (V0 , n0 , c0) = (map , resp) , contract where | |||||
map : (n : Nat) → Vec A n → V0 n | |||||
map zero nil = n0 | |||||
map (suc n) (cons .n a xs) = c0 n a (map n xs) | |||||
resp = refl , λ n a x → refl | |||||
``` | |||||
Above we provide the map from `Vec A n` to the algebra `α`'s carrier, `V0`, and prove that it `resp`{.kw}ects the equalities imposed by `Vec A`-homomorphisms. Since Agda has pattern matching built in, these equalities are trivial (`refl`{.agda}). Our proof continues upside-down: The interesting thing here is the `map'≡map` equality. Skip the noise: keep reading after the code. | |||||
```{.agda .continues} | |||||
contract : (other : Vec-Morphism A (Vec A , nil , cons) | |||||
(V0 , n0 , c0)) | |||||
→ other ≡ (map , refl , λ n a xs → refl) | |||||
contract (map' , nM , cM) | |||||
= pairPath | |||||
map'≡map | |||||
(pairPath | |||||
(UIP _ _) | |||||
(funext3 _ _ (λ n a xs → UIP _ _))) | |||||
where | |||||
map'~map : (n : Nat) (xs : Vec A n) → map' n xs ≡ map n xs | |||||
map'~map .zero nil = nM | |||||
map'~map .(suc n) (cons n x xs) = | |||||
subst | |||||
(λ e → map' (suc n) (cons n x xs) ≡ c0 n x e) | |||||
(map'~map n xs) | |||||
(cM n x xs) | |||||
map'≡map = funext2 _ _ map'~map | |||||
``` | |||||
We build the `map'≡map`{.va} equality by an appeal to `funext2`{.va}, a helper defined in terms of our `funext`{.op} postulate to reduce the noise a tad. Again, I want to stress that this is a failure of Agda: There is a family of semantic models of type theory in which equality in function types is pointwise _by definition_, including the setoid model, which would be very convenient here since we could keep our appeals to `UIP`{.op}. Anything that validates `funext`{.op} would do, though, including Cubical Agda. | |||||
Other than that, the proof is standard: Everything we need is given by the `nM`{.va} and `cM`{.va} equalities of the `other`{.va} morphism we're contracting. In the inductive step, the case for `(cons n x xs)`, we have a path `cM n x xs : map' (suc n) (cons n x xs) ≡ c0 n x (map' n xs)`. Since we want the `map' n xs` on the right-hand side to be `map n xs`, we `subst`{.op} it away using the path `map'~map n xs : map' n xs ≡ map n xs` obtained by a recursive application of `map'~map`{.fn}. | |||||
Now we turn to dependent elimination of vectors, or induction. For this we need to calculate the type of displayed algebras over a given `Vec A`-algebra. It's mechanical: The `V` of our algebra gets upgraded to a family of predicates `P : (n : Nat) → V n → Set₀`. The "constructor data" given by `n , c` become "proof data" of a type extending the `Vec A`-algebra. | |||||
```agda | |||||
Vec-Displayed : (A : Set₀) → Vec-Alg A → Set₁ | |||||
Vec-Displayed A (V , z , c) = | |||||
Σ ((n : Nat) → V n → Set₀) λ P → | |||||
Σ (P zero z) λ nil → | |||||
{- cons -} (n : Nat) (x : A) (tail : V n) | |||||
→ P n tail → P (suc n) (c n x tail) | |||||
``` | |||||
In the `cons` case, the algebra has access both to the `tail` of type `V n` _and_ the inductive assumption `P n tail`. This represents, operationally, a choice between recurring or not. | |||||
You know the drill by now: after displayed algebras, algebra sections. Assume an algebra `α = (V, nil, cons)` and a displayed algebra `(P, nD, cD)` over `α`. The first component will be a dependent function of type `(n : Nat) (x : V n) → P n x`, and the second and third components will be propositional equalities representing reduction rules. Since the vectors and natural numbers are similar in structure, we'll end up with similar-looking reduction rules, just noisier. Here they are: | |||||
```agda | |||||
Vec-Section : (A : Set₀) (α : Vec-Alg A) → Vec-Displayed A α → Set | |||||
Vec-Section A (V , n , c) (P , nD , cD) = | |||||
Σ ((n : Nat) (x : V n) → P n x) λ map → | |||||
Σ (map zero n ≡ nD) λ _ → | |||||
( (n : Nat) (x : A) (tl : V n) | |||||
→ map (suc n) (c n x tl) ≡ cD n x tl (map n tl)) | |||||
``` | |||||
They're not pretty, so take a minute to internalise them. Again, we have the dependent `map`{.op}, together with a propositional equality which says `map zero n` evaluates to the `nD` datum of our displayed map. The third component specifies that `map (suc n)` defers to the `cD` component and recurs in the tail. It's not complicated conceptually, but it is complex to write down. Finally, the induction principle for vectors says that any displayed algebra over the initial `(Vec A , nil , cons)` has a section. Again, Agda trivialises the equalities:
```agda | |||||
Vec-Induction : (A : Set₀) | |||||
(α : Vec-Displayed A (Vec A , nil , cons)) | |||||
→ Vec-Section A (Vec A , nil , cons) α | |||||
Vec-Induction A (P , nD , cD) = map , refl , λ n a xs → refl where | |||||
map : (n : Nat) (xs : Vec A n) → P n xs | |||||
map .zero nil = nD | |||||
map .(suc n) (cons n x xs) = cD n x xs (map n xs) | |||||
``` | |||||
I bet you've never seen types like these | |||||
---------------------------------------- | |||||
This is a bet I'm willing to take. Unless you've gone looking for ways to formalise semantics of inductive types before, it's not very likely for you to have come across this algebra machinery before. A far more common way of dealing with inductive data types is dependent pattern matching: | |||||
```agda | |||||
add : Nat → Nat → Nat | |||||
add zero n = n | |||||
add (suc x) n = suc (add x n)
map : {n : Nat} {A B : Set₀} → (A → B) → Vec A n → Vec B n | |||||
map f nil = nil | |||||
map f (cons x xs) = cons (f x) (map f xs) | |||||
``` | |||||
However, there is a very good reason not to work directly with pattern matching in a formalisation. That reason can be found on page 24 of [this paper] by Cockx et al. Take a minute to recover and we can continue talking. A more tractable presentation of inductive types is directly with induction principles, instead of taking this detour through category theory. For instance, the natural numbers: | |||||
```agda | |||||
Nat-induction : (P : Nat → Set) | |||||
→ P zero | |||||
→ ((n : Nat) → P n → P (suc n)) | |||||
→ (n : Nat) → P n | |||||
Nat-induction P pz ps zero = pz | |||||
Nat-induction P pz ps (suc n) = ps n (Nat-induction P pz ps n)
``` | |||||
It turns out that this presentation, and the one based on algebras I've been going on about, are actually equivalent! Very plainly so. If you unfold the type of `Nat-Induction` (the one about algebras), and do some gratuitous renaming, you get this: | |||||
```agda | |||||
Nat-Induction : | |||||
(M : Σ (Nat → Set) | |||||
λ P → Σ (P zero) | |||||
λ _ → (n : Nat) → P n → P (suc n) | |||||
) | |||||
→ Σ ((n : Nat) → M ₁ n) | |||||
λ Nat-ind → Σ (Nat-ind zero ≡ M ₂ ₁) | |||||
λ _ → (n : Nat) → Nat-ind (suc n) ≡ M ₂ ₂ n (Nat-ind n)
``` | |||||
It's ugly, especially with all the subscript projections, but if we apply some good ol' currying we can recover this type, by transforming (the dependent analogue of) `A × B × C → D` with `A → B → C → D`: | |||||
```agda | |||||
Nat-Induction : (P : Nat → Set) | |||||
(pzero : P zero) | |||||
(psuc : (n : Nat) → P n → P (suc n)) | |||||
→ Σ ((n : Nat) → P n) | |||||
λ ind → Σ (ind zero ≡ pzero) | |||||
λ _ → (n : Nat) → ind (suc n) ≡ psuc n (ind n) | |||||
``` | |||||
I've gone and done some more renaming. `P` is a name for `M ₁`, `pzero` is `M ₂ ₁`, and `psuc` is `M ₂ ₂`. Again, just to emphasise what the contents of "a displayed algebra section over the initial algebra" turn out to be: | |||||
- The first component is **a function from the data type to a predicate**, | |||||
- The $n$ following components are **propositional equalities representing reduction rules**, for each constructor, recurring as appropriate. | |||||
If you'll allow me one of _those_ asides, this is just incredibly cool to me. The fact we can start from an inductive signature and derive not only the recursor, but from that derive the motives and methods of induction, and from _that_ derive exactly the behaviour of the induced map for a particular bundle of motives and methods? It feels.. right. Like this is what induction was meant to be. Not some needlessly formal underpinning for pattern matching, like I used to think of it as, but as distinguished, rich mathematical objects determined exactly by their constructors. | |||||
Complicating it further: Induction-induction | |||||
-------------------------------------------- | |||||
We've seen, in the past 2 thousand-something words, how to build algebras, algebra homomorphisms, displayed algebras and algebra sections for inductive types, both indexed and not. Now, we'll complicate it further: Induction-induction allows the formation of two inductive families of types $A : \mathrm{Set}$ and $B : A \to \mathrm{Set}$ _together_, such that the constructors of A can refer to those of B and vice-versa. | |||||
The classic example is defining a type together with an inductively-defined predicate on it, for instance defining sorted lists together with a "less than all elements" predicate in one big induction. However, in the interest of brevity, I'll consider a simpler example: A syntax for contexts and Π-types in type theory. We have one base type, `ι`, which is valid in any context, and a type for dependent products `Π`. Its type expresses the rule for $\prod$-introduction, which says that given $\Gamma \vdash \sigma\ \mathrm{type}$ and $\Gamma, \sigma \vdash \tau\ \mathrm{type}$, then $\Gamma \vdash (\prod(\sigma) \tau)\ \mathrm{type}$. | |||||
```agda | |||||
data Ctx : Set | |||||
data Ty : Ctx → Set | |||||
data Ctx where | |||||
stop : Ctx | |||||
pop : (Γ : Ctx) → Ty Γ → Ctx | |||||
data Ty where | |||||
ι : {Γ : Ctx} → Ty Γ | |||||
Π : {Γ : Ctx} (σ : Ty Γ) (τ : Ty (pop Γ σ)) → Ty Γ | |||||
``` | |||||
Here I'm using Agda's forward-declaration feature to specify the signatures of `Ctx` and `Ty` before their constructors. This is because the constructors of `Ctx` mention the type `Ty`, and the type _of_ `Ty` mentions `Ctx`, so there's no good way to untangle them. | |||||
When talking about our standard set of categorical gizmos for inspecting values of inductive type, it's important to note that, just like we can't untangle the definitions of `Ctx` and `Ty`, we can't untangle their algebras either. Instead of speaking of `Ctx`-algebras or `Ty`-algebras, we can only talk of `Ctx-Ty`-algebras.[^2] Let's think through the algebras first, since the rest are derived from that. | |||||
For our `Nat`-algebras, we had as a carrier an element of `Set₀`. For the `Vec`-algebras, we needed a carrier which was an element of `Nat → Set₀`. Now, since we have two types, one of which is indexed, we can't describe their carriers in isolation: We have a _telescope_ of carriers $Σ (\mathrm{Ctx}^A : \mathrm{Set}_0)\ Σ (\mathrm{Ty}^A : \mathrm{Ctx}^A \to \mathrm{Set}_0)$ over which the type of the methods is quantified. Let's go through them in order.
- `stop : Ctx` gives rise to a method $\mathrm{stop}^A : \mathrm{Ctx}^A$, | |||||
- `pop : (Γ : Ctx) → Ty Γ → Ctx` generates a method $\mathrm{pop}^A : (Γ^A : \mathrm{Ctx}^A) → \mathrm{Ty}^A\ Γ^A → \mathrm{Ctx}^A$ | |||||
- `ι : {Γ : Ctx} → Ty Γ` leads us to jot down a method $\iota^A : \{Γ^A : \mathrm{Ctx}^A\} → \mathrm{Ty}^A\ \Gamma^A$ | |||||
- `Π : {Γ : Ctx} (σ : Ty Γ) (τ : Ty (pop Γ σ)) → Ty Γ` finally gives us a method with the hell-of-a-type | |||||
$\Pi^A : \{\Gamma^A : \mathrm{Ctx}^A\} (\sigma^A : \mathrm{Ty}^A\ \Gamma^A) (\tau^A : \mathrm{Ty}^A (\mathrm{pop}^A\ \Gamma^A\ \sigma^A)) → \mathrm{Ty}^A\ \Gamma^A$
Let's go back to Agda-land, where I will write exactly the same thing but with syntax somehow even less convenient than LaTeX. Just as a reminder, `Σ A λ x → B` is how I've been writing $\sum(x : A) B$. | |||||
```agda | |||||
Ctx-Ty-Alg : Set₁ | |||||
Ctx-Ty-Alg = | |||||
Σ Set₀ λ Ctx → | |||||
Σ (Ctx → Set₀) λ Ty → | |||||
Σ Ctx λ stop → | |||||
Σ ((Γ : Ctx) → Ty Γ → Ctx) λ pop → | |||||
Σ ({Γ : Ctx} → Ty Γ) λ ι → | |||||
({Γ : Ctx} (σ : Ty Γ) (τ : Ty (pop Γ σ)) → Ty Γ) | |||||
``` | |||||
The type of homomorphisms for these algebras are the same as the ones we've seen before, except for the fact they're way more complicated. Both of the carrier components still become functions, and all of the method components still become equalities which must be respected, except now there's more of them. | |||||
```{.agda tag=Suffering} | |||||
Ctx-Ty-Morphism : Ctx-Ty-Alg → Ctx-Ty-Alg → Set₀ | |||||
Ctx-Ty-Morphism | |||||
(C0 , T0 , s0 , p0 , i0 , f0) | |||||
(C1 , T1 , s1 , p1 , i1 , f1) | |||||
= | |||||
Σ (C0 → C1) λ Ctx → | |||||
Σ ((x : C0) → T0 x → T1 (Ctx x)) λ Ty → | |||||
-- Constructor data for Ctx | |||||
Σ (Ctx s0 ≡ s1) λ stop → | |||||
Σ ( (x : C0) (ty : T0 x) | |||||
→ Ctx (p0 x ty) ≡ p1 (Ctx x) (Ty x ty)) | |||||
λ pop → | |||||
-- Constructor data for Ty | |||||
Σ ({Γ : C0} → Ty Γ (i0 {Γ}) ≡ i1 {Ctx Γ}) λ iota → | |||||
( {Γ : C0} (σ : T0 Γ) (τ : T0 (p0 Γ σ)) | |||||
→ Ty Γ (f0 {Γ} σ τ) | |||||
≡ f1 (Ty Γ σ) (subst T1 (pop Γ σ) (Ty (p0 Γ σ) τ))) | |||||
``` | |||||
Ok, I lied. There's more than just the usual "more stuff" complication. In the case for `Π` (nameless, the last component), we need to compare `f0 σ τ` with `f1 (Ty Γ σ) (Ty (p0 Γ σ) τ)`. Read past the symbols: We need to make sure that the `Ty` operation maps `Π` types to `Π` types. But, there's a catch! `Ty (p0 Γ σ) τ` has type `T1 (Ctx (p0 Γ σ))` (i.e., assemble the context in `C0` with `p0`, then translate to `C1` with `T1`), but `f1` wants something in `T1 (p1 (Ctx Γ) (Ty Γ σ))` (assemble the context in `C1` with translated components).
We know these types are equal though, the equation `pop` says _just that_! Agda isn't that clever, though (thankfully), so we have to manually cast along the path `pop Γ σ` to make these types line up. Since we have so many equations, we definitely would've ended up with at least one dependent path.[^3] | |||||
I'll pass on proving strict initiality in the interest of brevity, settling for the weak variant. Recall that initiality means we have a distinguished `Ctx-Ty`-algebra, given by the type formers and constructors, from which we can make a morphism to any other algebra. In types: | |||||
```agda | |||||
Ctx-Ty-Initial : (M : Ctx-Ty-Alg) → Ctx-Ty-Morphism Ctx-Ty* M | |||||
Ctx-Ty-Initial (C0 , T0 , s0 , p0 , i0 , f0) | |||||
= C , T , refl , (λ x ty → refl) , refl , λ σ τ → refl | |||||
where | |||||
C : Ctx → C0 | |||||
T : (x : Ctx) → Ty x → T0 (C x) | |||||
C stop = s0 | |||||
C (pop c t) = p0 (C c) (T c t) | |||||
T γ ι = i0 {C γ} | |||||
T γ (Π σ τ) = f0 (T γ σ) (T (pop γ σ) τ) | |||||
``` | |||||
I want to make clear that, while this looks like a bloody mess, it was incredibly simple to implement. See the equation `T γ (Π σ τ) = ...`? I didn't write that. I didn't _need_ to. Check out the type of the last component of `Ctx-Ty-Morphism Ctx-Ty* M`. It's literally just saying what the right-hand-side of `T γ (Π σ τ)` needs to be for `(C, T)` to be the morphism part of a **homo**morphism. And the homo (ahem) part is trivial! It's all `refl`s again (are you noticing a trend?). | |||||
Elevating it: Higher Inductive Types | |||||
------------------------------------ | |||||
The algebra approach isn't limited to the `Set`-level mathematics we've been doing up to now, and it scales very simply to types with path constructors in addition to point constructors. For this part, we unfortunately can't _implement_ any of the proofs in standard Agda, especially Agda `--with-K` that we've been using up to now, but we can still talk about their types. Let's start with the second simplest higher inductive type: Not the circle, but the interval. | |||||
The interval has two endpoints, `l` and `r`, and a line `seg`{.op}ment between them. It's a contractible type, so it's equivalent to the Unit type we've seen before, but it's.. more. The existence of the interval type implies funext, and we'll show that using algebras. | |||||
```agda | |||||
data I : Type₀ where | |||||
l r : I | |||||
  seg : l ≡ r
``` | |||||
The type of algebras for the interval is also simple. We have a carrier, $I^A$, two points $l^A : I^A$ and $r^A : I^A$, and an _equality_ $seg^A : l^A \equiv r^A$. Wait, an equality? Yup! Since the endpoints `l` and `r` are equal, so must be whatever they map to. But, since we're in HoTT-land now, this equality doesn't necessarily need to be trivial, as we'll see in a second.
```agda | |||||
I-Alg : Type₁ | |||||
I-Alg = Σ Type₀ λ I → Σ I λ l → Σ I λ r → l ≡ r | |||||
``` | |||||
The type of algebra homomorphisms, like before, encompasses a map together with some propositional reductions of that map. We have three "standard" components `I`, `l`, `r`, which are just like for the type of booleans, and a mystery fourth component which expresses the right coherence condition. | |||||
```agda | |||||
I-Alg-Morphism : I-Alg → I-Alg → Type | |||||
I-Alg-Morphism (I0 , l0 , r0 , seg0) (I1 , l1 , r1 , seg1) = | |||||
Σ (I0 → I1) λ I → | |||||
Σ (I l0 ≡ l1) λ l → | |||||
Σ (I r0 ≡ r1) λ r → | |||||
``` | |||||
Now the type of the fourth component is interesting. We want `seg0` and `seg1` to be "equal" in some appropriate sense of equality. But they have totally different types! `seg0 : l0 ≡I0 r0` and `seg1 : l1 ≡I1 r1`. It's easy enough to fix the type of `seg0` to be a path in `I1`: Since `I : I0 → I1`, its action on paths `ap I seg0` is a path in `I1` between `I l0` and `I r0`. Out of the pan, into the fire, though. Now the endpoints don't match up! | |||||
Can we fix `I l0` and `I r0` in the endpoints of `seg0`? The answer, it turns out, is yes. We can use path _transitivity_ to alter both endpoints. `r` correctly lets us go from `I r0` to `r1`, but `l` is wrong—it takes `l1` to `I r0`. We want that backwards, so we apply _symmetry_ here. | |||||
```agda | |||||
(r ∘ (ap I seg0 ∘ sym l)) ≡ seg1 | |||||
``` | |||||
Now, I'll reproduce an argument from the HoTT book, which proves function extensionality follows from the interval type and its eliminator. But we'll do it with algebras. First, we need to postulate (since Agda is weak), that there exists a weakly initial `I`-algebra. This'll play the part of our data type. In reality, there's a couple more details, but we can ignore those, right? | |||||
<details> | |||||
<summary>We can't.</summary> | |||||
Instead of just full-on postulating the existence of an `I`-algebra and its initiality, I'll define the type `I` (lacking its `seg`) normally, postulate `seg`, then postulate the coherence condition in the definition of `I-Recursion`. This simplifies some proofs since it means `I-Recursion` will compute definitionally on point constructors. | |||||
```agda | |||||
data I : Type where | |||||
l r : I | |||||
postulate | |||||
seg : l ≡ r | |||||
I* : I-Alg | |||||
I* = (I , l , r , seg) | |||||
I-Recursion : (α : I-Alg) → I-Alg-Morphism I* α | |||||
I-Recursion (I0 , l0 , r0 , seg0) = go , refl , refl , subst (λ e → e ≡ seg0) (∘refl (ap go seg)) sorry where | |||||
go : I → I0 | |||||
go l = l0 | |||||
go r = r0 | |||||
postulate | |||||
sorry : ap go seg ≡ seg0 | |||||
``` | |||||
There's a bit of ugly equality-wrangling still, but it's fine. All that matters is that it type checks, I suppose. | |||||
</details> | |||||
Now, the argument. We want a function that, given $f, g : A \to B$ and a homotopy $p : \prod(x : A) \to f(x) \equiv g(x)$, produces a path $f \equiv g$. We define a family of functions $h_x : I \to B$ given an $x : A$, which maps `l` to `f(x)`, `r` to `g(x)`, and in the "`seg` case", returns `p(x)`. The algebra is simple:
```agda | |||||
I→funext : {A B : Type} (f g : A → B) (p : (x : A) → f x ≡ g x) → f ≡ g
I→funext {A} {B} f g p = ? where | |||||
h : A → I → B | |||||
h x = (I-Recursion (B , f x , g x , p x)) ₁ | |||||
``` | |||||
And for my next trick, I'll define the function `h' = flip h`. Or, in Agda terms, `h' = λ i x → h x i`, a function from `I → A → B`. We have a path between the two endpoints of the segment, so we also have a path between the two "endpoints" of this function, which we can calculate:
```{.agda tag="Mmm, substitution"} | |||||
ap h' seg : (λ i x → h x i) l ≡ (λ i x → h x i) r -- type of ap | |||||
= (λ x → h x l) ≡ (λ x → h x r) -- β-reduce | |||||
= (λ x → f x) ≡ (λ x → g x) -- computation for h | |||||
= f ≡ g -- η-reduce | |||||
``` | |||||
This concludes our proof: | |||||
```agda | |||||
I→funext : {A B : Type} (f g : A → B) (p : (x : A) → f x ≡ g x) → f ≡ g
I→funext {A} {B} f g p = ap h' seg where | |||||
h : A → I → B | |||||
h x = (I-Recursion (B , f x , g x , p x)) ₁ | |||||
h' : I → A → B | |||||
h' i x = h x i | |||||
``` | |||||
This concludes our post | |||||
----------------------- | |||||
God, this was a long one. Four thousand words! And a bit more, with this conclusion. | |||||
I've wanted to write about inductive types for a hecking long time now, ever since the day I finished the equality post. However, coming by motivations in the past year has been.. hard, for everyone. So it's good that I finally managed to get it out there! _77 days later_. God. I might write some more type theory in the future, but don't hold your breath! 77 days is like, 36 thousand times more than you could do that for.
I ended up writing about algebras instead of eliminators or pattern matching because they seamlessly scale up even to higher induction-induction, which ends up having super complicated eliminators, far too much for me to derive by hand (especially early in the AMs, which is when most of this post was written). | |||||
<div class="special-thanks"> | |||||
With special thanks to the proofreader: | |||||
* [My friend Jonathan](https://squiddev.cc) | |||||
</div> | |||||
[this paper]: https://dl.acm.org/doi/10.1145/3236770 | |||||
[^1]: And, full disclosure, took me almost an hour to write at 2 AM... | |||||
[^2]: The best part of this presentation is that it makes painfully clear the infernal way that everything in type theory depends on everything else. | |||||
[^3]: Please forgive my HoTT accent. |
@ -0,0 +1,607 @@ | |||||
--- | |||||
title: Cubical Type Theory | |||||
date: March 7th, 2021 | |||||
--- | |||||
Hello, everyone! It's been a while, hasn't it? Somehow, after every post, I manage to convince myself that I'm gonna be better and not let a whole season go by between posts, but it never happens. For the last two posts I've been going on at length about fancy type theories, and this post, as the title implies, is no exception. In fact, two posts ago I mentioned, offhand, cubical type theory as a possibility for realising HoTT in a constructive way, but 128 days ago I did not understand cubical type theory in the slightest. | |||||
Now, however, I do! I still don't know what the hell the word "fibration" is supposed to mean, or indeed "fibrant", but we can gloss over that and present cubical type theory with as little category-theoretical jargon as possible. In fact, I have a mostly[^1]-complete implementation of cubical type theory for us to use as a discussion framework. | |||||
[^1]: At the time of writing, in the very early AM between Saturday and Sunday, the only thing missing is the implementation of composition for higher inductive types. However, this is mostly because I'm hella bored of writing code and wanted to write words instead. This way I can have more fun! | |||||
As mentioned in [Reflections on Equality], the main idea of Cubical Type Theory is the type of paths, so let's talk about that at length. | |||||
[Reflections on Equality]: /posts/reflections-on-equality.html | |||||
Paths | |||||
----- | |||||
Even in boring old Martin-Löf type theory, as soon as we have equalities and induction, we can prove a very interesting theorem: Every function preserves paths. This is actually a simplification of a more fundamental fact in MLTT, its groupoid structure, in which funct*ions* are interpreted as funct*ors*. Like a category-theoretical functor has an action on objects and an action on morphisms, a type-theoretical function has an action on values and an **a**ction on **p**aths. | |||||
Using path induction, we can prove it (roughly) like this. Suppose (given a $f : A \to B$), there is a path $p : x \equiv_A y$. By induction, we may assume $y$ is $x$ and $p$ is $\mathrm{refl}$, in which case what we need to prove is $f(x) \equiv_{B} f(x)$. But this is what $\mathrm{refl}$ states. This isn't a complicated proof, because it's not a complicated theorem: the images of equal elements are equal, big deal. | |||||
This is where things get a little mind-bending. What would happen if we had a type with "two" values, with a path between them? The values of the function at either end could be different, but they would still be... equal. This is the main idea of cubical type theory: We add an interval type, $\mathbb{I}$, which denotes the interval _object_ $[0,1]$ in our model. Then we can drop the inductive definition of equalities as generated by $\mathrm{refl}$ and simply _define_ equalities in $A$ as functions $\mathbb{I} \to A$. | |||||
Let's not get ahead of ourselves, though, and talk a bit more about the interval type. It has two elements, $i0$ and $i1$, but it's not isomorphic to the type of booleans: Internally to the type theory, we have no way of distinguishing $i0$ and $i1$, since every function must be continuous. | |||||
Since it denotes $[0,1]$, we can define lattice operations on elements of the interval, enough to equip it with the structure of a _De Morgan algebra_, but not a boolean algebra. We have meets, $a \land b$, the logical operation of "and", interpreted as $\min(a,b)$; Joins, $a \lor b$, the logical operation of "or", interpreted as $\max(a,b)$; And an involution, $\neg a$, which denotes the algebraic operation $1 - a$.[^2]
[^2]: If you, like me, are always confused by why $a \land b$ is min and $a \lor b$ is max, check out these Desmos links: [min] and [max]. Keep these in mind the next time you're confused :) | |||||
[min]: https://www.desmos.com/calculator/8qkr6deosy | |||||
[max]: https://www.desmos.com/calculator/gohdyaehna | |||||
These operations follow the usual laws of Boolean logic save for two: In general, $\min(x, 1 - x)$ is not $0$ and $\max(x, 1 - x)$ is not $1$, only for the endpoints. While internally to the type theory we have no element representing "half", since the object $\mathbb{I}$ denotes _does_ have these filler points, we can't in general expect those equations to hold. Hence, De Morgan algebra, not Boolean.
Another thing to keep in mind is that, while the interval is an expression which other expressions have as a type (namely, $\Gamma \vdash i0 : \mathbb{I}$ and $\Gamma \vdash i1 : \mathbb{I}$), we do not call it a type. We reserve the word type for objects with more structure (which we will discuss later). For now, it's enough to think of the interval as a "pre"type, something which is almost, but not quite, a type. Cubical type theory has plenty of these pretypes so we include a separate universe $\mathscr{U}_{\omega}$ to classify them. | |||||
Now that we're familiar with the interval, we can discuss the actual title of this section, paths. We define the type of paths in $A$ as a refinement of the function space $f : \mathbb{I} \to A$, where the values of $f(i0)$ and $f(i1)$ are indicated in the type. Hence the formation rule, on the left: | |||||
<div class="mathpar"> | |||||
$$\frac{\Gamma, i : \mathbb{I} \vdash e : A}{\Gamma \vdash \lambda i. e : \mathrm{Path}\ A\ e[i0/i]\ e[i1/i]}$$ | |||||
$$\frac{\Gamma \vdash p : \mathrm{Path}\ A\ x\ y\quad \Gamma \vdash i : \mathbb{I}}{\Gamma \vdash p(i) : A}$$ | |||||
</div> | |||||
On the right is the elimination rule, which says that if we have an element of the interval we can project the value the path takes at that point. Alternatively we could represent paths by the type with an inclusion $\mathrm{inP} : \prod{(f : \mathbb{I} \to A)} \to \mathrm{Path}\ A\ f(i0)\ f(i1)$ and projection $\mathrm{outP} : \mathrm{Path}\ A\ x\ y \to \mathbb{I} \to A$. Furthermore, we impose a pair of "regularity" equations, which state that $p(i0) = x$ and $p(i1) = y$ for paths $p : \mathrm{Path}\ A\ x\ y$. | |||||
One important difference between functions out of the interval and paths is that, while the former would be put in the universe $\mathscr{U}_{\omega}$ by virtue of its domain being a pretype, paths *do* have the required additional structure to be in the universe $\mathscr{U}$ of "proper types", as long as the type $A$ of the endpoints does. | |||||
Using the algebraic structure of the interval we can define some operations on paths, which we may represent diagrammatically. For simplicity, paths will be drawn as direct lines between their endpoints, and the type will be left to be inferred from the context; A path whose bound variable is $i$ will be drawn in the left-to-right direction, and a path whose bound variable is $j$ will be drawn in the upwards direction.
Since bound interval variables _are_ variables, they have all the same structural rules as normal variables! In particular, weakening lets us drop an interval variable to have a constant path. This is a proof of reflexivity, which we diagram as follows: | |||||
<figure> | |||||
<img src="/diagrams/ctt/refl_tt.svg" alt="reflexivity" style="min-width: 250px;" /> | |||||
<figcaption>The reflexivity path for $a$ is represented by a constant path.</figcaption> | |||||
</figure> | |||||
Given a path $p$ with endpoints $x$ and $y$ (concisely written as $p : x \equiv y$) we compute its inversion, $sym(p) : y \equiv x$ by "precomposition" with the interval involution: | |||||
<figure> | |||||
<img src="/diagrams/ctt/pi_vs_pnoti_1.svg" alt="p from a to b" style="min-width: 250px;" /> | |||||
<img src="/diagrams/ctt/pi_vs_pnoti_2.svg" alt="the inverse of p from b to a" style="min-width: 250px;" /> | |||||
<figcaption>By inverting the interval argument, we can invert paths.</figcaption> | |||||
</figure> | |||||
The meet and join operations on the interval let us define two kinds of squares called _connections_, which let us concisely turn a one-dimensional _path_ into a two-dimensional _square_, which gives us paths between paths (paths in the second dimension). The connection generated by $i \land j$ is going to be especially helpful in a bit, when we prove that singletons are contractible, and hence that paths are a suitable definition of equality. | |||||
<div class="mathpar"> | |||||
<figure style="width: 50%;"> | |||||
<img src="/diagrams/ctt/land_connection.svg" alt="and connection" style="min-width: 250px;" /> | |||||
<figcaption>The square generated by $\lambda i\ j. p(i \land j)$</figcaption> | |||||
</figure> | |||||
<figure style="width: 50%;"> | |||||
<img src="/diagrams/ctt/lor_connection.svg" alt="or connection" style="min-width: 250px;" /> | |||||
<figcaption>The square generated by $\lambda i\ j. p(i \lor j)$</figcaption> | |||||
</figure> | |||||
</div> | |||||
Let's walk through the construction of the left square, keeping in mind that $i$ goes right and $j$ goes up. Since the top and bottom faces vary in the $i$ direction but not the $j$ direction, they'll all have a prefixed $\lambda i$; The left and right faces just correspond to applying the outermost lambda inside the square. For the faces, we have: | |||||
- Left: $(\lambda i\ j. p(i \land j))\ i0$, which reduces to $\lambda j. p(i0)$, is the constant path at $a$; | |||||
- Top: $\lambda i. (\lambda i\ j. p(i \land j))\ i\ i1$, which reduces to $\lambda i. p(i)$, is the path $p$; | |||||
- Bottom: $\lambda i. (\lambda i\ j. p(i \land j))\ i\ i0$, which reduces to $\lambda i. p(i0)$. Again, $\mathrm{refl}_a$. | |||||
- Right: $(\lambda i\ j. p(i \land j))\ i1$, which reduces to $\lambda j. p(j)$--- you guessed it, it's $p$; | |||||
You can see that in either the $i$ or $j$ direction the inside of this square connects the path $p$ with the constant path at its left endpoint. This is exactly what we need for the following proof that singletons are contractible: | |||||
``` | |||||
singContr : {A : Type} {a : A} -> isContr (Singl A a) | |||||
singContr {A} {a} = ((a, \i -> a), \y i -> (y.2 i, \j -> y.2 (iand i j))) | |||||
``` | |||||
This proof is written syntactically, in the language of [cubical](https://git.amelia.how/amelia/cubical). This proof appears on [line 114] of the massive source file which has everything I've tried to prove with this so far. What's a module system? The actual proof file has some preliminaries which would be interesting if you care about how cubical type theory is actually implemented. | |||||
Another operation on equalities which is very hard in MLTT, but trivial with cubes, is function extensionality. You can see why this would be simple if you consider that a pointwise equality between functions would be an element of $A \to \mathbb{I} \to B$, while an equality between functions themselves is an element of $\mathbb{I} \to A \to B$. By simply swapping the binders, we get the naive function extensionality. | |||||
The proof of full function extensionality as per the HoTT book is also very simple, but it requires quite a bit more infrastructure to talk about; For now, rather than saying `happly` (line 667) is an equivalence, we can simply say that `happly` has `funext` as right and left inverses, and the proof is trivial in both directions ([line 675]). | |||||
With the infrastructure so far we can't prove a whole lot, though. For instance, we have proved that singletons are contractible, but this doesn't freely get us axiom J; Neither can we prove that every property respects equalities, or anything like that. For that sort of proof, we need to introduce a transport operation, which, given the left endpoint of a path of types, returns the right endpoint. However, cubical type theory refuses to be simple.
Quick sidenote, path application corresponds to the eliminator for $\mathbb{I}$, since it conceptually has the type in the box below. We use here the type of _dependent paths_, PathP. | |||||
``` | |||||
iElim : {A : I -> Type} {x : A i0} {y : A i1} -> PathP A x y | |||||
-> (i : I) -> A i | |||||
iElim p i = p i | |||||
``` | |||||
Simplicity is disallowed | |||||
------------------------ | |||||
While providing a primitive $\mathrm{transp} : \prod(A : \mathbb{I} \to \mathscr{U}) \to A(i0) \to A(i1)$ might seem like all we need to make paths a sensible notion of equality, reality is not that simple. In particular, transport on paths is hard to define with such an operation, so, as is tradition in type theory, we make things simpler by making them more general. Rather than providing a primitive transport, we provide a primitive _composition_ operation, which generalises transport and composition of paths.
Composition expresses the funny-sounding principle that "every open box has a lid". No, that is not a joke; That's actually what we're talking about. A description in (almost!) English would be to say that composition, given a shape, a partial cube of that shape, and a face (which must agree with the partial cube), returns the opposite face. If you think that description is nonsensical, strap in, because interpreting it type-theoretically requires another 67 lines of definitions in the code! For reference, the almost 2000 words which precede this paragraph covered roughly 20 lines of actual code. | |||||
Crying over what I still have to write won't help me get this blog post out any sooner though, so let's get to it. | |||||
### Cofibrations | |||||
Again that god damn word. In addition to the interval object, to define a cubical model of type theory, we need a notion of _cofibration_, which is a fancy way of saying "shape of a partial cube". In the papers which introduced cubical type theory, they use a "face lattice", $\mathbb{F}$. However, this turns out to be needlessly complicated, as we can get this structure from the interval object. | |||||
To each element $\phi : \mathbb{I}$ (referred to as a _formula_) we assign a _cofibrant proposition_ $[\phi] : \mathscr{U}_{\omega}$[^3] which is inhabited when $\phi = i1$. In the code, we write `IsOne phi` for $[\phi]$ and it is inhabited by a distinguished element `itIs1 : IsOne i1`. This family of types is *definitionally proof-irrelevant*, which means that any two inhabitants of `IsOne phi` are equal. | |||||
<details> | |||||
<summary>A note on terminology</summary> | |||||
Throughout the rest of this post I'll refer to elements of the interval as either "endpoints" or "formulas" depending on how they're used. These aren't technical terms, and are meant to be just indicative. The convention is roughly that, if $i : \mathbb{I}$ is used as the argument to a path, or to a filler, or it's the bound variable in a composition (or etc), it's called an _endpoint_; If it's used to denote a restriction (i.e., there might reasonably be an element of $[\phi]$ in the context), it's called a _formula_. | |||||
Also I apologise for the garbled terminology (or even ambiguity) when talking about $[\phi]$ vs $\phi$, since both can reasonably be called formulas. | |||||
</details> | |||||
[^3]: As a funny sidenote, the object in a category (if it exists) which corresponds to the type-theoretical universe of propositions is called the _subobject classifier_, written $\Omega$. So $[]$ is a family of maps $\mathbb{I} \to \Omega_{\omega}$. If only we could fit another $\Omega$ in there... | |||||
We can interpret these propositions as being _shapes of partial cubes_. For instance, the proposition $[i \lor \neg i]$ (for $i : \mathbb{I}$) represents a "line" which is defined when $i = i0$ or $i = i1$, but not in the middle; This isn't a line as much as it is a pair of points. | |||||
Thinking back to the "human-readable" description of the composition operation, the proposition $\phi$ specifies the _shape_ of the open box, but not the box itself. | |||||
### Partial Elements | |||||
We call a function $f : [\phi] \to A$ a _partial element_ of $A$, that is, an element of $A$ which is only defined when $[\phi]$ is inhabited. For these we have a special pattern-matching notation, termed a _system_, which is written between brackets. | |||||
``` | |||||
partialBool : (i : I) -> Partial (ior i (inot i)) Bool | |||||
partialBool = \i [ (i = i0) -> false, (i = i1) -> true ] | |||||
``` | |||||
The element `partialBool` above is a boolean with different values when `i = i0` or `i = i1`. However, this does not lead to a contradiction, because to extract the underlying bool we need to apply `partialBool` not only to an element of the interval, but also to an inhabitant of `IsOne (ior i (inot i))`. This is why it's critical that the type checker distinguishes between $i \lor \neg i$ and $i1$! | |||||
As another implementation note, the type `Partial phi A` is a version of `IsOne phi -> A` with a more extensional equality. Two elements of `Partial phi A` are equal when they represent the same subcube, i.e., they take equal values for every assignment of variables which makes `phi = i1`. | |||||
Furthermore, there is a _dependent_ version of `Partial`{.kw}, `PartialP`{.kw}[^4], which allows the type `A` itself to be a partial element of $\mathscr{U}$. This will be used later when we introduce the glueing operation. | |||||
In the composition operation, the partial element with shape $\phi$ specifies the open box itself. | |||||
[^4]: By convention we call the dependent versions of cubical primitives their name suffixed with a big P. `PathP`, `PartialP`, etc. Don't ask me why. | |||||
### Extensibility | |||||
Given a type $A$ and a partial element $u : [\phi] \to A$, we can define the type of elements $a : A$ which _extend_ $u$. These are _total_ elements, in that their existence does not depend on the inhabitation of $[\phi]$ (for any $\phi$). To say they extend $u$ is to say that, given $[\phi]$, we have that $u(\mathtt{1is1}) = a$. In the theory, where we have All the fancy symbols, we write $A[\phi \to u]$ for the type of extensions of $u$, but in the code, where we're limited to boring ASCII, we just write `Sub A phi u`. | |||||
We can make any total element `u : A` into a partial element, with any formula that we want, by ignoring the proof. The constructor `inS` for the `Sub`-types expresses that this partial element agrees with `u` on any `phi` that we choose. | |||||
``` | |||||
inS : {A : Type} {phi : I} (u : A) -> Sub A phi (\x -> u) | |||||
``` | |||||
We also have a projection operation for `Sub` types, which undoes `inS`. Furthermore, `outS {A} {i1} {u} x` computes to `u i1 itIs1`, since `x` agrees with `u` on `phi`. | |||||
``` | |||||
outS : {A : Type} {phi : I} {u : Partial phi A} -> Sub A phi u -> A | |||||
``` | |||||
With the idea of a cubical `Sub`{.kw}type we can express the type of the fourth argument of the composition operation, the "bottom" face of an open box which _agrees with_ (extends!) the partial element specifying the sides.
## Composition | |||||
As stated before, the composition operation takes as input the description of an open cube with a face removed and computes that missing face. However this is not a helpful definition if we do not yet have intuition for what "cubes with missing faces" look like! So before explaining the computational behaviour of the composition operation (which is... quite something), let me show you some examples. | |||||
Before we get to the examples, for reference, this is the type of the composition operation, written out in syntax: | |||||
``` | |||||
comp : (A : I -> Type) {phi : I} (u : (i : I) -> Partial phi (A i)) | |||||
-> (a0 : Sub (A i0) phi (u i0)) | |||||
-> A i1 | |||||
``` | |||||
A trivial use of composition is one where we take the formula $\phi = i0$, that is, the partial cube specifying the sides is defined _nowhere_. In this case we may illustrate the input face of the composition operation as agreeing with... nothing. | |||||
<figure> | |||||
<img src="/diagrams/ctt/transp.svg" alt="transport, illustrated" style="min-width: 150px;" /> | |||||
<figcaption>The arguments to `comp A {i0} (\k [])`, illustrated.</figcaption> | |||||
</figure> | |||||
That's right, in the case where the formula is always false and the partial cube is empty, the input of the composition operation is just a point `a0 : A i0`, the left endpoint of a path. And by looking at the type of the composition operation, or thinking about its description, you can see where this is going! We give it `a0 : A i0`, and it gives us an element `comp A {i0} (\k []) a0 : A i1`! | |||||
That's right, by ignoring the extra power which the composition operation gives us over boring transport, we get back boring transport. Not too surprising, let's keep going. | |||||
For an example which illustrates composition with a cube, suppose we have three points, $x$, $y$, and $z$, all in some type $A$. Furthermore suppose that we have paths $p : x \equiv y$ and $q : y \equiv z$. By the transitive property of equality, we know there should be a path between $y$ and $z$. Furthermore, we know that transporting along this composite should be equivalent to transporting along $p$ then along $q$. But how can we, using cubical methods, build the composite of $p$ and $q$? | |||||
If you guessed the answer was "using composition", you... don't get a lot of extra points. It was heavily implied. But you can still have a cookie, since I suppose it can't be helped. To create this composite we need to draw a square with 3 lines, such that the missing line connects $x$ and $z$. Furthermore, the requirement that transporting along the composite transports along both constituent paths will guide us in creating this drawing. We only have two paths, though! | |||||
<figure> | |||||
<img src="/diagrams/ctt/comp_path.svg" alt="composition of paths" style="min-width: 230px;"> | |||||
<figcaption>The lid of this square gives us the composite $q \circ p$ of $p$ and $q$.</figcaption> | |||||
</figure> | |||||
Turns out that only having two paths is not an issue, since we can always take the reflexivity path to get the side we didn't have. To make it clearer, the partial element $u : (j : \mathbb{I}) \to \mathrm{Partial}\ (\neg i \lor i)\ A$ is the tube with sides $x$ and $q(j)$, and the input $p(i) : A$ is the bottom side. These agree because when $j$ (the direction of composition) is $i0$ (the base), $u$ has left endpoint $x$ and right endpoint $y$; A path between these is exactly what $p(i)$ ($i$ is the direction of the path) is.
``` | |||||
trans : {A : Type} {x : A} {y : A} {z : A} | |||||
-> Path x y | |||||
-> Path y z | |||||
-> Path x z | |||||
trans {A} {x} p q i = | |||||
comp (\i -> A) | |||||
{ior i (inot i)} | |||||
(\j [ (i = i0) -> x, (i = i1) -> q j ]) | |||||
(inS (p i)) | |||||
``` | |||||
This expression is a syntactic representation of the composition drawn above; The dotted line in that diagram is the result of the composition operation. | |||||
## Cubical Complication 2: Computing Compositions | |||||
It doesn't suffice to describe the composition operation in types, we also need to describe how it computes when applied to enough arguments. The composition operation reduces to a canonical element of the type $A(i1)$ based on the structure of the function $A : \mathbb{I} \to \mathscr{U}$, by cases. For example, when $A$ computes to a function type, the composition will evaluate to a lambda expression; When $A$ is a $\sum$-type, it computes to a pair, etc. | |||||
Before we get started, one thing to note is that, since we have the $i \land j$ operation on elements of the interval, the composition operation can compute not only missing faces, but the missing _inside_ of a cube, which we call its filler. For instance, the filler `fill A {i0} (\k []) a0 i` connects `a0` and `comp A {i0} (\k []) a0` in the `i` direction, since it is the 1-dimensional cube (path) between the given and missing faces. | |||||
``` | |||||
fill : (A : I -> Type) {phi : I} | |||||
(u : (i : I) -> Partial phi (A i)) | |||||
(a0 : Sub (A i0) phi (u i0)) | |||||
-> (i : I) -> A i | |||||
fill A {phi} u a0 i = | |||||
comp (\j -> A (iand i j)) | |||||
{ior phi (inot i)} | |||||
(\j [ (phi = i1) as p -> u (iand i j) p, (i = i0) -> outS a0 ]) | |||||
(inS (outS a0)) | |||||
``` | |||||
Fillers will be fundamental in reducing compositions in dependent types, including pairs, functions, and general inductive types. | |||||
### Simple types | |||||
A good place to start is composition for inductive types without parameters, since that is trivial. For instance, any composition in the booleans just evaluates to its argument. This is also the case for many other types: the natural numbers, the integers, the rational numbers, etc.
$$\mathrm{comp}\ (\lambda i. \mathrm{Bool})\ [\phi \to u]\ a0 = a0$$ | |||||
For parametrised types like lists, we need to explain composition by recursion. In the `nil` case it's trivial, we can just return `nil`. In the `cons` case, though, we need to recursively apply composition in the head and the tail, to end up with a list of the right type, agreeing with the right faces. | |||||
$$ | |||||
\mathrm{comp}\ (\lambda i. \mathrm{List}(A))\ [\phi \to \mathtt{cons}\ x\ xs]\ (\mathtt{cons}\ a\ as) =\\ | |||||
\mathtt{cons}\ (\mathrm{comp}\ (\lambda i. A) [\phi \to x]\ a) (\mathrm{comp}\ (\lambda i. \mathrm{List}(A)) [\phi \to xs]\ as)$$ | |||||
### Dependent functions | |||||
Starting with the full reduction rule for composition in functions would be a lot, so I'll build it up incrementally. First, I'll explain transport in simple functions. Then, transport in dependent functions. After I've explained those two we can add back the sides to get the full composition for functions. | |||||
So, consider for starters transport in a line of $A \to B$, where both are functions $\mathbb{I} \to \mathscr{U}$. We're given a function $f : A(i0) \to B(i0)$ and want to compute a function $f : A(i1) \to B(i1)$. Start by introducing a $\lambda$ abstraction binding a single variable $x : A(i1)$, under which we'll work. | |||||
Since to get _any_ sort of element of $B$ we need to apply $f$, we must first transport $x$ to get an element of $A(i0)$, to be the argument of $f$. The line $\lambda i. A(\neg i)$ connects $A(i1)$ and $A(i0)$, so that's what we transport over. Take $x\prime = \mathrm{comp}\ (\lambda i. A (\neg i))\ (\lambda k [])\ x$. | |||||
The application $f\ x\prime$ has type $B(i0)$, and we need to transport that to an element of $B(i1)$. Again we invoke the trivial composition to get $y = \mathrm{comp}\ B\ (\lambda k [])\ (f\ x\prime)$. Since we have computed an element of $B(i1)$, we're done; Thus, we can define the composition as $\mathrm{comp}\ (\lambda i. A \to B)\ (\lambda k [])\ f = \lambda x. y$.
To see the details of how composition generalises to dependent functions, consult the [appendix](#appendix), since it's a bit verbose to be here. | |||||
### Dependent pairs | |||||
The composition for pairs is what you'd expect. We have to transport the first element of the pair, and use a filler when transporting the second element to make sure the endpoints line up. Again, the details are in the [appendix](#appendix) if knowing more about composition strikes your fancy, but it's not too necessary to follow the proofs. | |||||
To be concise here, a simple equation that should clarify the behaviour of transport on pairs is the simply-typed definition of transport: | |||||
$$ | |||||
\mathrm{transp}\ (\lambda i. A \times B)\ (x, y) =\\ | |||||
(\mathrm{transp}\ (\lambda i. A)\ x, \mathrm{transp}\ (\lambda i. B)\ y) | |||||
$$ | |||||
### Paths | |||||
In the case of paths, composition is composition. We're given a path $p0 : Path\ A\ u\ v$, where all of $A$, $u$ and $v$ can depend on a variable $i : \mathbb{I}$, which is the direction of composition. Furthermore we have a family of partial paths $p$ with which $p0$ agrees, and with which the result must also agree. | |||||
We start by assuming the existence of a dimension $j : \mathbb{I}$, which will be bound later. When $j = i0$, the resulting composition has to have value $u(i)$, and when $j = i1$, the result must be $v(i)$. Furthermore, when $\phi$, the result must have the same value as $p(j)$. We can package these constraints straightforwardly in the partial element $[ \phi \to p(j), (j = i0) \to u, (j = i1) \to v ]$, again abusing notation for the applications of $u(i)$ and $v(i)$.
$$\mathrm{comp}\ (\lambda i. \mathrm{Path}\ A(i)\ u(i)\ v(i))\ [\phi \to p]\ p0 =\\ | |||||
\lambda j. \mathrm{comp}\ A\ [ \phi \to p(j), (j = i0) \to u, (j = i1) \to v ]\ (p0(j))$$ | |||||
### A note on naming: Pretypes | |||||
All of the types we explained composition for above are, well, types. In cubical type theory, or at least in this presentation, we reserve the word _type_ for those objects which have a composition structure. The ones which _don't_ have a composition structure are called pretypes. | |||||
Alternatively we could call the types for which we have composition the _fibrant_ types, since they have a fibrancy structure, as in the [CHM paper](https://arxiv.org/abs/1802.01170): They have a transport structure and a homogenous composition structure, with which we can assemble a composition structure as above. | |||||
All of the type formers inherited from MLTT ($\prod$ and $\sum$), the path types, and every inductive and higher inductive type made out of types are fibrant, leaving only the cubical primitives (the interval, partial elements, and cubical subtypes) as pretypes. However, we could consider an extension of type theory where both sorts are given equal importance: This would be a two-level type theory, a realisation of Voevodsky's Homotopy Type System. | |||||
## Auxiliary Definitions | |||||
In this section we're going to talk about a handful of operations, which can be defined in terms of what we have so far, which will be used in discussing the $\mathrm{Glue}$ types, which are used in interpreting the univalence axiom. In contrast to [the CCHM paper](https://arxiv.org/abs/1611.02108), I'll only talk about the notions which are mandatory for defining the glueing operation. Composition for glue is very complex, and needlessly detailed for the purposes of this post. | |||||
### Contractible Types | |||||
We define a type $A$ to be _contractible_ if, and only if, there exists an element $x : A$ (called the centre of contraction) to which all other elements $y : A$ are Path-equal. Cubically, we can give an alternative formulation of contractibility: $A$ is contractible iff. every partial element $u : \mathrm{Partial}\ \phi\ A$ is extensible. | |||||
Let $p$ be the proof that $A$ is contractible, a pair containing the centre of contraction and the proof that any element of the type is equal to the centre. We define $\mathrm{contr}\ [\phi \to u] = \mathrm{comp}\ (\lambda i. A)\ [\phi \to (p.2\ u)(i)]\ p.1$. | |||||
Conversely, if we have an extension for any partial element, we can prove that type is contractible in the typical sense: Take the centre of contraction to be $\mathrm{contr}\ []$ and the proof that any $y$ is equal to that is given by extending the partial element $[ (i = i0) \to \mathrm{contr}\ [], (i = i1) \to y]$. | |||||
As an example of contractible types, we have already seen `Singl A a`, the type of "elements of A equal to a". This has a centre at `(a, refl)`, which can be proven by a connection. The unit (or top) type is also contractible, having `tt` as a centre, which can be proven by induction. It can be proven that any contractible type is equivalent to the unit type, making all of them maximally uninteresting. | |||||
### Equivalences | |||||
Since we have the univalence axiom, it is important for soundness that we define a notion of equivalence for which "being an equivalence" is a mere proposition: Either a function is an equivalence, or it isn't. We choose one which is cubically convenient, namely that of "contractible fibers". | |||||
The fiber of a function $f : A \to B$ at a point $y : B$ is a pair of an input $x : A$ together with a proof that $f(x) \equiv y$. We define $f$ to be an equivalence if for every element $y : B$, the fiber $\mathrm{fiber}\ f\ y$ is contractible. That means that, for every element in the range, there is a corresponding element in the domain, and this element is unique. | |||||
Using this notion of equivalence we can prove that every equivalence has an inverse, by taking the first element of the centre of contraction for every fiber: | |||||
``` | |||||
inverse : {A : Type} {B : Type} {f : A -> B} -> isEquiv f -> B -> A | |||||
inverse eqv y = (eqv y) .1 .1 | |||||
``` | |||||
Furthermore, this function is an actual inverse: | |||||
``` | |||||
section : {A : Type} {B : Type} (f : A -> B) (eqv : isEquiv f) | |||||
-> Path (\x -> f (inverse eqv x)) id | |||||
section f eqv i y = (eqv y) .1 .2 i | |||||
``` | |||||
We can also formulate the requirement that a function has contractible fibers cubically: A function is an equivalence iff every one of its partial fibers is extensible. | |||||
## Glueing & Univalence | |||||
Since I like quoting the impenetrable definitions of the paper, glueing expresses that "extensibility is invariant under equivalence". Concretely, though, it's better to think that the $\mathrm{Glue}$ operation "glues" together a partial type $T$ onto a total type $A$ (which we call the base) to get a total type which extends $T$. We can't do this freely, though, so we require an extra datum: A (partial) equivalence between $T$ and $A$. | |||||
``` | |||||
Glue : (A : Type) {phi : I} -> Partial phi ((T : Type) * Equiv T A) -> Type | |||||
``` | |||||
The type $\mathrm{Glue}\ A\ [\phi \to (T, f)]$ extends $T$ in the sense that, when $\phi = i1$, $\mathrm{Glue}\ A\ [\phi \to (T, f)] = T$. | |||||
The "user-friendly" typing rule for Glue is as presented above. Internally we separate the type $T$ from the equivalences $f$ to make defining composition in Glue simpler. These types come with a constructor, $\mathrm{glue}$, which says that, given an inhabitant $t : \mathrm{PartialP}\ \phi\ T$, and a total element $a : A$ which extends the image of $f\ t$ (the equivalence), we can make an inhabitant of $\mathrm{Glue}\ A\ [\phi \to (T, f)]$.
Conversely there is a projection, $\mathrm{unglue}$, which extracts a value of $A$ from a value of $\mathrm{Glue}\ A\ [\phi \to (T, f)]$. When applied to an element constructed with $\mathrm{glue}$, unglueing simply extracts it; When applied to a neutral value, as long as $\phi = i1$, the value of the glued type will be a value of $T$, and the equivalence is defined; We can then apply the equivalence to get a value of $A$. | |||||
Using the boundary conditions for $\mathrm{Glue}$ we can define, from any equivalence $A \simeq B$, a path $A \equiv B$. | |||||
``` | |||||
univalence : {A : Type} {B : Type} -> Equiv A B -> Path A B | |||||
univalence {A} {B} equiv i = | |||||
Glue B (\[ (i = i0) -> (A, equiv), | |||||
(i = i1) -> (B, the B, idEquiv {B}) ]) | |||||
``` | |||||
For the proof that transporting along this path has the effect of applying the equivalence, I'll need to handwave some stuff about the behaviour of transport in $\mathrm{Glue}$. First, we can illustrate the Glue done above as the dotted line in the square below: | |||||
<figure> | |||||
<img src="/diagrams/ctt/univalence.svg" alt="univalence" style="min-width: 250px;" />
<figcaption>This square represents the glueing used for univalence. The left and right sides are equivalences.</figcaption> | |||||
</figure> | |||||
How would one go about transporting an element across the dotted line there? Well, I have a three-step program, which, since we're talking about squares, has to be rounded up to a neat 4. Suppose we have an element $x : A$ which we want to turn into an inhabitant of $B$. | |||||
- First, we can apply the equivalence $\mathrm{equiv}$ to $x$, getting us an element $\mathrm{equiv}.1\ x : B$. In the ideal world we'd be done here, but, in a more general case, we still have to do the other three filled-in lines. | |||||
- We transport $\mathrm{equiv}.1\ x$ along the path $\lambda i. B$ to get an element $\mathrm{comp}\ (\lambda i. B)\ (\lambda i [])\ (\mathrm{equiv}.1\ x) : B$ | |||||
- Finally we can apply the inverse of the identity equivalence (which is, again, the identity) which does not alter what we've done so far. | |||||
We'd be done here, but since transport is a special case of composition, we need to compose along the line $\lambda i. B$ with the faces of the overall composition to get a _proper_ element of the type $B$. Of course, in this case, the faces are trivial and the system is empty, but we still have to do it. | |||||
To construct a $\mathrm{Path}\ (\mathrm{transp}\ (\lambda i. \mathrm{univalence}\ f\ i))\ f.1$, there is a bit of cubical trickery which needs to be done. This proof is commented in the repository [here], so I recommend you read it there for the details. The short of it is that $\mathrm{univalence}$ plus this path, which we call $\mathrm{univalence}\beta$, implies the full univalence axiom, namely that $(A \simeq B) \simeq (A \equiv B)$. | |||||
### Proofs using univalence | |||||
With univalence, and a proof that isomorphisms give rise to equivalences, we can get to proving some stuff about types! That's exciting, right? I'm excited. The proof that isomorphisms give rise to equivalences is, uh, very complicated, so I won't explain it here. Full disclosure, it seems like this proof is a bit of folklore: I got it from the [cubicaltt repo], and I think the version in [Cubical Agda]'s base library is the same! | |||||
[cubicaltt repo]: https://github.com/mortberg/cubicaltt/blob/a331f1d355c5d2fc608a59c1cbbf016ea09d6deb/experiments/isoToEquiv.ctt#L7-L63 | |||||
[Cubical Agda]: https://github.com/agda/cubical/blob/3fbd0eb908474181606977f2a5f58363fceba1db/Cubical/Foundations/Isomorphism.agda#L55-L101 | |||||
One very simple use of univalence, which doesn't require more fancy types, is proving that the universe $\mathscr{U}$ is not a set, in the sense of HoTT. Recall that a set (or h-set, to be more precise), is a type where any parallel equalities are themselves equal. In a type: | |||||
``` | |||||
isHSet : Type -> Type | |||||
isHSet A = {x : A} {y : A} (p : Path x y) (q : Path x y) -> Path p q | |||||
``` | |||||
We are going to prove that any inhabitant of $\mathrm{isHSet}\ \mathscr{U}$ is baloney. For this, we must define the type of booleans, the discrete space with two points: | |||||
``` | |||||
data Bool : Type where | |||||
true : Bool | |||||
false : Bool | |||||
``` | |||||
First, we can prove that $\mathrm{true} \not\equiv \mathrm{false}$. For this, suppose it were: Given a proof $p : \mathrm{true} \equiv \mathrm{false}$, we can build the path $\lambda i. \mathrm{if}\ p(i)\ \mathrm{then}\ \mathrm{Bool}\ \mathrm{else}\ \bot$, which connects $\mathrm{Bool}$ (an arbitrary choice) and $\bot$. Transporting $\mathrm{true}$ (another arbitrary choice) along this path gives us an inhabitant $\mathrm{transp}\ (\lambda i. \dots)\ \mathrm{true} : \bot$, which is what we wanted.^[No, there is no reason to use the QED symbol here. It's my blog, though!] <span class="qed">$\blacksquare$</span>
Define the function $\mathrm{not}\ x = \mathrm{if}\ x\ \mathrm{then}\ \mathrm{false}\ \mathrm{else}\ \mathrm{true}$. By induction, one can prove that $\mathrm{not}\ (\mathrm{not}\ x) \equiv x$ for any boolean, and thus $\mathrm{not}$ is its own inverse. Appealing to the fact that isomorphisms are equivalences, and then to univalence, we get a path $\mathrm{notp} : \mathrm{Bool} \equiv \mathrm{Bool}$ such that $\mathrm{transp}\ \mathrm{notp}\ x = \mathrm{not}\ x$. | |||||
Now we assume an inhabitant $\mathrm{sure}$ (to be read in a very sarcastic voice) of $\mathrm{isHSet}\ \mathscr{U}$ and derive a contradiction, that is, an inhabitant of $\bot$. The path $\mathrm{sure}\ \mathrm{notp}\ \mathrm{refl}$ connects $\mathrm{notp}$ and $\mathrm{refl}$ in the direction $i$. From this we build the path $\lambda i. \mathrm{transp}\ (\mathrm{sure}\ \mathrm{notp}\ \mathrm{refl})(i)\ \mathrm{false}$, which has as endpoints $true$ and $false$. To see this, compute: | |||||
- For $i = i0$, we have $\mathrm{transp}\ \mathrm{notp}\ \mathrm{false} = \mathrm{not}\ \mathrm{false} = \mathrm{true}$. | |||||
- For $i = i1$, we have $\mathrm{transp}\ \mathrm{refl}\ \mathrm{false} = \mathrm{false}$. | |||||
Applying the proof that $\mathrm{true} \not\equiv \mathrm{false}$ we have a contradiction, which is exactly what we wanted.^[I know, I know, I have to stop. Did you know I had to add the word "exactly" there so the paragraph overflew onto the next line and the QED symbol would show up right? It's terrible!]<span class="qed">$\blacksquare$</span> | |||||
"Big deal," I hear you say. "So what, the universe isn't a set?" Well, you're right. This isn't an exciting fact, or an exciting proof. To read. Getting this to go through was incredibly satisfying. But if we want to prove non-trivial facts using univalence, we're going to need a bigger ~~boat~~ universe. Ours doesn't have enough types. | |||||
## Higher Induction | |||||
To say that our universe $\mathscr{U}$ with its infinitely many types is lacking some is... weird, I'll admit. However, it's missing a lot of them! A countably infinite amount, in fact. While we have all inductive types, we only have the zero-dimensional inductive types, and not the higher inductive types! | |||||
I've written about these before a bit in the previous post, about induction. In short, while inductive types allow us to define types with points, higher inductive types let us define types with points and paths. Full disclosure, as of the time of writing, the implementation of HITs in [cubical](https://git.amelia.how/amelia/cubical) is partial, in that their fibrancy structure is a big `error`. However we can still write some simple proofs involving them.
### The Interval | |||||
Wait, didn't we talk about this before? No, no, this is the right interval. We're still on track. | |||||
The $\mathrm{Interval}$ is the inductive type freely generated by two constructors, $\mathrm{ii0}$ and $\mathrm{ii1}$, and a path $\mathrm{seg}$ connecting them. Well, that's the theory, but the reality is a bit different. In order to support eliminating (read: pattern matching on) inductive types, we can't simply assume paths exist, even in cubical type theory. What we end up with instead is a constructor parametrised by some interval (that's $\mathbb{I}$) variables, and an attached _boundary_ condition.
In the case of the Interval, we have this definition: | |||||
``` | |||||
data Interval : Type where | |||||
ii0 : Interval | |||||
ii1 : Interval | |||||
seg i : Interval [ (i = i0) -> ii0 | |||||
, (i = i1) -> ii1 | |||||
] | |||||
``` | |||||
This says that `seg i0` is definitionally equal to `ii0`, and `seg i1` is definitionally equal to `ii1`. We can get a path connecting them by abstracting over the $i$ variable: $\lambda i. \mathrm{seg}\ i : \mathrm{Path}\ \mathrm{ii0}\ \mathrm{ii1}$. To pattern match on an element of the interval we need three (really, four, but one is details---and automated) things: | |||||
- A case for `c0 : P ii0` | |||||
- A case for `c1 : P ii1`
- A proof `cseg` which says the cases for `c0` and `c1` agree. | |||||
To express the type of `cseg`, we need to power up our path types a bit. Conceptually, just like a $\mathrm{Path}$ is a specialised version of $\mathbb{I} \to A$, we need a _dependent_ path, called $\mathrm{PathP}$, which specialises $\prod{(i : \mathrm{I})} A\ i$, that is, the type of the endpoints is allowed to depend on the interval variable. With that, the type of `cseg` becomes `PathP (\i -> P (seg i)) c0 c1`, since `c0 : P (seg i0)` and `c1 : P (seg i1)`.
As for that fourth thing I mentioned? In addition to preserving each of the constructor data, a map between Interval-algebras needs to be _fibrancy preserving_: Compositions in the domain are mapped to the "appropriate" compositions in the range. In implementations of cubical type theory, this is automatic, since the range has a fibrancy structure (since it is in $\mathscr{U}$), and preserving compositions can be done automatically and uniformly. | |||||
Since we already have an interval pretype $\mathbb{I}$, having an interval _type_ isn't too interesting. One thing we can do is prove function extensionality... again... reproducing an argument from the HoTT book. | |||||
``` | |||||
iFunext : {A : Type} {B : A -> Type} | |||||
(f : (x : A) -> B x) | |||||
(g : (x : A) -> B x) | |||||
-> ((x : A) -> Path (f x) (g x)) -> Path f g | |||||
iFunext f g p i = h' (seg i) where | |||||
h : (x : A) -> Interval -> B x | |||||
h x = \case | |||||
ii0 -> f x | |||||
ii1 -> g x | |||||
seg i -> p x i | |||||
h' : Interval -> (x : A) -> B x | |||||
h' i x = h x i | |||||
``` | |||||
I'm pretty sure that I had reproduced this proof in the previous blog post as well, so you can check there for a more thorough explanation. Let's move on to some more exciting higher inductive types. | |||||
### Synthetic Homotopy Theory: $\mathbb{S}^1$ | |||||
I am not a _homotopy type_ theorist, but I am a homotopy _type theorist_, which means I am qualified to prove some facts about spaces. A particularly simple space, which is nonetheless non trivial, is the circle, $\mathbb{S}^1$, the type freely generated by a point and a loop. | |||||
``` | |||||
data S1 : Type where | |||||
base : S1 | |||||
loop i : S1 [ (i = i1) -> base, (i = i0) -> base ] | |||||
``` | |||||
We can illustrate this type like this: | |||||
<figure> | |||||
<img src="/diagrams/ctt/circle.svg" alt="The circle" style="min-width: 150px;" /> | |||||
<figcaption>The circle.</figcaption> | |||||
</figure> | |||||
The elimination principle for this is just like for the interval. We need a point `b : P base` and a dependent path `l : PathP (\i -> P (loop i)) b b` (since `loop i0 = loop i1 = base` the dependent path is not strictly necessary). For example, to define a function $\mathbb{S}^1 \to \mathscr{U}$, we need to pick a type $X : \mathscr{U}$ and a path $X \equiv X$. All non-trivial paths in types are going to be generated by univalence on some interesting equivalence. | |||||
Allow me one paragraph's worth of digression before we get to the point. The type of integers is defined as the coproduct of $\mathbb{N} + \mathbb{N}$^[In the implementation, this definition is unfolded], where $\mathrm{inl}\ x$ is interpreted as $+x$ and $\mathrm{inr}\ x$ is $-(x + 1)$. With this representation, one can define the functions $\mathrm{sucZ} = x + 1$ and $\mathrm{predZ} = x - 1$, and prove that they are inverses, such that $\mathrm{sucZ}$ is an autoequivalence of $\mathbb{Z}$.
Consider the function $\mathrm{helix} : \mathbb{S}^1 \to \mathscr{U}$ which maps $\mathrm{base}$ to $\mathbb{Z}$ and $\mathrm{loop}(i)$ to $(\mathrm{univalence}\ sucZ)(i)$. It's easy to check that this definition is type-correct (and boundary-correct), so we can apply it to elements of the circle and get back types and equivalences. Now we can define the function $winding : \mathrm{base} \equiv \mathrm{base} \to \mathbb{Z}$ by | |||||
``` | |||||
winding : Path base base -> Int | |||||
winding p = transp (\i -> helix (p i)) (pos zero) | |||||
``` | |||||
This map counts, for any loop $x : \mathrm{base} \equiv \mathrm{base}$, the number of times x "goes around" the $\mathrm{loop}$. For example, going around it once: | |||||
``` | |||||
windingLoop : Path (winding (\i -> loop i)) (pos (succ zero)) | |||||
windingLoop = refl | |||||
``` | |||||
or once in the other direction: | |||||
``` | |||||
windingSymLoop : Path (winding (\i -> loop (inot i))) (neg zero) | |||||
windingSymLoop = refl | |||||
``` | |||||
or no times at all: | |||||
``` | |||||
windingBase : Path (winding (\i -> base)) (pos zero) | |||||
windingBase = refl | |||||
``` | |||||
If we also write a function $wind : \mathbb{Z} \to \mathrm{base} \equiv \mathrm{base}$ and prove that they are inverses, what we end up with is a fully synthetic, machine-checked proof that $\Omega(\mathbb{S}^1) \equiv \mathbb{Z}$. Of course, we could also _define_ $\mathbb{Z}$ as $\Omega(\mathbb{S}^1)$, but in that case the proof is a lot less interesting! | |||||
### Category Theory: The Homotopy Pushout | |||||
Category theory has the notion of limits and colimits of diagrams, which give rise to lots of important concepts. A full explanation of colimits is not due here, but it should suffice to say that if we want to do mathematics internally to cubical type theory, a complete and co-complete category is a fine setting to do it. Given a diagram like the one on the left, a _cocone_ under it is a diagram like the one on the right, which commutes. The _pushout_ of a span is its colimit, that is, the "smallest" such cocone.
<div class="mathpar"> | |||||
<figure style="width: 50%;"> | |||||
<img src="/diagrams/ctt/span.svg" alt="Span" style="min-width: 250px;" /> | |||||
<figcaption>A **span** is a triple of types $A$, $B$, $C$ with maps $f : A \to B$ and $g : A \to C$</figcaption> | |||||
</figure> | |||||
<figure style="width: 50%;"> | |||||
<img src="/diagrams/ctt/span_colimit.svg" alt="Colimit of a span" style="min-width: 250px;" /> | |||||
<figcaption>A **cocone under a span** is a type $P$ and inclusions $i_1 : B \to P$ and $i_2 : C \to P$ such that $i_1 \circ f = i_2 \circ g$</figcaption> | |||||
</figure> | |||||
</div> | |||||
Normal Martin-Löf type theory does not give us the tools to define pushouts, but, as you will have guessed, cubical type theory does. We can define pushouts as a higher inductive type, like this: | |||||
``` | |||||
data Pushout {A B C : Type} (f : A -> B) (g : A -> C) : Type where | |||||
inl : (x : B) -> Pushout f g | |||||
inr : (y : C) -> Pushout f g | |||||
push i : (a : A) -> Pushout f g [ (i = i0) -> inl (f a) | |||||
, (i = i1) -> inr (g a) ] | |||||
``` | |||||
The `push` path constructor is parametrised by an element $a : A$ and an endpoint $i : \mathbb{I}$. Applying function extensionality, one can turn this into a path between $\mathrm{inl} \circ f$ and $\mathrm{inr} \circ g$, which is what we need for the diagram to commute. Homotopy pushouts are very general and can be used to define a number of homotopy-theoretic constructions. Quoting the HoTT book, section 6.8, we have:
> - The pushout of $1 \leftarrow A \to 1$ is the **suspension** $\Sigma A$ | |||||
> - The pushout of $A \leftarrow A \times B \to B$ is the **join** of $A$ and $B$, written $A * B$ | |||||
> - The pushout of $1 \leftarrow A \xrightarrow{f} B$ is the **cone** or **cofiber** of $f$ | |||||
The big file with all the proofs in [cubical](https://git.amelia.how/amelia/cubical) features a proof that the suspension $\Sigma A$ defined directly as a HIT is the same as the one defined by the pushout of $1 \leftarrow A \to 1$. | |||||
## But Why? | |||||
The motivation for cubical type theory was made explicit two posts ago, when I was talking about equality for the first time, but it's worth mentioning it again, especially after all^[Actually, the most complex part of Cubical Type Theory is the definition of composition for $\mathrm{Glue}$, which is far too hardcore for a blog post, even for its appendix.] of its complexity has been exposed like this. And let me be clear, it is _very_ complex. No amount of handwaving away details can make cubical type theory seem like a "natural" extension: It's not something we found, like the groupoid interpretation of type theory. It's something we invented.
And what _did_ we find? A type system with great computational behaviour for all of Homotopy Type Theory. In particular, an argument based on the cubical set _model_ of type theory, rather than on the syntax, proves that cubical type theory enjoys _canonicity_: Every boolean in the empty context is _strictly_ equal to either $\mathrm{true}$ or $\mathrm{false}$, and other types enjoy similar properties for their canonical elements. | |||||
The big failing of Homotopy Type Theory before the cubes came to save us was that there were closed inhabitants of types not equal to any of their constructors. In particular, any construction with path induction would get stuck on the terms $\mathrm{ua}(e)$ of the univalence axiom. Cubical type theory solves this twofold: It gives us ways of working with paths _directly_, using operations on the interval and composition, _and_ it explains what the computational behaviour of $\mathrm{univalence}$ is. | |||||
So, if you ask me, the complexity is justified. It's one of those things that took me a while to get my head around, but where the learning process _and_ the result (knowing about cubes) were beneficial. And god, did it take a while. The first time I encountered the cubical type theory paper was in mid 2019, almost two years ago! It took me _that long_ to go from "what the hell is this" to "this is neat but it confuses me" to "I understand this" to "I can implement this" (we are here). | |||||
Writing about it has been my white whale for that long---I'll need a new one, suggestions welcome! Maybe I should write a monad tutorial? Heard those are complicated, too. | |||||
If you made it this far, I thank you deeply. This post is a behemoth! In fact the next **word** is the 7000th, which almost makes this post longer than my two previous longest posts _combined_! If you haven't abandoned me yet, I swear: I will never make you read this much again. However, if you made it this far and understood everything, I only have one thing to say: Go forth, dear reader, and fill those cubes. | |||||
Well, that sounded weird. I won't say it again. | |||||
---- | |||||
## Appendix: Blog/EXTRA CCC {#appendix} | |||||
<sup><sup>_CCC stands for "computing cubical compositions", I'm so sorry_</sup></sup> | |||||
### Functions | |||||
Now we add one step of generalisation, and consider transport in a line of $\prod{(x : A)} B\ x$, where $A : \mathbb{I} \to \mathscr{U}$ as before but $B : \prod{(i : \mathbb{I})} A(i) \to \mathscr{U}$. A given $f : (x : A(i0)) \to B(i0)\ x$ will become, through a trick of magic, a function $(x : A(i1)) \to B(i1)\ x$.
The first step is to define $x\prime : A(i0)$ as before, apply $f$ to get an element $B(i0)\ x\prime$, then cast the result of the application along $\lambda i. B(i)$... Wait. The function is dependent. Can we cast along $\lambda i. B(i)\ x$? No, not quite. $x : A(i1)$, but we need an element of $A(i)$. $\lambda i. B(i)\ x\prime$ won't do either, since that has type $A(i0)$.
What we need is a line, dependent on $i$, which connects $x\prime$ and $x$, call it $p$; Then we can transport along $\lambda i. B(i)\ p(i)$ to get the element of $B(i1)\ x$ which we want. The filler of the composition which generated $x\prime$ is _exactly_ what we need. Define $v(i) = \mathrm{fill}\ (\lambda i. A (\neg i))\ (\lambda j [])\ x$, so that we may define $y = \mathrm{comp}\ (\lambda i. B(i)\ v(i)) \ (\lambda k [])\ (f\ x\prime)$, and the composition is $\lambda x. y$ as before.
To generalise this to non-empty compositions only requires a very small change. If you think of functions as extensional black boxes, like we do, one thing to realise is that it doesn't really matter _how_ we turn $x$ into an argument to the function, as long as we do; The only thing which needs to respect the constraints of the composition is the overall function, that is, its result. So we can simply take $x\prime$, $v(i)$ as in the case for dependent compositions and define the full composition to be: | |||||
$$ | |||||
\mathrm{comp}\ (\lambda i. \prod{(x : A(i))} B(i)\ x)\ [\phi \to u]\ a0 =\\ | |||||
\lambda x. \mathrm{comp}\ (\lambda i. B(i)\ v(i))\ [\phi \to u\ v]\ (a0\ x\prime) | |||||
$$ | |||||
Note the light abuse of notation we use in the mathematics; More properly, the system of sides in the resulting composition would be written | |||||
$\lambda i\ x. u(i)(x)\ v(i)$. | |||||
### Pairs | |||||
Assume we're given an element $p : \sum{(x : A)} B\ x$, and take $x = p.1$ and $y = p.2$. Just like in the case for dependent functions, $A$ is a line and $B$ is a dependent line; What we want is an element $p\prime : \sum{(x : A(i1))} B(i1)\ x$.
To define $\mathrm{comp}\ (\lambda i. \sum{(x : A(i))} B(i)\ x)\ [\phi \to u]\ p$, first define $v(i) = \mathrm{fill}\ A\ [\phi \to u.1]\ x$, which is a line connecting $x$ and $\mathrm{comp}\ A\ [\phi \to u.1]\ x$. For the second element we'll do the same thing as we did for dependent functions, and define $y\prime = \mathrm{comp}\ (\lambda i. B(i)\ v(i))\ [\phi \to u.2]\ y$. Then we can define composition as follows: | |||||
$$\mathrm{comp}\ (\lambda i. \textstyle\sum{(x : A(i))} B(i)\ x)\ [\phi \to u]\ p = (v(i1), y\prime)$$ | |||||
[here]: https://git.amelia.how/amelia/cubical/src/branch/master/intro.tt#L436-L460 | |||||
[line 114]: https://git.amelia.how/amelia/cubical/src/commit/fb87b16429fdd54f7e71b653ffaed115015066cc/intro.tt#L110-L114 | |||||
[line 667]: https://git.amelia.how/amelia/cubical/src/commit/fb87b16429fdd54f7e71b653ffaed115015066cc/intro.tt#L667 | |||||
[line 675]: https://git.amelia.how/amelia/cubical/src/commit/fb87b16429fdd54f7e71b653ffaed115015066cc/intro.tt#L675 |
@ -0,0 +1,118 @@ | |||||
--- | |||||
title: "A quickie: Axiom J" | |||||
date: June 7th, 2021 | |||||
synopsys: 1 | |||||
--- | |||||
Hey y'all, it's been three months since my last blog post! You know what that means.. or should mean, at least. Yes, I'd quite like to have another long blog post done, but... Life is kinda trash right now, no motivation for writing, whatever. So over the coming week(s) or so, as a coping mechanism for the chaos that is the end of the semester, I'm gonna write a couple of really short posts (like this one) that might not even be coherent at all---this sentence sure isn't. | |||||
Today's note is about what is perhaps the most confusing rule of Martin-Löf's dependent type theory, the _J_ eliminator. For starters, its name means basically nothing: as far as I can tell its name comes from the fact that **I**dentity is another word for equality and J is the letter that comes after I.
First, let's recall how the identity type is defined, or rather, the two ways in which it can be defined. The first has two _parameters_, `A` and `x`, and a single _index_ (of type `A`), while the latter has a single `A` _parameter_ and two _indices_ of type `A`. Using Agda syntax: | |||||
<div class="mathpar"> | |||||
```agda | |||||
data _=_ {A : Type} (x : A) : A -> Type where | |||||
refl : x = x | |||||
``` | |||||
```agda | |||||
data _=_ {A : Type} : A -> A -> Type where | |||||
refl : {x : A} -> x = x | |||||
``` | |||||
</div> | |||||
These definitions give rise to subtly different (but equivalent — see section §1.12.2 of Homotopy Type Theory if you're curious about the details) elimination rules. We'll consider the one on the left (or above, if your screen is narrow), since that one is _based_[^1].
[^1]: Which implies the other is cringe. | |||||
One decomposition which is (sometimes) helpful when an induction principle is confusing is to break it down into a simply typed _recursion_ principle and a propositional _uniqueness_ principle. Let's visit the recursion principle first. | |||||
It's actually something you're already familiar with, even if you don't have a background in type theory: Indiscernibility of identicals. We're going to assume a rather big type theory, with arrows and universes, so we can consider a family of propositions indexed by `A` to be a type family `P : A -> Type`. I ambiguously use Type to refer to some universe and leave it to the reader to find a consistent assignment of levels. Best of luck. | |||||
Where does `A` come from? It's an argument to the recursor since it's a _parameter_ to the inductive family. Similarly, `x` is also a parameter, but we make it implicit for convenience (in a theory without implicit arguments this, of course, doesn't happen). Let's write down what we have so far.
```agda | |||||
=-rec : {A : Type} {x : A} -> (P : A -> Type) -> ... | |||||
``` | |||||
I'm using "Agda" as a language marker but I'm adding extra arrows for clarity. After the proposition we're proving, comes one hypothesis for each constructor. Above I wrote it in infix form, `refl : x = x`{.agda}, but you can alternatively consider this as `refl : (_=_ x) x`{.agda} — i.e., the family `(_=_ x)`{.agda} applied to its index `x`. | |||||
For each constructor, the hypothesis returns a term in `P` applied to each of the indices of the constructor---so in this case, `P x`---and is a function of any arguments to our constructor. `refl` doesn't have any arguments, so the hypothesis is _just_ `P x`. | |||||
```agda | |||||
=-rec : {A : Type} {x : A} -> (P : A -> Type) -> P x -> ... | |||||
``` | |||||
And now, the conclusion! Literally. We introduce new variables with the same types as our indices---let's call this one `y : A`---and one argument which has the type "our inductive type applied to those new indices". Our inductive type is `(_=_ x)`, so that applied to our new indices is `(_=_ x) y`: `x = y`. And the conclusion? `P` applied to those indices! | |||||
```agda | |||||
=-rec : {A : Type} {x : A} -> (P : A -> Type) -> P x | |||||
-> {y : A} -> x = y -> P y | |||||
``` | |||||
We can shuffle the parameters around a bit to make it more familiar, and, indeed, give it a better name, too: | |||||
```agda | |||||
subst : {A : Type} {x y : A} (P : A -> Type) -> x = y -> P x -> P y | |||||
``` | |||||
The recursion principle for `(_=_ x)` says that, if `x = y`, then any property that's true of `x`---that is, an inhabitant `P x`---is also true of `y`! | |||||
Now let's consider the uniqueness principle. I think this is the hardest one to wrap your head around, since it's _really_ counterintuitive. The first guess anyone would make is that the uniqueness principle says that the only term of `x = x` is `refl`, since, well, just look at the type definition! However.. | |||||
What we've defined is not a type. It's a _family_ of types, indexed by a `y : A`. So we can't state a uniqueness principle for some specific `x = y`, we need to consider the "whole family". The, uh, _total space_ of the family, if you'll forgive my HoTT accent. That's a sigma type, a dependent sum, of all the indices and only _then_ our inductive family.
The uniqueness principle for `(_=_ x)` says something about `Σ A \y -> x = y`, or `(y : A) * x = y`, or $\sum_{y : A} x = y$, depending on how much of my terrible syntax decisions you can tolerate. It says this type is _contractible_, i.e., only has one inhabitant up to equality, and the centre of contraction is `(x, refl)`. | |||||
The name for this principle is _contractibility of singletons_, since it speaks about singleton types: The, for a fixed A and x, "subset of A equal to x". If `x = y` were a proposition, this would indeed be a subset, but we can't in general expect `x = y` to be a proposition. | |||||
I claim: J = `subst` + contractibility of singletons. Let's see how. Here's the full type of the J axiom, just for reference: | |||||
```agda | |||||
J : {A : Type} {x : A} | |||||
-> (P : (y : A) -> x = y -> Type) | |||||
-> P x refl | |||||
-> {y : A} (p : x = y) | |||||
-> P y p | |||||
``` | |||||
Let's, uh, look at the type of `P` there. It's a function of two arguments... mmm.. What happens if we curry it? | |||||
```agda | |||||
J : {A : Type} {x : A} | |||||
-> (P : (Σ A λ y -> x = y) -> Type) | |||||
-> P (x, refl) | |||||
-> {z : Σ A λ y -> x = y} | |||||
-> P z | |||||
``` | |||||
Now we're getting somewhere interesting. J says something about the type `(y : A) * x = y` (or `Σ A λ y -> x = y` in the _cursed_ "Agda" notation) — The total space of the family `(_=_ x)`. In particular, it says that, if we want to prove `P` about any inhabitant `z` of that space, it's sufficient to prove `P (x, refl)`. This looks suspiciously like the principle of contractibility of singletons I was talking about before! In fact, let's see how we can derive J from contractibility of singletons and substitution.
To recap, we assume: | |||||
```agda | |||||
contract : {A : Type} {x : A} (z : (Σ A λ y -> x = y)) -> z = (x, refl) | |||||
subst : {A : Type} {P : A -> Type} {x y : A} -> x = y -> P x -> P y
``` | |||||
Suppose our proof of `P (x, refl)` is called `pr`, for simplicity, and the other inhabitant is called, well, `z`. By `contract z` we have `z = (x, refl)`, so the inverse of that is a path `(x, refl) = z`. By `subst {P} {(x, refl)} {z} (sym (contract z))` we have a function `P (x, refl) -> P z`, which we can apply to `pr` to get a `P z`, like we wanted. | |||||
This decomposition might sound a bit useless, since, well, we can get both substitution and contractibility of singletons from J, but it's actually super handy! It's how I _prove_ J in cubical type theory. Here, substitution is a derived operation from a primitive called _composition_ (read my last blog post!), and contractibility of singletons can be proven using a _connection_ (also in the last post!). So `J` looks like: | |||||
```cubical | |||||
J : {A : Type} {x : A} | |||||
(P : (y : A) -> Path x y -> Type) | |||||
(d : P x (\i -> x)) | |||||
{y : A} (p : Path x y) | |||||
-> P y p | |||||
J P d p = transp (\i -> P (p i) (\j -> p (iand i j))) d | |||||
``` | |||||
--- | |||||
I think that's it for what I can write for today. I didn't really have a conclusion in mind, I just see a lot of talk about Martin-Löf's equality and wanted to throw my own two cents out into the internet. I guess writing about J is like the monad tutorial of dependent type theory? Though I'd like to think of myself as a bit more advanced than "writing a monad tutorial", since, you know, I wrote my own cubical type theory, but whatever.. | |||||
I'm still thinking of writing up a complete introduction to type theory, like, the whole thing: What it is, how to read inference rules, the empty and unit types, products, functions, dependent products, dependent sums, coproducts, naturals, inductive types, equality, and possibly the axioms HoTT makes you postulate on top of that. Of course, it's a _lot_ of work, and the sheer scale of what I want to write is.. kinda paralysing. Let's see whether or not it'll happen. |
@ -0,0 +1,407 @@ | |||||
--- | |||||
title: Cubical Sets | |||||
date: June 21st, 2021
abbreviations: | |||||
cube: 🧊 | |||||
globe: 🌎 | |||||
yo: よ | |||||
--- | |||||
<div class="warning"> | |||||
Throughout this post I'll use the ice cube emoji, &cube;, to stand for the _category of cubes_, which is more traditionally written □ (a blank square). The reason for this is that I have a really convenient emoji picker, so when I write about cubes on Twitter, it's a lot easier to call the category &cube; (maybe 4 keystrokes to select) rather than looking up the proper symbol on Google. | |||||
If you can't see this symbol - &cube; - then you should probably download an emoji font. | |||||
</div> | |||||
In which I try to write about semantics. This is not gonna go well, but I'm gonna try my best. I've heard it on good authority that the best way to learn something is to explain it to someone else, so in this post I'm going to use you, dear reader, as my rubber duck while I try to understand _cubical sets_. These are important (to me) because they provide a semantic model of cubical type theory (which I have written about previously), and since we have a semantic model, that theory is (semantically) consistent. | |||||
Personally, I like to think of a cubical set as.. a set that has opinions about cubes. You can ask some cubical set $F$ (the letter $F$ is meant to be indicative of its nature, as we will see) about its opinion on _the_ 0-cube, and it'll give us a set of points. We could ask it about the line, and it'll give us a set of lines, and so on and so forth. There are also, as we shall see, _maps_ between cubes, and asking $F$ about these will give us maps between what it thinks those cubes are. | |||||
A disclaimer (which I won't put in one of the big red warning boxes like above) is that _**I am not a category theorist**_! Most of this stuff I absorbed from reading various papers about the semantics of cubical type theory. This post is not really what I would call "a good reference". | |||||
The Category of Cubes &cube; | |||||
---------------------------- | |||||
The category of cubes has a concise description in category-theoretic terms but I would rather describe it like this: It's the category in which the objects are all powers of the set of booleans, $\{0,1\}^n$, which we abbreviate to $[n]$. To describe the maps in the category of cubes, I'm going to upset category theorists and describe them concretely, as functions of sets, written in a "pattern matching" notation similar to Haskell. However, we will only "pattern match" on products. | |||||
- The _faces_, which exhibit a cube as a _face_ of a larger cube. Concretely, a face map inserts either a 0 or a 1 somewhere along the tuple, taking an $n$-cube to an $(n+1)$-cube. The two most basic face maps take the 0-cube (a point) to either endpoint of the 1-cube (a line), defined by $\delta^0(()) = 0$ and $\delta^1(()) = 1$. | |||||
As further examples, we have functions $\delta^i_j$ (for $0 \le i, j \le 1$) which map the 1-cube (a line) into the 2-cube (a square), as any of its 4 faces. These are, explicitly, $\delta^0_0(j) = (0, j)$, $\delta^0_1(i) = (i, 0)$, $\delta^1_0(j) = (1, j)$, and $\delta^1_1(i) = (i, 1)$. | |||||
These also compose. For instance, the map $\delta^0_0 \circ \delta^1 : [0] \to [2]$ exhibits the point `()` as `(0, 1)`-th corner of a square. If we take the first coordinate to be the left-right direction (0 = left) and the second coordinate to be the up-down (0 = up) direction, then this composite map can be pictured as follows: | |||||
<figure> | |||||
<img height=200px alt="A diagram meant to indicate the inclusion of a point as a 0-face in the 2-cube. Colours are used to indicate how the point is mapped to the endpoint of a line, and then the corner of a square." src="/diagrams/cubicalsets/delta10_delta0.svg" /> | |||||
<figcaption>Including the $\color{red}{\bullet}$ point as a 0-face in the 2-cube </figcaption> | |||||
</figure> | |||||
The actual, _concrete_ effect of $\delta^0_0 \circ \delta^1 : [0] \to [2]$ can be seen by evaluating the composite at the unique inhabitant of $[0]$, which is $()$ (the empty tuple). We have $(\delta^0_0 \circ \delta^1)(()) = \delta^0_0(1) = (0, 1)$. | |||||
- The _degeneracies_, which "collapse" an $(n+1)$-cube to an $n$-cube by deleting a dimension. The most basic degeneracy is given by the unique map $\sigma : [1] \to [0]$. There are two degeneracies $[2] \to [1]$, mapping a square to a line, by deleting either coordinate. These... don't have very nice pictoral representations, at least in the category of _cubes_. We'll see that when it comes to a cubical _set_, though, they are quite easy to diagram. | |||||
We also have the trivial map $1_n : [n] \to [n]$ which returns its argument $n$-tuple (cube) unchanged. It's easy to see that this is the identity for composition, which is as in $\mathbf{Set}$. Since &cube; is a category, we can consider standard category-theoretical operations _on_ &cube;, like taking its opposite category, $\cube^{op}$. The category $\cube^{op}$ has as its objects the same cubes $[n]$ as before, but all of the maps are backwards, so that the face maps in $\cube^{op}$ _project_ a face and the degeneracies _expand_ a cube, by inserting trivial faces. | |||||
We can also consider functors which map out of $\cube$ --- and its opposite category! --- and that's what we're interested in today. The functors we will talk about, those $X : \cube^{op} \to \mathbf{Set}$, are called _cubical sets_, and we'll talk about them shortly, but first, a note: | |||||
_Crucially_, the category of cubes does not have any maps other than the faces and degeneracies (and identities), and importantly, any map $p : [m] \to [n]$ factors as a series of degeneracies followed by a series of faces. This means we can specify a cubical set _entirely_ by how it acts on the faces and the degeneracies. | |||||
Cubical Sets | |||||
------------ | |||||
A cubical set $X : \cube^{op} \to \mathbf{Set}$ assigns:

- To each object $c$ in &cube;, a _set_ $X(c)$. Since the objects of &cube; are all $[n]$, these are alternatively notated $X_n$.
- To each map $f : [n] \to [m]$ in &cube;, an arrow $X(f) : X_m \to X_n$. Specifically, these are all composites of the faces $X(\delta^i_j) : X_{n + 1} \to X_n$ and the degeneracies $X(\sigma_j) : X_n \to X_{n + 1}$. | |||||
Hold up - aren't those backwards? Yes, they are! Remember, a cubical set is not a functor out of the category &cube;, it's a functor out of $\cube{}^{op}$, so all of the arrows are backwards. To work more conveniently with cubical sets (and, in fact, more concretely) we need to take a _very_ abstract detour through even more category theory. | |||||
### The Unit Interval | |||||
Being functors $[\cube^{op}, \mathbf{Set}]$, we can also form maps _between_ cubical sets, which are _natural transformations_ $\alpha : X \to Y$. Specifically, a map between cubical sets assigns to each cube $c \in \cube$ a map $\alpha_c : X(c) \to Y(c)$, such that for any morphism $f : c \to c\prime \in \cube$, the equation $\alpha_{c} \circ X(f) = Y(f) \circ \alpha_{c\prime}$ holds. This condition is captured by the following diagram:
<figure> | |||||
<img height=200px alt="A naturality square" src="/diagrams/cubicalsets/naturality.svg" /> | |||||
<figcaption>Is it even possible to talk about category theory without drawing a naturality square?</figcaption> | |||||
</figure> | |||||
A standard construction in category theory is the _Yoneda embedding_ functor, written $\yo$ (the hiragana character for "yo"), which maps an object of (in our case) $\cube$ into the category of cubical sets. It maps objects $c \in \cube$ to the _hom-set_ functor $\mathrm{Hom}_{\cube}(-, c)$, which takes each object $d \in \cube^{op}$ to the set of morphisms $d \to c$ in &cube;. | |||||
It takes the morphism $f : c \to d$ to the natural transformation $\yo(f) : \mathrm{Hom}_{\cube}(-, c) \to \mathrm{Hom}_{\cube}(-, d)$. | |||||
Let's look at $\yo(f)$ some more. It's a natural transformation between $\mathrm{Hom}(-, c)$ and $\mathrm{Hom}(-, d)$, so we can read that as a set of maps indexed by some $e \in \cube^{op}$. Since $\yo(f)_e : \mathrm{Hom}(e, c) \to \mathrm{Hom}(e, d)$, we can understand that the value $\yo(f)_e(g)$ takes for $g \in \mathrm{Hom}(e, c)$ is $f \circ g$. | |||||
The $\yo$ functor gives us, for an object $[n]$ in the category of cubes, an object.. well, $\yo([n])$ in the category of cubical sets. We'll abbreviate $\yo([n])$ as $\square^n$ to save time, since, as we'll see, this object is very special indeed. Let's consider, for simplicity, the _unit interval_ cubical set, $\square^1$. We know it's a functor from $\cube^{op}$ to $\mathbf{Set}$ --- and more, we know exactly what it maps each cube to. The _set of all maps from other cubes to $[1]$_. Turns out, above $[1]$ this set only contains trivial cubes, so let's look at what $\square^1_0$ and $\square^1_1$ are:
- For $\square^1_0$ we have to consider all ways of mapping the $0$-cube to the $1$-cube. These are the two "base" face maps $\delta^0$ and $\delta^1$. | |||||
- For $\square^1_1$, we have to consider all ways of mapping the $1$-cube to itself. You might think that this set is trivial, but think again (if you do): Yes, we do have the identity map $1_{[1]} : [1] \to [1]$, but we also have the compositions $\delta^0 \circ \sigma$ and $\delta^1 \circ \sigma$. Since we know what the objects in the category of cubes look like, you can think of these as the constant function `f(x) = 0` and `g(x) = 1` respectively, since that's what they work out to: | |||||
$$ | |||||
(\delta^0 \circ \sigma)(x) = (\delta^0)(\sigma(x)) = \delta^0(()) = (0) | |||||
$$ | |||||
- For $\square^1_j, j > 1$ we only have degeneracies (and compositions of degeneracies) mapping $[j] \to [1]$. | |||||
Now, the standard cubes $\square^n$ don't look very interesting. But you see, this is where I pulled a sneaky on you! Because of a result about $\yo$ --- the _Yoneda lemma_. Specialised to our case, it says that for _any n_ and _any X_, the sets $X_n$ and $\mathrm{Hom}(\square^n, X)$ correspond exactly: we can probe the structure of a cubical set $X$ by examining the classes of maps $\square^n$ to $X$. | |||||
<details> | |||||
<summary>The Yoneda lemma</summary> | |||||
The Yoneda lemma is a result about an arbitrary category $C$, its category of _presheaves_ $\mathbf{PSh}(C) = [C^{op}, \mathbf{Set}]$, and the functor $\yo(c)$ we just defined. Its statement is as follows: | |||||
$$ | |||||
\mathrm{Hom}(\yo(c), F) \simeq F(c) | |||||
$$ | |||||
In our case, it's interesting particularly because it says that we can explore the structure of a cubical set --- a presheaf on $\cube$ --- by analysing the maps from the standard $n$-cube $\square^n$ into $X$. Furthermore, it implies the Yoneda embedding $\yo$ is _fully faithful_, by the following calculation: | |||||
$$ | |||||
\mathrm{Hom}_{\mathbf{PSh}(C)}(\yo(c), \yo(d)) \simeq \yo(d)(c) \simeq \mathrm{Hom}_{C}(c, d) | |||||
$$ | |||||
It thus realises $C$ as a full subcategory of $\mathbf{PSh}(C)$ - in our case, the category $\cube$ as a subcategory of the category of cubical sets. This is useful because $\mathbf{PSh}(C)$ is a category with a _lot_ of structure (as we shall see), even when $C$ doesn't have any structure.
This also means that we can study maps between cubes by studying maps of standard cubical sets, which is good, because degeneracies in the category of cubes confuse me to death! | |||||
</details> | |||||
### Cubes in Sets | |||||
Let's look at what the maps $\square^n \to X$ impart on $X$, shall we? But first, let's reason a bit to identify how we can represent diagramatically the cubical set $\square^n$, by extrapolating our knowledge about the unit interval cubical set. For that case, $\square^1_0$ was the set containing both "endpoints" of the unit interval, and the set $\square^1_1$ contained two degenerate lines (for either endpoint --- we'll see how to think about these in the next section) and one non-degenerate line, which we think of as "the" unit interval. | |||||
So, in general, we think of $\square^n$ as consisting of the set $\square^n_0$ of _vertices_ of $\square^n$, the set $\square^n_1$ of _lines_ of $\square^n$, the set $\square^n_2$ of _squares_ of $\square^n$, the set $\square^n_3$ of _cubes_ of $\square^n$ (cube in the sense of high school geometry), etc, all the way up to the set $\square^n_n$ of $n$-cubes of $\square^n$, and all $\square^n_m, m > n$ are degenerate. We can represent these using.. diagrams! Diagrams of points, lines, squares, cubes, etc. Let's look at the first few:
<figure> | |||||
<img height=200px alt="The image of the first 3 objects of the category of cubes under the Yoneda embedding are cubical sets representing familiar shapes: a point, a line, a square, and a (solid) cube." src="/diagrams/cubicalsets/first_ncubes.svg" /> | |||||
<figcaption>The cubical sets $\square^0$, $\square^1$, $\square^2$, $\square^3$.</figcaption> | |||||
</figure> | |||||
Now we can investigate a particular $n$-cube in $X$ as being a diagram in $X$ with the same shape as one of the diagrams above! | |||||
- A $0$-cube in X is just a point in X. | |||||
- A $1$-cube in X can be parsed to mean an arrow $f : x_0 \to x_1$. The points $x_0$ and $x_1$ are understood to be the cubes $f \circ \yo(\delta^0)$ and $f \circ \yo(\delta^1)$, which we call the _endpoints_ of $f$. By composing with the image of a face map under $\yo$, we can project a lower-dimensional cube from a higher dimensional cube, by the action of $\yo$ on morphisms. | |||||
- A $2$-cube in X is a _square_ $\sigma$ like | |||||
<figure> | |||||
<img alt="A diagrammatic representation of a particular square in a cubical set." height=200px src="/diagrams/cubicalsets/asquare.svg" /> | |||||
<figcaption>A square $\sigma$.</figcaption> | |||||
</figure> | |||||
In this diagram too we can understand the lower-dimensional cubes contained in $\sigma$ to be compositions $\sigma \circ \yo(p)$ for some composition of face maps $p : [m] \to [2], m \le 2$. As an example (the same example as in the section on &cube;), the arrow $p$ is the map $\sigma \circ \yo(\delta^0_0)$, and the point $b$ is the map $\sigma \circ \yo(\delta^0_0) \circ \yo(\delta^1)$. By functoriality of $\yo$, that composite is the same thing as $\sigma \circ \yo(\delta^0_0 \circ \delta^1)$. | |||||
- A $3$-cube in X is a map $\aleph : \square^3 \to X$, which could be visualized as the proper _cube_ below, and has 6 2-faces (squares), 12 1-faces (edges) and 8 0-faces (vertices). As an exercise, work out which sequence of face maps in the underlying cube category leads to each of the possible 26 faces you can project. Honestly, the drawing of the $3$-cube isn't even _that_ enlightening, I just wanted to be fancy.
Like, check out this absolute _flex_ of a diagram, it's god damn useless. Wow. | |||||
As a quick aside, can we talk about how god damn confusing this projection is? I can never tell whether I'm looking top-down at a truncated square pyramid ($\kappa$ is the top face) or if I'm looking _through_ a normal solid 3-cube whose front face is transparent ($\kappa$ is the back face).
<figure class="wraparound"> | |||||
<img alt="A diagrammatic representation of a particular cube in a cubical set. The diagram is incredibly busy and not very helpful." height=200px src="/diagrams/cubicalsets/acube.svg" /> | |||||
<figcaption>A _proper_ cube, finally!</figcaption> | |||||
</figure> | |||||
In case it's not clear (it's not clear, I know), the 2-cubes present in the 3-cube $\aleph$ -- yes, $\aleph$, that's how hard I'm running out of letters over here -- are these: | |||||
- $\kappa$ is the square spanned by $w \to x \to z \leftarrow y \leftarrow w$. | |||||
- $\lambda$ is the square spanned by $a \to w \to y \leftarrow c \leftarrow a$. | |||||
- $\mu$ is the square spanned by $a \to b \to x \leftarrow w \leftarrow a$. | |||||
- $\nu$ is the square spanned by $b \to x \to z \leftarrow d \leftarrow b$. | |||||
- $\epsilon$ is the square spanned by $c \to y \to z \leftarrow d \leftarrow c$.
- There is one more square, obscured by $\kappa$, which is spanned by $a \to b \to d \leftarrow c \leftarrow a$. | |||||
Yeah, this item is padding. Fight me. | |||||
Now that we know we can represent particular cubes in a cubical set X by diagrams, I can also finally show you what a degeneracy actually looks like! For instance, we know $X(\sigma)$ maps from the set of points of $X$ to the set of lines of $X$ (since $X$ is contravariant, it inverts the direction of $\sigma$ -- remember that). | |||||
If $x$ is a particular point in $X$, its image under $X(\sigma)$ is a degenerate line connecting $x \to x$. Degeneracies on lines $l$ turn them into degenerate squares where two opposing faces are $l$ and the other two faces are degenerate, and so on.
<figure> | |||||
<img height=200px alt="Diagrammatic representations of the degeneracy which expresses a point as a degenerate line, and one of the ways of expressing a line as a degenerate square." src="/diagrams/cubicalsets/degeneracies.svg" /> | |||||
<figcaption>Some degeneracies in cubical sets, diagrammed.</figcaption> | |||||
</figure> | |||||
In both diagrams above, the dashed arrow from the $n$-cube to the inside of $(n+1)$-cube is meant to be understood as $a \circ \yo(\sigma)$, where $a$ is a map $\square^n \to X$. $\sigma \circ \sigma_0$ is the map which collapses a square to a point by first removing the _first_ coordinate, which is understood to be left-right; Thus, the cells in the up-down direction in $f \circ \yo(\sigma) \circ \yo(\sigma_0)$ are thin, and the left-right cells are full.
### More examples of Cubical Sets | |||||
The simplest way of making a cubical set is by taking a normal set, say $A$, and ignoring the cubes, thus making the _discrete cubical set_ $K(A)$, which has $K(A)_n = A$ for every $n$; $K(A)(\delta^i_j) = 1$ and $K(A)(\sigma_i) = 1$.
It's easy to see that $K(A)$ is a functor, since: | |||||
* $K(1) = 1$ | |||||
* $K(g \circ f) = 1$, and $K(g) \circ K(f) = 1 \circ 1 = 1$. | |||||
And thus $K(A)$ is a cubical set. It doesn't have a lot of interesting _structure_, but some discrete cubical sets will have important roles to play when discussing the _category_ of cubical sets. For instance, $K(\mathbb{N})$ plays the same role in $\mathbf{cSet}$ as it does in $\mathbf{Set}$! | |||||
If $A$ and $B$ are cubical sets, we can form their product $A \times B$, which is _also_ a cubical set. Every $(A \times B)_n$ is $A_n \times B_n$, and maps $(A \times B)(f) : A_m \times B_m \to A_n \times B_n$ are taken to products of morphisms $A(f) \times B(f)$.[^1]
[^1]: Where $(f \times g)(x, y) = (f(x), g(y))$ in $\mathbf{Set}$. | |||||
Describing individual constructions on cubical sets (like their product) isn't very enlightening, though, and it's a lot more fruitful to describe most of them in one go. So, with that goal, I'll describe.. | |||||
### The Category of Cubical Sets, $\mathbf{PSh}(\cube)$ | |||||
Cubical sets are, of course, objects of a category, like all good things. We call a functor $X^{op} \to \mathbf{Set}$ a _presheaf on $X$_, and we denote the category of presheaves on $X$ by $\mathbf{PSh}(X)$. Thus, since a cubical set is a functor $\cube^{op} \to \mathbf{Set}$, we can also call it a _presheaf on &cube;_, and thus, an object of $\mathbf{PSh}(\cube)$. To reduce the number of ice cube emoji on the screen, we'll denote this category by $\mathbf{cSet}$. | |||||
The word "presheaf", rigorously, only means "contravariant functor into $\mathbf{Set}$." However, it's what the nLab calls a "concept with an attitude": If you call something a "presheaf category" instead of a "functor category", it's likely that you're interested in the properties of $\mathbf{PSh}(C)$ as a presheaf _topos_, and, indeed, that's what we're interested in. | |||||
A topos is a "particularly nice category to do mathematics", in which "nice" means "has a lot of structure". Let's look at some of the structure $\mathbf{cSet}$ (and, indeed, _any_ $\mathbf{PSh}(C)$) has for "free": | |||||
- **Completeness** Every _small limit_ exists in $\mathbf{cSet}$, and is computed pointwise as a limit in $\mathbf{Set}$. This is an extension of the product of cubical sets mentioned above: a product is just a small, _discrete_ limit. In particular, this also includes a _terminal object_ in cubical sets, which is the discrete cubical set $K(1)$. | |||||
- **Cocompleteness** Every _small colimit_ exists in $\mathbf{cSet}$. In particular, if $C$ is a category, $\mathbf{PSh}(C)$ is often referred to as the "free cocompletion" of $C$ --- C plus all small colimits thrown in. These are also computed pointwise as colimits in $\mathbf{Set}$. Don't know what a colimit is? One particularly important example is the _coproduct_ $A + B$. In $\mathbf{Set}$, this is the disjoint union.
Another important colimit is the _initial object_ in cubical sets, which is the discrete cubical set $K(0)$. | |||||
- **Cartesian closure** This one merits a little more explanation than a paragraph. Fix a cubical set $X$. To say $\mathbf{cSet}$ is Cartesian closed is to say the functor $- \times X$ ("product with X", called "tensor") has a _right adjoint_ functor $[X, -]$, called "hom" (also read "function from X", at least by me) - That is, $\mathrm{Hom}(A \times X, B) \simeq \mathrm{Hom}(A, [X, B])$. | |||||
We can try to imagine what a would-be $[X, Y]$ would be like by fixing a third cubical set $Z$ and seeing that if $[X, Y]$ exists, then it must satisfy the equation
$$ | |||||
\mathrm{Hom}_{\mathbf{cSet}}(Z, [X, Y]) \simeq \mathrm{Hom}_{\mathbf{cSet}}(Z \times X, Y). | |||||
$$ | |||||
This equation holds when $c \in \cube$ and $Z = \yo(c)$, so by the Yoneda lemma we have | |||||
$$\mathrm{Hom}_{\mathbf{cSet}}(\yo(c) \times X, Y) \simeq \mathrm{Hom}_{\mathbf{cSet}}(\yo(c), [X, Y]) \simeq [X, Y](c)$$
By defining an "evaluation" map, $\mathrm{ev} : X \times [X, Y] \to Y$, and showing that for every $f : X \times A \to Y$ there is a $\lambda{}(f) : A \to [X, Y]$, we can prove that $\mathrm{ev}$ is the counit of the tensor-hom adjunction we want in $\mathbf{PSh}(\cube)$, and thus that the definition posed above is indeed the correct definition of $[X, Y]$ for cubical sets. For the details of this construction, check out [the nLab](https://ncatlab.org/nlab/show/closed+monoidal+structure+on+presheaves). | |||||
- And a wealth of other properties, like **local cartesian closure** ("has dependent products"), having a **subobject classifier** (a "type of propositions"), having **power objects** (a generalisation of power sets), among _many_ others. | |||||
Kan Cubical Sets | |||||
---------------- | |||||
The category of cubical sets is pretty neat by itself, but.. it's kinda useless. I'm sure there exist applications of cubical sets by themselves, but I can't think of any. The cubical sets, just like the simplicial sets, come into their own when we consider the subcategory of $\mathbf{cSet}$ (resp. $\mathbf{sSet}$) consisting of the _Kan complexes_. Since the term Kan complex is generally used to mean "Kan simplicial set", we're generally left to use either "Kan cubical set" or "Cubical complex" for the objects of our subcategory. Let's go with the former. | |||||
Fix a cubical set $X$ throughout. We define the boundary of an $n$-cube $x$, $\partial x$, to be the union of all of its faces. This can be pictured diagramatically as below: The faces of $\sigma$ are all of the points and arrows spanning it, and the union of these is $\partial \sigma$. | |||||
<div class=mathpar> | |||||
<figure> | |||||
<img height=200px alt="The same square in a cubical set as before." src="/diagrams/cubicalsets/asquare.svg" /> | |||||
<figcaption>The same ol' square $\sigma$.</figcaption> | |||||
</figure> | |||||
<figure> | |||||
<img height=200px alt="The square, but with its inside (σ) removed." src="/diagrams/cubicalsets/del_asquare.svg" /> | |||||
<figcaption>The boundary of the square $\sigma$.</figcaption> | |||||
</figure> | |||||
</div> | |||||
We still have the same 0-cubes and 1-cubes spanning $\sigma$, but the 2-cube $\sigma$ _itself_ is no longer under consideration. We are principally interested in the boundaries of the standard $n$-cubes, which will be denoted $\partial \square^n$. Considering that boundary, we can define a box _open_ in $\square^n$ as being the subset of $\partial \square^n$ with one of its $(n-1)$-dimensional faces (the face in the image of $\delta^\varepsilon_i$) removed. This we denote by $\sqcap^{n,i,\varepsilon}$.
Just like in the case of an $n$-cube in $X$, we understand the phrase "$(n,i,\varepsilon)$-open box in $X$" to mean a map $\sqcap^{n,i,\varepsilon} \to X$. Here are diagrams of all the open boxes in the same $\sigma$ as before. | |||||
<figure> | |||||
<img height=200px alt="All possible open boxes of the square σ, which you get by removing one of the faces. In the diagram, the missing face was replaced with a dotted line." src="/diagrams/cubicalsets/open_boxes.svg" /> | |||||
<figcaption>All of the open boxes in $\sigma$.</figcaption> | |||||
</figure> | |||||
A cubical set satisfies the Kan condition if every open box in $X$ can be extended to a cube, or, more formally, if there exists a dotted arrow $g$ which factors the map $f$ through the inclusion from $\sqcap^{n,i,\varepsilon}$ into $\square^n$.
<figure> | |||||
<img height=200px alt="A commutative triangle representing the Kan condition." src="/diagrams/cubicalsets/kan_condition.svg" /> | |||||
<figcaption>The Kan condition on the cubical set $X$.</figcaption>
</figure> | |||||
### The Cubical Nerve | |||||
First, recall the definition of a groupoid. A groupoid $\mathcal{G}$ is a category in which for every arrow $f : A \to B$ there exists an arrow $f^{-1}$, such that ${f \circ f^{-1}} = {f^{-1} \circ f} = 1$. That is: a groupoid is a category in which every arrow is invertible. There is a (2-)category of groupoids, $\mathbf{Grpd}$, in which the objects are groupoids and the morphisms are functors (and the 2-morphisms are natural isos). | |||||
We specify a functor $N^{\le 2} : \mathbf{Cat} \to \mathbf{cSet}$, the _truncated nerve_ functor, which assigns to every groupoid a cubical set in which every $n\ge{}3$-cube is degenerate, as follows: | |||||
- The points in $N^{\le 2}(A)$ are the objects in $A$, | |||||
- The lines $f : a_0 \to a_1$ in $N^{\le 2}(A)$ are the arrows $f : a_0 \to a_1$ in $A$; The lines induced by degeneracy maps are the identity arrows. | |||||
- The squares in $N^{\le 2}(A)$ are the squares with corners $a, b, c, d$ spanned by $f : a \to c$, $p : a \to b$, $q : c \to d$, $g : b \to d$, such that $g \circ p = q \circ f$ - that is, the commutative squares with that boundary. | |||||
The degenerate squares in $N^{\le 2}(A)$ are the squares as below, and they exist for every $a, b$, and $f : a \to b$ in $A$: | |||||
<figure> | |||||
<img height=200px alt="Degenerate squares in the truncated cubical nerve of a category." src="/diagrams/cubicalsets/thin_squares.svg" /> | |||||
<figcaption>Thin squares in $N^{\le 2}(A)$</figcaption> | |||||
</figure> | |||||
I claim: If $A$ is a groupoid, then its nerve $N^{\le 2}(A)$ is always Kan. I will not show this with a lot of rigour, but to convince yourself of this fact, deliberate on what it means to fill boundaries of our non-degenerate cubes: the lines and squares. | |||||
* In the case of lines, an open box $\sqcap^{1,0,\varepsilon}$ is just a point $x$; We can extend this to a line $1_{x} : x \to x$, as desired.
* In the case of squares, an open box $\sqcap^{2,i,\varepsilon}$ is a diagram like the one below, in which all of the corners are objects of $A$ and the lines are maps in $A$. The maps in $A$ are invertible, so if we have $q$, we also have $q^{-1}$ (for instance). | |||||
<figure> | |||||
<img height=200px alt="A particular open box in the truncated cubical nerve of a groupoid." src="/diagrams/cubicalsets/open_box.svg" /> | |||||
<figcaption>A representative example of open boxes in $N^{\le 2}(A)$.</figcaption>
</figure> | |||||
We're looking for the map $f : a \to c$. The strategy to use here is to try to "follow" the source of the missing arrow "around" the edges of the cube, and, if you get stuck, invert the arrow you got stuck on. We take $a$ to $b$ through $p$, then to $d$ through $g$, and now we're stuck. A priori, there's no arrow $d \to c$ we can follow, but since $A$ is a groupoid, we can invert $q$ to get $q^{-1} : d \to c$. Thus the composite $q^{-1} \circ g \circ p$ connects $a$ and $c$, like we wanted. | |||||
Moreover, this diagram must commute, i.e., we must check that $g \circ p = q \circ (q^{-1} \circ g \circ p)$. But this is automatic from the axioms of a category (which say we can ignore the parentheses), and the axioms for a groupoid, which imply that $q \circ q^{-1} \circ f = f$ (for any f). | |||||
We have established that the truncated nerve of a groupoid is Kan. Why _truncated_? Because we only consider **1-**categories in the construction of $N^{\le 2}$, and, as the superscript implies, only have non degenerate cubes for levels 2 and below. We could consider an _untruncated_ $N$ functor from $\infty$-categories to cubical sets; In that case, the nerve of an $\infty$-groupoid is Kan, just like in the 1-categorical case. | |||||
More surprising, the converse implication is also true! If the nerve $N^{\le 2}(A)$ of a category is Kan, then $A$ is a groupoid. Adapting the analogous argument from [Kerodon](https://kerodon.net/tag/0037) about Kan complexes to our Kan cubical sets, we're given an $f : a \to b \in A$, and we build left and right inverses $g, h : b \to a$ to $f$. | |||||
This can be done by defining a pair of partial squares in $N^{\le 2}(A)$, in which the missing faces represent left and right inverses to our map $f \in A$. Here they are: | |||||
<div class=mathpar> | |||||
<figure style="width:48%;"> | |||||
<img height=200px alt="The open box which computes the left inverse of a map f." src="/diagrams/cubicalsets/left_inv.svg" /> | |||||
<figcaption>If this open box had a filler, it would witness in $A$ the equation $g \circ f = 1$.</figcaption> | |||||
</figure> | |||||
<figure style="width:48%;"> | |||||
<img height=200px alt="The open box which computes the right inverse of a map f." src="/diagrams/cubicalsets/right_inv.svg" /> | |||||
<figcaption>If this open box had a filler, it would witness in $A$ the equation $f \circ h = 1$.</figcaption> | |||||
</figure> | |||||
</div> | |||||
By assumption, $N^{\le 2}(A)$ is Kan, which means these open boxes _do_ have fillers, and thus the equations $g \circ f = 1$ and $f \circ h = 1$ hold in $A$. We calculate: $g = g \circ 1 = g \circ f \circ h = 1 \circ h = h$, leaving implicit the applications of associativity of $\circ$, thus concluding that $g = h$ is an inverse to $f$.
In either case, we're considering a _globular_ construction (a groupoid) as a _cubical_ construction. This consideration can be interpreted for any "level" of higher structure which you might want: above, we only had 1-cells in our groupoid, but, for example, here's what a two-cell $\alpha$ in a globular $n$-category might look like interpreted cubically: | |||||
<div class=mathpar> | |||||
<figure style="width:48%;"> | |||||
<img height=200px alt="A globular cell in a 2-category is an α with 0-cells a, b and 1-cells f, g as a boundary." src="/diagrams/cubicalsets/globular_2cell.svg" /> | |||||
<figcaption>A globular cell in a 2-category is an $\alpha$ with 0-cells $a, b$ and 1-cells $f, g$ as a boundary.</figcaption> | |||||
</figure> | |||||
<figure style="width:48%;"> | |||||
<img height=200px alt="The same globular cell, stretched into a square." src="/diagrams/cubicalsets/cubical_2cell.svg" /> | |||||
<figcaption>We can interpret it as a cubical 2-cell where two faces are thin.</figcaption> | |||||
</figure> | |||||
</div> | |||||
Similarly, if we take our good old square $\sigma$, we can interpret that as a _globular_ 2-cell, by "stretching" the diagram vertically, inserting degenerate cells where appropriate. | |||||
<div class=mathpar> | |||||
<figure style="width:48%;"> | |||||
<img height=200px alt="The same square you've read about 3 times now." src="/diagrams/cubicalsets/asquare.svg" /> | |||||
<figcaption>Our good friend $\sigma$.</figcaption> | |||||
</figure> | |||||
<figure style="width:48%;"> | |||||
<img height=200px alt="Squishing the square into a globe by composing the adjacent morphisms." src="/diagrams/cubicalsets/aglobe.svg" /> | |||||
<figcaption>We collapse it into a globular $\sigma_\globe$ by composing the composable arrows.</figcaption> | |||||
</figure> | |||||
</div> | |||||
For more details on the connection between thin cubes and globes, in the particular case of double categories and 2-categories, the paper [Double categories, 2-categories, thin structures and connections](http://www.tac.mta.ca/tac/volumes/1999/n7/5-07abs.html) by Brown and Mosa is excellent. It also contains pictorial representations of the equations which have to hold between faces, degeneracies and connections, which I have entirely neglected!
### The discrete cubical set $K(A)$ is Kan | |||||
This follows from the name, it's a $K(A)_n$ complex, duh! | |||||
That was a joke, but it was a bad one. As a quick reminder, every $K(A)_n$ is taken to be the set $A$, and all of the restrictions (faces and degeneracies) are the identity on $A$. We want to show that every open box $\sqcap^{n,i,\varepsilon}$ in $K(A)$ has a unique filler. But consider (by way of handwaving) what an open box $\sqcap^{n,i,\varepsilon}$ is: A cube $\square^n \to K(A)$ with "one of its faces removed". | |||||
Any cube $b : \square^n \to K(A)$ is an element of $K(A)_n$ (by the Yoneda lemma), which is the set $A$, by definition. The cube $b$ has *however many* $(n - 1)$-dimensional faces, which we can compute by applying the appropriate face maps. Since the face maps in $K(A)$ are all the identity function, we learn that all of <span class=together>$b$'s</span> faces have to be the *same* element $b \in A$, regarded as $(n-1)$-dimensional cubes. | |||||
By this argument, every open box $o$ of dimension $n$ in $K(A)$ is made up of the same element $b \in A$ in all of its faces. We can extend this box to an $n$-cube which is just the element $b$: It is a complete $n$-cube of $K(A)$, and it has all of the same faces as $o$ where both are defined. | |||||
We can consider the category made up of only those cubical sets which are Kan complexes. This turns out to be a very interesting category! Specifically, I'm talking about the _full subcategory of $\mathbf{cSet}$ on the Kan complexes_, which turns out to be equivalent to the $(\infty,1)$-topos $\mathbf{\infty{}Grpd}$ of $\infty$-groupoids. [A recent result](https://arxiv.org/abs/1904.07004) by Shulman shows that this category, which I guess can be called $\mathbf{cSet}_{\mathrm{Kan}}$, models... Homotopy Type Theory.
### Cubical Types | |||||
Aha! That paragraph was a twist, but it wouldn't be a post on this blog if I didn't manage to write in type theory somehow! However, if you went back to the first paragraph, you'd have seen this coming. My interest in Kan cubical sets is _entirely_ due to, well, these two papers, in which a model of MLTT + the univalence axiom (modulo the computation rule for J only holding up to a path) is constructed:
- [A Model of Type Theory in Cubical Sets](http://www.cse.chalmers.se/~coquand/mod1.pdf) by Bezem, Coquand and Huber in which they exhibit a model, which was later refined to | |||||
- [Cubical Type Theory: a Constructive Interpretation of the Univalence Axiom](https://arxiv.org/abs/1611.02108) by **C**ohen, **C**oquand, **H**uber and **M**örtberg. | |||||
By _definition_, a type in these theories is a Kan cubical set. A type in a context like $i, j, k : \mathbb{I} \vdash A$ is _roughly_ like the set $A([3])$, if you ignore that their cube category is completely different from the one presented here! They're (roughly) equivalent, though, except for the cube category of CCHM having operations called _reversals_ (which invert one dimension) and special kinds of degeneracies called _connections_. A _connection_ is a degeneracy like in the diagram below, which I am stealing from [my own post about CCHM](/posts/cubical-type-theory.html): | |||||
<blockquote> | |||||
<div class="mathpar"> | |||||
<figure style="width: 48%;"> | |||||
<img src="/diagrams/ctt/land_connection.svg" alt="And connection" style="min-width: 250px;" /> | |||||
<figcaption>The square generated by $\lambda i\ j. p(i \land j)$</figcaption> | |||||
</figure> | |||||
<figure style="width: 48%;"> | |||||
<img src="/diagrams/ctt/lor_connection.svg" alt="Or connection" style="min-width: 250px;" /> | |||||
<figcaption>The square generated by $\lambda i\ j. p(i \lor j)$</figcaption> | |||||
</figure> | |||||
</div> | |||||
</blockquote> | |||||
_Ahem_, please forgive past-me's type theoretic accent. These are, like the normal degeneracies, 2-cubes in which 2 faces are thin (these are the $\lambda i. p\ ik$ faces in the diagram), and the two other faces are the 1-cube we degenerated (the line $p$). Connections are a very natural extension to the theory of Kan cubical sets, since in a sense they say that an $n$-cube is regarded as a degenerate $(n+1)$-cube in all of the possible ways. | |||||
This extra structure of connections turns out to be very important when considering a category of cubical sets as an alternative to the category of simplicial sets, $\mathbf{sSet}$, when doing homotopy theory. This is because cubes without connection are not a _strict test category_, a property which is... complicated to describe. But _very roughly_, it says that the canonical way of mapping between cubical sets and homotopy types does not preserve products. | |||||
The perspective we get from this particular application of (Kan) cubical sets is that they provide a systematic way to represent the elements of a type ($X_0$), the equalities between elements of that type ($X_1$), the homotopies between equalities in that type ($X_2$), and so forth. In that sense it's not surprising that Kan cubical sets can be used to (almost) model HoTT!
Conclusion | |||||
---------- | |||||
I don't know why I write conclusions; These aren't high school essays. However, in this case I do feel compelled to apologise for how technical and disjointed this post was, and how it seems like I needlessly elaborated on things which (to some) might be trivial while not going into enough detail about highly non-trivial things. | |||||
Like I said in the first paragraph, I was writing this to learn more about cubical sets. So, unlike my other posts, which are explaining concepts I already had an understanding of --- for instance, my last proper post was talking about _my implementation_ of cubical type theory, not cubical type theory in general --- this post is explaining something I had a fuzzy understanding of, and touches on some category-theoretical concepts I didn't have the faintest clue about, like the Yoneda embedding. | |||||
Several people had tried to explain the Yoneda embedding to me before, but it had never stuck. It was only when I actually wrote out the definition, worked through its effect on objects and maps, and explored a bit of the structure of the unit interval cubical set that it finally clicked. I guess explaining something really is the best way to learn it!
This was my shortest interval between blog posts maybe.. ever. Don't get used to it! This is the blog post I should've written instead of whatever filler about Axiom J I wrote about last time, but motivation works in mysterious ways when you struggle with depression. In reality, it's not that mysterious --- I'm writing this on the last week of the first academic semester of 2021, which means the deadline anxiety has _finally_ been lifted. God damn, I hate university. | |||||
References | |||||
---------- | |||||
Since this post is a lot more technical than my others, and it's about something I don't know a lot about, I figured I should cite my sources so you can know I'm not spewing complete baloney. I don't know how people cite things in English-speaking countries, and, to be perfectly honest, I've done a terrible job of keeping track of where I got all this stuff, but here are the papers and pages and textbooks I consulted along the way: | |||||
The nLab. Seriously. So many nLab pages. I think these three are the ones I visited most often while writing this post, though: | |||||
- [closed monoidal structure on presheaves](https://ncatlab.org/nlab/show/closed+monoidal+structure+on+presheaves) - Definition of $[X,Y]$ | |||||
- [fundamental groupoid of a cubical set and the cubical nerve of a groupoid](https://ncatlab.org/nlab/show/fundamental+groupoid+of+a+cubical+set+and+the+cubical+nerve+of+a+groupoid#nerve_functor) - Direct definition of $N^{\le 2}$ | |||||
- [cubical set](https://ncatlab.org/nlab/show/cubical+set#in_higher_category_theory) - guess :) | |||||
The following papers: | |||||
- [A Model of Type Theory in Cubical Sets](http://www.cse.chalmers.se/~coquand/mod1.pdf) | |||||
- [Cubical Type Theory: a Constructive Interpretation of the Univalence Axiom](https://arxiv.org/abs/1611.02108) | |||||
- [An Elementary Illustrated Introduction to Simplicial Sets](https://arxiv.org/abs/0809.4221) | |||||
- [Varieties of Cubical Sets](https://www2.mathematik.tu-darmstadt.de/~buchholtz/varieties-of-cubical-sets.pdf) | |||||
- [All $(\infty,1)$-toposes have strict univalent universes](https://arxiv.org/abs/1904.07004) | |||||
The following pages from Kerodon: | |||||
- [The Homotopy Coherent Nerve](https://kerodon.net/tag/00KM) | |||||
- [The Nerve of a Groupoid](https://kerodon.net/tag/0035) |
@ -0,0 +1,884 @@ | |||||
--- | |||||
title: "Parsing Layout, or: Haskell's Syntax is a Mess" | |||||
date: September 3rd, 2021 | |||||
abbreviations: | |||||
sparkles: ✨ | |||||
--- | |||||
Hello! Today we're going to talk about something I'm actually _good_ at, for a change: writing compilers. Specifically, I'm going to demonstrate how to wrangle [Alex] and [Happy] to implement a parser for a simple language with the same indentation sensitive parsing behaviour as Haskell, the _layout rule_. | |||||
[Alex]: https://www.haskell.org/alex/ | |||||
[Happy]: https://www.haskell.org/happy/ | |||||
Alex and Happy are incredibly important parts of the Haskell ecosystem. If you're a Haskeller, you use a program using an Alex lexer and a Happy parser _every single day_ - every single working day, at least - GHC! Despite this fundamental importance, Alex and Happy are... _sparsely_ documented, to say the least. Hopefully this post can serve as an example of how to do something non-trivial using them. | |||||
However! While I'm going to talk about Alex and Happy here, it would be entirely possible to write a layout parser using Alex and whatever flavour of Parsec is popular this week, as long as your combinators are expressed on top of a monad transformer. It's also entirely possible to write a layout parser without Alex at all, but that's beyond my abilities. I am a mere mortal, after all. | |||||
Get ready to read the word "layout" a lot. Layout layout layout. How's your semantic satiation going? Should I say layout a couple more times? | |||||
# The Offside Rule | |||||
So, how does Haskell layout work? A small subset of tokens (`where`, `of`, `let`, `do`[^1]), called _layout keywords_, are followed by a _laid out block_ (my terminology). The happiest (hah) case is where one of these keywords is followed by a `{` token. In this case, layout parsing doesn't happen at all! | |||||
[^1]: GHC extends this set to also contain the "token" `\case`. However, `LambdaCase` isn't a single token! The &sparkles; correct &sparkles; specification is that `case` is a layout keyword if the preceding token is `\`. | |||||
```{.haskell .notag} | |||||
main = do { putStrLn | |||||
"foo" | |||||
; putStrLn "bar" | |||||
; putStrLn "quux" } | |||||
``` | |||||
This _abomination_ is perfectly valid Haskell code, since layout is disabled in a context that was started with a `{`. Great success though, since this is a very simple thing to support in a parser. The unhappy case is when we actually have to do layout parsing. In that case, the starting column of the token immediately following the layout token becomes the _reference column_ (again my terminology), we emit a (virtual) opening brace, and the **offside rule** applies. | |||||
The offside rule says that a player must have at least two opposing players, counting the goalkeep- No no, that's not right. Give me a second. Ah! Yes. The offside rule governs automatic insertion of (virtual) semicolons and closing braces. When we encounter the first token of a new line, we are burdened to compare its starting column with the reference: | |||||
- If it's on the same column as the reference column, we emit a semicolon. This is a new statement/declaration/case. | |||||
<div class=mathpar> | |||||
```haskell | |||||
do foo | |||||
bar | |||||
-- ^ same column, insert ; before. | |||||
``` | |||||
```haskell | |||||
do | |||||
foo | |||||
bar | |||||
-- ^ same column, insert ; before. | |||||
-- yes, three spaces | |||||
``` | |||||
</div> | |||||
The two token streams above have the same prefix as `do { foo; bar }`{.haskell}. | |||||
- If it's further indented than the reference column, we.. do nothing! Just go back to normal lexing. Tokens indented to the right of the reference column are interpreted as continuing the statement in the previous line. That's why you can do this: | |||||
```haskell | |||||
do | |||||
putStrLn $ | |||||
wavy | |||||
function | |||||
application | |||||
please | |||||
don't | |||||
though | |||||
``` | |||||
_All_ of those tokens are (in addition to being the first token in a line) indented further than `putStrLn`, which is our reference column. This block has no semicolons at all! | |||||
- If it's less indented than the reference column, we emit a virtual closing `}` (to end the block) and _**apply the rule again**_. This last bit is crucial: it says a single token can end all of the layout contexts it's leaving. For instance: | |||||
```haskell | |||||
foo = do a -- context 1 | |||||
do b -- context 2 | |||||
do c -- context 3 | |||||
do d -- context 4 | |||||
e | |||||
bar = 123 | |||||
``` | |||||
Assuming there was a layout context at the first column, i.e., we're in a module, then the token `bar` will be responsible for closing 4 whole layout contexts: | |||||
- It's to the left of `d`, so it closes context 4; | |||||
- It's to the left of `c`, so it closes context 3; | |||||
- It's to the left of `b`, so it closes context 2; | |||||
- It's to the left of `a`, so it closes context 1. | |||||
With all the semicolons we have a right to, the code above is this: | |||||
``` haskell | |||||
; foo = do { a -- context 1 | |||||
; do { b -- context 2 | |||||
; do { c -- context 3 | |||||
; do { d -- context 4 | |||||
; e | |||||
} | |||||
} | |||||
} | |||||
} | |||||
; bar = 123 | |||||
``` | |||||
Why do we have semicolons before `foo` and `bar`? Why, because they're in the same column as the reference token, which was presumably an import or something. | |||||
# Laid-out blocks | |||||
With that, the parser productions for laid out blocks should be clear - or, at least, easily approximable. Right? | |||||
Wrong. | |||||
You might think the production for `do` blocks is something like the following, and you'd be forgiven for doing so. It's clean, it's reasonable, it's not _actually_ Happy syntax, but it's a close enough approximation. Except that it's way incorrect! | |||||
``` | |||||
expr | |||||
: ... | |||||
| 'do' '{' statement ';' ... '}' { ... } | |||||
| 'do' VOpen statement VSemi ... VClose { ... } | |||||
``` | |||||
Well, for `do` you might be able to get away with that. But consider the laid-out code on the left, and what the lexer naïvely produces for us on the right. | |||||
<div class=mathpar> | |||||
```haskell | |||||
foo = let x = 1 in x | |||||
``` | |||||
```haskell | |||||
; foo = let { x = 1 in x | |||||
``` | |||||
</div> | |||||
You see it, right? Since no token was on a column before that of the token `x` (the reference token for the layout context started by `let`), no close brace was emitted before `in`. Woe is us! However, the Haskell report has a way around this. They write it cryptically, like this: | |||||
> | |||||
``` | |||||
... | |||||
L (t : ts) (m : ms) = } : (L (t : ts) ms) if m ≠ 0 and parse-error(t) | |||||
... | |||||
``` | |||||
> The side condition `parse-error(t)` is to be interpreted as follows: if the tokens generated so far by `L` together with the next token `t` represent an invalid prefix of the Haskell grammar, and the tokens generated so far by `L` followed by the token `}` represent a valid prefix of the Haskell grammar, then `parse-error(t)` is true. | |||||
> | |||||
> The test `m ≠ 0` checks that an implicitly-added closing brace would match an implicit open brace. | |||||
I'll translate, since I'm fluent in standardese: Parse errors are allowed to terminate layout blocks, as long as no explicit `{` was given. This is the entire reason that Happy has an `error` token, which "matches parse errors"! For further reference, `L` is a function `[Token] -> [Int] -> [Token]`{.haskell} which is responsible for inserting virtual `{`, `;` and `}` tokens. The `[Int]`{.haskell} argument is the stack of reference columns. | |||||
So a better approximation of the grammar is: | |||||
``` | |||||
expr | |||||
: ... | |||||
| 'do' '{' statement ';' ... '}' { ... } | |||||
| 'do' VOpen statement VSemi ... LClose { ... } | |||||
LClose | |||||
: VClose {- lexer inserted '}' -} | |||||
| error {- parse error generated '}' -} | |||||
``` | |||||
We have unfortunately introduced some dragons, since the parser now needs to finesse the lexer state, meaning they must be interleaved _explicitly_, instead of being run in sequence (using a lazy list of tokens or similar). They must be in the same Monad. | |||||
So. How do we implement this? | |||||
# How we implement this | |||||
## Preliminaries | |||||
To start with, we create a new Haskell project. I'd normally gloss over this, but in this case, there are adjustments to the Cabal file that must be made to inform our build of the dependencies on `alex` and `happy`. I use Stack; You can use whatever. | |||||
```bash | |||||
% stack new layout simple | |||||
``` | |||||
To our Cabal file, we add a `build-tool-depends` on Alex and Happy. Cabal (the build system) comes with built-in rules to detect `.x` and `.y` files and compile these as Ale**x** and Happ**y** respectively. | |||||
```{.haskell tag="layout.cabal"} | |||||
build-tool-depends: alex:alex >= 3.2.4 && < 4.0 | |||||
, happy:happy >= 1.19.12 && < 2.0 | |||||
build-depends: base >= 4.7 && < 5 | |||||
, array >= 0.5 && < 0.6 | |||||
``` | |||||
This has been the recommended way of depending on build tools since Cabal 2. The syntax of build-tool-depends entries is `package:executable [version bound]`, where the version bound is optional but good style. With this, running `stack build` (and/or `cabal build`) will automatically compile parser and lexer specifications **listed in your `other-modules` field** to Haskell files. | |||||
Alex generated code has a dependency on the `array` package. | |||||
## What are we parsing | |||||
For the language we're parsing, I've chosen to go with a representative subset of Haskell's grammar: Variables, lambda expressions, `let` expressions, and application. For the top-level, we'll support function definitions, where the lhs must be a sequence of variables, and the rhs can optionally have a `where` clause. | |||||
```{ .haskell tag="src/Syntax.hs" } | |||||
module Syntax (Expr(..), Decl(..), Program) where | |||||
data Expr | |||||
= Var String | |||||
| App Expr Expr | |||||
| Lam String Expr | |||||
| Let [Decl] Expr | |||||
deriving (Eq, Show) | |||||
data Decl | |||||
= Decl { declName :: String | |||||
, declRhs :: Expr | |||||
, declWhere :: Maybe [Decl] | |||||
} | |||||
deriving (Eq, Show) | |||||
type Program = [Decl] | |||||
``` | |||||
For simplicity, identifiers will be ASCII only. We're also using strings and lists everywhere, instead of more appropriate data structures (`Text` and `Seq`), for clarity. Don't forget to add the `Syntax` module to the `other-modules` field in `layout.cabal`. | |||||
## The Lexer | |||||
Before we can parse, we must lex. But before we can lex, we must know the type of tokens. We create a separate Haskell module to contain the definition of the token type and `Lexer` monad. This is mostly done because HIE does not support Alex and Happy, and I've become dependent on HIE for writing correct code fast. | |||||
We'll call this new module `Lexer.Support`, just because. Our type of tokens must contain our keywords, but also punctuation (`=`, `{`, `;`, `}`, `\\`, `->`) and _virtual_ punctuation (tokens inserted by layout). We declare: | |||||
```{.haskell tag="src/Lexer/Support.hs"} | |||||
module Lexer.Support where | |||||
data Token | |||||
= TkIdent String -- identifiers | |||||
-- Keywords | |||||
| TkLet | TkIn | TkWhere | |||||
-- Punctuation | |||||
| TkEqual | TkOpen | TkSemi | TkClose | |||||
| TkLParen | TkRParen | |||||
| TkBackslash | TkArrow | |||||
-- Layout punctuation | |||||
| TkVOpen | TkVSemi | TkVClose | |||||
-- End of file | |||||
| TkEOF | |||||
deriving (Eq, Show) | |||||
``` | |||||
### An Alex file | |||||
Alex modules always start with a Haskell header, between braces. In general, braces in Alex code represent a bit of Haskell we're inserting: The header, lexer actions, and the footer. | |||||
```{.alex tag="src/Lexer.x"} | |||||
{ | |||||
module Lexer where | |||||
import Lexer.Support | |||||
} | |||||
%encoding "latin1" | |||||
``` | |||||
After the header, we can also include magical incantations: `%wrapper` will tell Alex to include a support code template with our lexer, and `%encoding` will tell it whether to work with bytes or with Unicode. _Nobody uses the Unicode support_, not even GHC: The community wisdom is to trick Alex into reading Unicode by compressing Unicode classes down into high byte characters. Yeah, **yikes**. | |||||
Our file can then have some macro definitions. Macros with the `$` sigil are character classes, and `@` macros are complete regular expressions. | |||||
```{.alex tag="src/Lexer.x"} | |||||
$lower = [ a-z ] | |||||
$upper = [ A-Z ] | |||||
@ident = $lower [ $lower $upper _ ' ]* | |||||
``` | |||||
And, finally, comes the actual lexer specification. We include the final magic word `:-` on a line by itself, and then list a bunch of lexing rules. Lexing rules are specified by: | |||||
- A _startcode_, which names a _state_. These are written `<ident>` or `<0>`, where `<0>` is taken to be the "default" startcode. Rules are by default enabled in all states, and can be enabled in many; | |||||
- A _left context_, which is a regular expression matched against the character immediately preceding the token; | |||||
- A _regular expression_, describing the actual token; | |||||
- A _right context_, which can be a regular expression to be matched after the token or a fragment of Haskell code, called a _predicate_. If the predicate is present, it must have the following type: | |||||
```{.haskell .notag} | |||||
{ ... } :: user -- predicate state | |||||
-> AlexInput -- input stream before the token | |||||
-> Int -- length of the token | |||||
-> AlexInput -- input stream after the token | |||||
-> Bool -- True <=> accept the token | |||||
``` | |||||
- An _action_, which can be `;`, causing the lexer to skip the token, or some Haskell code, which can be any expression, as long as every action has the same type. | |||||
Here's a couple rules so we can get started. Don't worry - `emit` is a secret tool that will help us later. | |||||
```{.alex tag="src/Lexer.x"} | |||||
:- | |||||
[\ \t]+ ; | |||||
<0> @ident { emit TkIdent } | |||||
``` | |||||
Alright, let's compile this code and see what we get! Oh, we get some type errors. Okay. Let's see what's up: | |||||
``` | |||||
Not in scope: type constructor or class ‘AlexInput’ | |||||
| | |||||
264 | | AlexLastSkip !AlexInput !Int | |||||
| ^^^^^^^^^ | |||||
``` | |||||
### Making our own wrapper | |||||
Right. That's probably related to that `%wrapper` thing I told you about. You'd be correct: The wrappers solve this problem by including a handful of common patterns pre-made, but we can very well supply our own! The interface to an Alex-generated lexer is documented [here](https://www.haskell.org/alex/doc/html/api.html), but we're interested in §5.1 specifically. We have to provide the following definitions: | |||||
```{.haskell .notag} | |||||
type AlexInput | |||||
alexGetByte :: AlexInput -> Maybe (Word8, AlexInput) | |||||
alexInputPrevChar :: AlexInput -> Char | |||||
``` | |||||
And we get in return a lexing function, whose type and interface I'm not going to copy-paste here. The `alexGetByte` function is called by the lexer whenever it wants input, so that's the natural place to do position handling, which, yes, we have to do ourselves. Let's fill in these definitions in the `Lexer.Support` module. | |||||
Here's an okay choice for `AlexInput`: | |||||
```{.haskell tag="src/Lexer/Support.hs"} | |||||
data AlexInput | |||||
= Input { inpLine :: {-# UNPACK #-} !Int | |||||
, inpColumn :: {-# UNPACK #-} !Int | |||||
, inpLast :: {-# UNPACK #-} !Char | |||||
, inpStream :: String | |||||
} | |||||
deriving (Eq, Show) | |||||
``` | |||||
We can immediately take `alexInputPrevChar = inpLast` as the definition of that function and be done with it, which is fantastic. `alexGetByte`, on the other hand, is a bit more involved, since it needs to update the position based on what character was read. The column _must_ be set properly, otherwise layout won't work! The line counter is less important, though. | |||||
```haskell | |||||
alexGetByte :: AlexInput -> Maybe (Word8, AlexInput) | |||||
alexGetByte inp@Input{inpStream = str} = advance <$> uncons str where | |||||
advance ('\n', rest) = | |||||
( fromIntegral (ord '\n') | |||||
, Input { inpLine = inpLine inp + 1 | |||||
, inpColumn = 1 | |||||
, inpLast = '\n' | |||||
, inpStream = rest } | |||||
) | |||||
advance (c, rest) = | |||||
( fromIntegral (ord c) | |||||
, Input { inpLine = inpLine inp | |||||
, inpColumn = inpColumn inp + 1 | |||||
, inpLast = c | |||||
, inpStream = rest } | |||||
) | |||||
``` | |||||
Now, our lexer has a lot of state. We have the start codes, which form a stack. We have the stack of reference columns, and we have the input. Let's use a State monad to keep track of this, with an `Either String` base to keep track of errors. | |||||
```{.haskell tag="src/Lexer/Support.hs"} | |||||
newtype Lexer a = Lexer { _getLexer :: StateT LexerState (Either String) a } | |||||
deriving | |||||
( Functor | |||||
, Applicative | |||||
, Monad | |||||
, MonadState LexerState | |||||
, MonadError String | |||||
) | |||||
data Layout = ExplicitLayout | LayoutColumn Int | |||||
deriving (Eq, Show, Ord) | |||||
data LexerState | |||||
= LS { lexerInput :: {-# UNPACK #-} !AlexInput | |||||
, lexerStartCodes :: {-# UNPACK #-} !(NonEmpty Int) | |||||
, lexerLayout :: [Layout] | |||||
} | |||||
deriving (Eq, Show) | |||||
initState :: String -> LexerState | |||||
initState str = LS { lexerInput = Input 0 1 '\n' str | |||||
, lexerStartCodes = 0 :| [] | |||||
, lexerLayout = [] | |||||
} | |||||
runLexer :: Lexer a -> String -> Either String a | |||||
runLexer act s = fst <$> runStateT (_getLexer act) (initState s) | |||||
``` | |||||
<details> | |||||
<summary> I'll spare you the boring stack manipulation stuff by putting it in one of these \<details\> elements you can expand: </summary> | |||||
```haskell | |||||
startCode :: Lexer Int | |||||
startCode = gets (NE.head . lexerStartCodes) | |||||
pushStartCode :: Int -> Lexer () | |||||
pushStartCode i = modify' $ \st -> | |||||
st { lexerStartCodes = NE.cons i (lexerStartCodes st ) | |||||
} | |||||
-- If there is no start code to go back to, we go back to the 0 start code. | |||||
popStartCode :: Lexer () | |||||
popStartCode = modify' $ \st -> | |||||
st { lexerStartCodes = | |||||
case lexerStartCodes st of | |||||
_ :| [] -> 0 :| [] | |||||
_ :| (x:xs) -> x :| xs | |||||
} | |||||
layout :: Lexer (Maybe Layout) | |||||
layout = gets (fmap fst . uncons . lexerLayout) | |||||
pushLayout :: Layout -> Lexer () | |||||
pushLayout i = modify' $ \st -> | |||||
st { lexerLayout = i:lexerLayout st } | |||||
popLayout :: Lexer () | |||||
popLayout = modify' $ \st -> | |||||
st { lexerLayout = | |||||
case lexerLayout st of | |||||
_:xs -> xs | |||||
[] -> [] | |||||
} | |||||
``` | |||||
</details> | |||||
### Putting it all together | |||||
It's up to us to specify what an action is - remember, the action is the code block following a lexer rule - so we'll go with `String -> Lexer Token`. The `String` argument is the lexed token, and we'll have to take this slice ourselves when we implement the interface between the Alex lexer and our `Lexer` monad. The `emit` action is simple, and we'll throw in `token` for no extra cost: | |||||
```haskell | |||||
emit :: (String -> Token) -> String -> Lexer Token | |||||
emit = (pure .) | |||||
token :: Token -> String -> Lexer Token | |||||
token = const . pure | |||||
``` | |||||
Back to our `Lexer.x`, we have to write the function to interpret Alex lexer results as `Lexer` monad actions. It goes like this: | |||||
```{.haskell tag="src/Lexer.x, add at the bottom" } | |||||
{ | |||||
handleEOF = do | |||||
-- TODO: handle layout | |||||
pure TkEOF | |||||
scan :: Lexer Token | |||||
scan = do | |||||
input@(Input _ _ _ string) <- gets lexerInput | |||||
startcode <- startCode | |||||
case alexScan input startcode of | |||||
AlexEOF -> handleEOF | |||||
AlexError (Input _ _ _ inp) -> | |||||
throwError $ "Lexical error: " ++ show (head inp) | |||||
AlexSkip input' _ -> do | |||||
modify' $ \s -> s { lexerInput = input' } | |||||
scan | |||||
AlexToken input' tokl action -> do | |||||
modify' $ \s -> s { lexerInput = input' } | |||||
action (take tokl string) | |||||
} | |||||
``` | |||||
Now we can do a `stack build` to compile the lexer and `stack repl` to play around with it! | |||||
```{.haskell tag="Did you know my Myers-Briggs type is GHCI?"} | |||||
λ runLexer scan "abc" | |||||
Right (TkIdent "abc") | |||||
λ runLexer scan " abc" | |||||
Right (TkIdent "abc") | |||||
λ runLexer scan " {" | |||||
Left "Lexical error: '{'" | |||||
``` | |||||
Okay, yeah, let's fill out our lexer a bit more. | |||||
```{.alex tag="src/Lexer.x, lexing rules"} | |||||
<0> in { token TkIn } | |||||
<0> \\ { token TkBackslash } | |||||
<0> "->" { token TkArrow } | |||||
<0> \= { token TkEqual } | |||||
<0> \( { token TkLParen } | |||||
<0> \) { token TkRParen } | |||||
<0> \{ { token TkOpen } | |||||
<0> \} { token TkClose } | |||||
``` | |||||
That's all of the easy rules we can do - All of the others interact with the layout state, which we'll see how to do in the paragraph immediately following this one. I'm writing a bit of padding here so you can take a breather and prepare yourself for the lexer states that we'll deal with now. But, please believe me when I say we're doing this lexer madness so our parser can be sane. | |||||
### Actually Doing Layout (trademark pending) | |||||
We'll need two rules for the layout keywords. Alex rules are matched in order, top-to-bottom, so **make sure your keywords are before your identifier rule**. | |||||
```{.alex tag="src/Lexer.x"} | |||||
<0> let { layoutKw TkLet } | |||||
<0> where { layoutKw TkWhere } | |||||
``` | |||||
And the action for layout keywords, which has to go in the lexer since it'll refer to a startcode. Alex automatically generates definitions for all the startcodes we mention. | |||||
```haskell | |||||
layoutKw t _ = do | |||||
pushStartCode layout | |||||
pure t | |||||
``` | |||||
The interesting rules for handling layout are in the `layout` startcode, which we'll declare as a block to keep things a bit tidier. When in this startcode, we need to handle either an explicitly laid-out block (that is, `{`), or the start of a layout context: The indentation of the next token determines where we start. | |||||
```{.alex tag="src/Lexer.x"} | |||||
<layout> { | |||||
-- Skip comments and whitespace | |||||
"--" .* \n ; | |||||
\n ; | |||||
\{ { openBrace } | |||||
() { startLayout } | |||||
} | |||||
``` | |||||
The `openBrace` and `startLayout` lexer actions are also simple: | |||||
```haskell | |||||
openBrace _ = do | |||||
popStartCode | |||||
pushLayout ExplicitLayout | |||||
pure TkOpen | |||||
startLayout _ = do | |||||
popStartCode | |||||
reference <- Lexer.Support.layout | |||||
col <- gets (inpColumn . lexerInput) | |||||
if Just (LayoutColumn col) <= reference | |||||
then pushStartCode empty_layout | |||||
else pushLayout (LayoutColumn col) | |||||
pure TkVOpen | |||||
``` | |||||
Here's another rule. Suppose we have:
```haskell | |||||
foo = bar where | |||||
spam = ham | |||||
``` | |||||
If we just apply the rule that the next token after a layout keyword determines the column for the layout context, then we're starting another layout context at column 1! That's definitely not what we want.
The fix: A new layout context only starts if the first token is to the right of the previous layout context. That is: a block only starts if it's on the same column as the layout context, or indented further. | |||||
But! We still need to emit a closing `}` for the one that `openBrace` generated! This is the sole function of the `empty_layout` startcode: | |||||
<div class=mathpar> | |||||
``` | |||||
<empty_layout> () { emptyLayout } | |||||
``` | |||||
```haskell | |||||
emptyLayout _ = do | |||||
popStartCode | |||||
pushStartCode newline | |||||
pure TkVClose | |||||
``` | |||||
</div> | |||||
We're on the home stretch. I mentioned another startcode - `newline`. It's where we do the offside rule, and our lexer will finally be complete. | |||||
### The Offside Rule, again | |||||
The `newline` state is entered in two places: After an empty layout block (as a short-circuit), and after, well, a new line character. Comments also count as newline characters, by the way. | |||||
```{.alex tag="src/Lexer.x, rule"} | |||||
<0> "--" .* \n { \_ -> pushStartCode newline *> scan } | |||||
<0> \n { \_ -> pushStartCode newline *> scan } | |||||
``` | |||||
In the `newline` state, we again scan for a token, and call for an action, just like for `layout`. The difference is only in the action: Whenever _any_ token is encountered, we perform the offside rule, _if_ we're in a layout context that mandates it. | |||||
```{.alex tag="src/Lexer.x, rule"} | |||||
<newline> { | |||||
\n ; | |||||
"--" .* \n ; | |||||
() { offsideRule } | |||||
} | |||||
``` | |||||
The code for the offside rule is a bit hairy, but follows from the spec: | |||||
```{.haskell tag="src/Lexer.x, epilogue code"} | |||||
offsideRule _ = do | |||||
context <- Lexer.Support.layout | |||||
col <- gets (inpColumn . lexerInput) | |||||
let continue = popStartCode *> scan | |||||
case context of | |||||
Just (LayoutColumn col') -> do | |||||
case col `compare` col' of | |||||
EQ -> do | |||||
popStartCode | |||||
pure TkVSemi | |||||
GT -> continue | |||||
LT -> do | |||||
popLayout | |||||
pure TkVClose | |||||
_ -> continue | |||||
``` | |||||
Check out how cleanly those three cases map to the rules I described [way back when](#h0). We `compare`{.haskell} the current column with the reference, and: | |||||
- If it's `EQ`, add a semicolon. | |||||
- If it's `GT`, continue lexing. | |||||
- If it's `LT`, close as many layout contexts as possible. | |||||
<details> | |||||
<summary> | |||||
**Exercise**: In the `handleEOF` action, close all the pending layout contexts. As a hint, the easiest way to emit a token that doesn't consume any input is using a startcode and a lexer action. Figuring out when we've run out is part of the challenge :)
</summary> | |||||
The rule: | |||||
```{.alex tag="src/Lexer.x, rule"} | |||||
<eof> () { doEOF } | |||||
``` | |||||
The action: | |||||
```{.haskell tag="src/Lexer.x, epilogue code"} | |||||
handleEOF = pushStartCode eof *> scan | |||||
doEOF _ = do | |||||
t <- Lexer.Support.layout | |||||
case t of | |||||
Nothing -> do | |||||
popStartCode | |||||
pure TkEOF | |||||
_ -> do | |||||
popLayout | |||||
pure TkVClose | |||||
``` | |||||
</details> | |||||
We can write a `Lexer` action (not a lexer action!) to lex and `Debug.Trace.trace`{.haskell} - sue me - as many tokens as the lexer wants to give us, until an EOF is reached: | |||||
```haskell | |||||
lexAll :: Lexer () | |||||
lexAll = do | |||||
tok <- scan | |||||
case tok of | |||||
TkEOF -> pure () | |||||
x -> do | |||||
traceM (show x) | |||||
lexAll | |||||
``` | |||||
Now we can actually lex some Haskell code! Well, not much of it. Forget numbers, strings, and most keywords, but we _can_ lex this: | |||||
<div class="mathpar"> | |||||
```haskell | |||||
foo = let | |||||
x = let | |||||
y = z | |||||
in y | |||||
in x | |||||
``` | |||||
```haskell | |||||
TkIdent "foo" | |||||
TkEqual | |||||
TkLet | |||||
TkVOpen | |||||
TkIdent "x" | |||||
TkEqual | |||||
TkLet | |||||
TkVOpen | |||||
TkIdent "y" | |||||
TkEqual | |||||
TkIdent "z" | |||||
TkVSemi | |||||
TkIn | |||||
TkIdent "y" | |||||
TkVClose | |||||
TkVClose | |||||
TkIn | |||||
TkIdent "x" | |||||
``` | |||||
</div> | |||||
That is, that code is lexed as if it had been written: | |||||
```{.haskell tag="Hmm..."} | |||||
foo = let { | |||||
x = let { | |||||
y = z | |||||
; in y | |||||
}} in x | |||||
``` | |||||
That's... Yeah. Hmm. That's _not right_. What are we forgetting? Ah, who am I kidding, you've guessed this bit. I even said it myself! | |||||
> Parse errors are allowed to terminate layout blocks. | |||||
We don't have a parser to get errors from, so our layout blocks are terminating too late. Let's write a parser! | |||||
## The Parser | |||||
Happy is, fortunately, less picky about how to generate code. Instead of appealing to some magic symbols that it just hopes really hard are in scope, Happy asks us how we want it to interface with the lexer. We'll do it &sparkles; Monadically &sparkles;, of course. | |||||
Happy files start the same way as Alex files: A Haskell code block, between braces, and some magic words. You can look up what the magic words do in the documentation, or you can guess - I'm just gonna include all the header here: | |||||
```{.happy tag="src/Parser.y"} | |||||
{ | |||||
module Parser where | |||||
import Control.Monad.Error | |||||
import Lexer.Support | |||||
} | |||||
%name parseExpr Expr | |||||
%tokentype { Token } | |||||
%monad { Lexer } | |||||
%lexer { lexer } { TkEOF } | |||||
%errorhandlertype explist | |||||
%error { parseError } | |||||
``` | |||||
After these magic incantations (by the way, if you can't find the docs for errorhandlertype, that's because the docs you're looking at are out of date. See [here](https://monlih.github.io/happy-docs/)), we list our tokens in the `%token` directive. In the braces we write Haskell - not an expression, but a pattern. | |||||
```{.happy tag="src/Parser.y, after the directives"} | |||||
%token | |||||
VAR { TkIdent $$ } | |||||
'let' { TkLet } | |||||
'in' { TkIn } | |||||
'where' { TkWhere } | |||||
'=' { TkEqual } | |||||
'{' { TkOpen } | |||||
';' { TkSemi } | |||||
'}' { TkClose } | |||||
'\\' { TkBackslash } | |||||
'->' { TkArrow } | |||||
'(' { TkLParen } | |||||
')' { TkRParen } | |||||
OPEN { TkVOpen } | |||||
SEMI { TkVSemi } | |||||
CLOSE { TkVClose } | |||||
%% | |||||
``` | |||||
The special `$$` pattern says that if we use a `VAR` token in a production, its value should be the string contained in the token, rather than the token itself. We write productions after the `%%`, and they have this general syntax: | |||||
```happy | |||||
Production :: { Type } | |||||
: rule1 { code1 } | |||||
| rule2 { code2 } | |||||
| ... | |||||
``` | |||||
For starters, we have these productions. You can see that in the code associated with a rule, we can refer to the tokens parsed using `$1, $2, $3, ...`. | |||||
```{.happy tag="src/Parser.y, after the %%"} | |||||
Atom :: { Expr } | |||||
: VAR { Var $1 } | |||||
| '(' Expr ')' { $2 } | |||||
Expr :: { Expr } | |||||
: '\\' VAR '->' Expr { Lam $2 $4 } | |||||
| FuncExpr { $1 } | |||||
FuncExpr :: { Expr } | |||||
: FuncExpr Atom { App $1 $2 } | |||||
| Atom { $1 } | |||||
``` | |||||
In the epilogue, we need to define two functions, since I mentioned them way up there in the directives. The `lexer` function is a continuation-passing style function that needs to call `cont` with the next token from the lexer. The `parseError` function is how we should deal with parser errors. | |||||
```{.happy tag="src/Parser.y, on the very bottom"} | |||||
{ | |||||
lexer cont = scan >>= cont | |||||
parseError = throwError . show | |||||
} | |||||
``` | |||||
By using the `%name` directive we can export a parser production as an action in the `Lexer` monad (since that's what we told Happy to use). Combining that with our `runLexer`, we can parse some expressions, yay! | |||||
```haskell | |||||
λ runLexer parseExpr "(\\x -> x) (\\y -> y)" | |||||
Right (App (Lam "x" (Var "x")) (Lam "y" (Var "y"))) | |||||
``` | |||||
### Laid-out productions | |||||
Now we'll introduce some productions for parsing laid-out lists of declarations, then we'll circle back and finish with the parser for declarations itself. | |||||
```{.happy tag="src/Parser.y, add wherever"} | |||||
DeclBlock :: { [Decl] } | |||||
: '{' DeclListSemi '}' { $2 } | |||||
| OPEN DeclListSEMI Close { $2 } | |||||
DeclListSemi :: { [Decl] } | |||||
: Decl ';' DeclListSemi { $1:$3 } | |||||
| Decl { [$1] } | |||||
| {- empty -} { [] } | |||||
DeclListSEMI :: { [Decl] } | |||||
  : Decl SEMI DeclListSEMI { $1:$3 }
| Decl { [$1] } | |||||
| {- empty -} { [] } | |||||
``` | |||||
That is, a block of declarations is either surrounded by `{ ... }` or by `OPEN ... Close`. But what's `Close`? That's right, you've guessed this bit too: | |||||
```{.happy tag="src/Parser.y, add wherever"} | |||||
Close | |||||
: CLOSE { () } | |||||
| error {% popLayout } | |||||
``` | |||||
Say it louder for the folks in the cheap seats - Parse! Errors! Can! End! Layout! Blocks! Isn't that just magical? | |||||
Now we can write a production for `let` (in `Expr`): | |||||
```{.happy tag="src/Parser.y, add to Expr"} | |||||
| 'let' DeclBlock 'in' Expr { Let $2 $4 } | |||||
``` | |||||
And one for declarations: | |||||
```{.happy tag="src/Parser.y, add wherever"} | |||||
Decl | |||||
: VAR '=' Expr { Decl $1 $3 Nothing } | |||||
| VAR '=' Expr 'where' DeclBlock { Decl $1 $3 (Just $5) } | |||||
``` | |||||
Add a name directive for `Decl` and.. | |||||
```{.happy tag="src/Parser.y, add to the directives"} | |||||
%name parseDecl Decl | |||||
``` | |||||
We're done! | |||||
# No, seriously, that's it. | |||||
Yeah, 3000 words is all it takes to implement a parser for Haskell layout. Running this on the example where the lexer dropped the ball from earlier, we can see that the parser has correctly inserted all the missing `}`s in the right place because of the `Close` production, and the AST we get is what we expect: | |||||
```haskell | |||||
λ runLexer parseDecl <$> readFile "that-code-from-before.hs" | |||||
Right | |||||
(Decl { declName = "foo" | |||||
, declRhs = | |||||
Let [ Decl { declName = "x" | |||||
, declRhs = | |||||
Let | |||||
[ Decl { declName = "y", declRhs = Var "z" | |||||
, declWhere = Nothing} ] | |||||
(Var "y") | |||||
, declWhere = Nothing | |||||
} | |||||
] | |||||
(Var "x") | |||||
, declWhere = Nothing | |||||
}) | |||||
``` | |||||
I've thrown the code from this post up in an organised manner on [my Gitea](https://git.amelia.how/amelia/layout-parser/). The lexer worked out to be 130 lines, and the parser - just 81. | |||||
Here's why I favour this approach: | |||||
- It's maintainable. Apart from the rendezvous in `Close`, the lexer and the parser are completely independent. They're also entirely declarative - Reading the lexer rules tells you exactly what the lexer does, without having to drop down to how the actions are implemented. | |||||
- It cleanly extends to supporting ASTs with annotations - you'd change our current `Token`{.haskell} type to a `TokenClass`{.haskell} type, and a `Token` would be finished using the line and column from the lexer state. Annotating the AST with these positions can be done by projecting from `$N` in the Happy rules. | |||||
- It's performant. Obviously the implementation here, using `String`, is not, but by changing how the `AlexInput` type behaves internally, we can optimise by using e.g. a lazy ByteString, a lazy Text, or some other kind of crazy performant stream type. I don't think anyone's ever complained about parsing being their bottleneck with GHC. | |||||
- It's popular! The code implemented here is a simplification (wild simplification) of the approach used in GHC and Agda. | |||||
Thank you for reading this post. I have no idea what I'm going to write about next! |
@ -0,0 +1,420 @@ | |||||
--- | |||||
title: Typing (GHC) Haskell in Haskell | |||||
subtitle: The OutsideIn(X) Elaboration Algorithm | |||||
date: September 5th, 2021 | |||||
public: false | |||||
--- | |||||
Typing Haskell in Haskell, in addition to being a solved problem, is the name of [a paper] by Mark P. Jones that constructs, in detail, a solution to that problem. The goal of that paper is noble: a complete specification of Haskell's type system as an executable Haskell program. And, indeed, in 2000, when that paper was published, it _was_ a complete specification of Haskell's type system, depending on what you mean by Haskell. However, most people do not mean "Haskell 2010" when they say Haskell, let alone Haskell 98 - what the paper implements. Further, it's been 21 years! | |||||
[a paper]: https://web.cecs.pdx.edu/~mpj/thih/thih.pdf | |||||
When I say Haskell, personally, I mean "GHC's default language", and possibly throw in some 20 extensions on top anyway. Here's a small list of conveniences 2021 Haskell programmers are used to, but were implemented in the two decades since _Typing Haskell in Haskell_ was first published - or, in the case of FunDeps, were simply not standardised: | |||||
- Rank-N types, a limited implementation of first-class polymorphism, let a Haskell programmer write `forall`s to the left of as many arrows as she wants. For a motivating example, take the ST monad, from which a value can be extracted using `runST`: | |||||
```haskell | |||||
runST :: (forall s. ST s a) -> a | |||||
``` | |||||
Since the type of the state token - `s` - is universally quantified, it's not "chosen" by the ST computation, but rather by `runST` itself, making sure that the computation can't adversarially "choose" an instantiation of `s` that violates referential transparency. | |||||
Rank-N types were first implemented in GHC in November 2001, in [this commit](https://gitlab.haskell.org/ghc/ghc/-/commit/5e3f005d3012472e422d4ffd7dca5c21a80fca80). | |||||
[rankn]: https://www.microsoft.com/en-us/research/wp-content/uploads/2016/02/putting.pdf | |||||
- Generalised algebraic data types (GADTs), which let us introduce local _equational constraints_ between types by means of pattern matching. I'm a big fan of GADTs, so much so that I paid 20 bucks to register the domain [gadt.fans](https://gadt.fans). The classic example of GADTs is a well-typed interpreter, where the type of each constructor constrains the return type of the interpreter: | |||||
```haskell | |||||
data Exp a where | |||||
Add :: Exp Int -> Exp Int -> Exp Int | |||||
IsZ :: Exp Int -> Exp Bool | |||||
If :: Exp Bool -> Exp a -> Exp a -> Exp a | |||||
Lit :: Int -> Exp Int | |||||
eval :: Exp a -> a | |||||
eval (Lit i) = i | |||||
{- most cases omitted for brevity -} | |||||
``` | |||||
GADTs were first implemented in GHC in September 2004, in [this commit](https://gitlab.haskell.org/ghc/ghc/-/commit/23f40f0e9be6d4aa5cf9ea31d73f4013f8e7b4bd). | |||||
- Functional dependencies, inspired by database theory, let a programmer specify that some of the arguments to one of their type classes are entirely determined by the value of some other argument. If that's a bit abstract, a more operational reading is that functional dependencies improve inferred types by adding new equalities. The classic example is this:
```haskell | |||||
class Collects c e | c -> e where | |||||
singleton :: e -> c | |||||
union :: c -> c -> c | |||||
``` | |||||
Without the functional dependency, the inferred type for the function `bagTwo` below would be `(Collects c e1, Collects c e2) => e1 -> e2 -> c`{.haskell}, implying that bagTwo is capable of placing two values of different types in the same collection `c`. | |||||
```haskell | |||||
bagTwo x y = singleton x `union` singleton y | |||||
``` | |||||
With the functional dependency `c -> e` in place, the two inferred constraints `(Collects c e1, Collects c e2)` _interact_ to introduce an equality `e1 ~ e2`, improving the inferred type of the function to
```haskell | |||||
bagTwo :: Collects c a => a -> a -> c | |||||
``` | |||||
Functional dependencies were first implemented in GHC in December 1999, in [this commit](https://gitlab.haskell.org/ghc/ghc/-/commit/297f714906efa8a76378c6fa6db3cd592f896749). The connection between database theory and type systems, integral in the design of functional dependencies for Haskell type classes, is made clear in [the original paper](http://web.cecs.pdx.edu/~mpj/pubs/fundeps-esop2000.pdf), section 5. | |||||
- Type families, originally introduced as _associated types_, are, as [Richard Eisenberg put it](https://gitlab.haskell.org/ghc/ghc/-/issues/11080), "non-generative, non-injective symbols whose equational theory is given by GHC". Put another way, they're almost-but-not-quite functions between types. Type families are _weird_, and complicate type checking massively. For instance, consider the following program, taken from Stolarek et al.'s "Injective Type Families for Haskell":
```haskell | |||||
class Manifold a where | |||||
type Base a | |||||
project :: a -> Base a | |||||
unproject :: Base a -> a | |||||
id :: Manifold a => Base a -> Base a | |||||
id = project . unproject | |||||
``` | |||||
Does this program type check? Surprisingly, the answer is no! The reason is that the type variable `a` only appears under type families, and in the set of constraints, so GHC reports the function's type as ambiguous. | |||||
To understand why this is problematic, imagine that we have two types `X`{.haskell} and `Y`{.haskell} such that `Base X = Base Y = [Double]`{.haskell}. Given a `vec :: [Double]`, what instance of `Manifold` should the call `id vec` use? We can't choose - we can only guess, and runtime behaviour that depends on a compiler guess is very much frowned upon! | |||||
Type families were originally implemented ca. 2006, but I've been unable to track down the precise commit. I believe it was done as part of the patch which changed GHC's intermediate representation to System $F_C$ (we'll get to it) - this is backed up by this sentence from the conclusion of the $F_C$ paper: "At the same time, we re-implemented GHC’s support for newtypes and GADTs to work as outlined in §2 and added support for associated (data) types". | |||||
All of these features interact with each other in entirely non-trivial ways, creating a powerful source of GHC infelicities with $n^2$ magnitude. The interaction between GADTs and type families, for instance, mandates an elaboration algorithm which can cope with _local assumptions_ in a principled way, since GADTs can introduce equalities between existentials which interact with type family axioms non-trivially. Wobbly types just won't cut it.
That's where $\operatorname{OutsideIn}$ comes in - or, more specifically, $\operatorname{OutsideIn}(X)$, since the elaboration algorithm is parametrised over the constraint domain $X$. This post is intended as a companion to [the JFP paper introducing OutsideIn](https://www.microsoft.com/en-us/research/wp-content/uploads/2016/02/jfp-outsidein.pdf), not as a replacement. The core idea is that we can record where the local assumptions are introduced in a tree of _implication constraints_, built out of the constraints in our domain $X$, and these can be reduced - outside-in - to an $X$-specific solver. | |||||
Diverging from the paper slightly, I'll implement the elaborator as a _bidirectional_ algorithm, which lets us take advantage of programmer-written type signatures. The signatures are there for a reason! It's silly to use type signatures as a source of complication (infer a type for the binding, then match it against the signature) rather than as a source of _simplification_. Plus - bidirectional type checking makes higher-rank types almost trivial - I think we can all agree that's a good thing, yeah? | |||||
# The Problem Statement | |||||
We're given a Haskell program - well, a program written in a proper subset of a proper superset of Haskell - and we want to tell whether it's type correct. Our superset extends Haskell 2010 to feature type families, GADTs, rank-N types and functional dependencies, but our subset doesn't contain most of Haskell's niceties, like definitions by equations, guards, or even `if`{.haskell}: you get `case`{.haskell}, and _you're going to like it_. | |||||
Well, more than just telling whether or not the program is type correct, we want to produce an _elaborated program_ in a simpler language - GHC calls this "Core" - if and only if the program is correct, and report a (set of) good type errors otherwise. The elaborated program also has to be type correct, and, ideally, we have a _second_, much smaller type checker over the Core language that calls the big, complicated elaborator out on its bullshit. Because of this, the elaborator has to produce _evidence_ justifying its wilder claims. | |||||
There are two kinds of evidence we need to produce: _coercions_ are inserted where the expected type of an expression is equal to its actual type in a non-trivial way. Consider the program below, and its elaboration to the right: | |||||
<div class=mathpar> | |||||
```haskell | |||||
data T1 a where | |||||
TI :: T1 Int | |||||
TB :: T1 Bool | |||||
foo :: T1 a -> a | |||||
foo x = case x of | |||||
TI -> 123 | |||||
TB -> True | |||||
``` | |||||
```haskell | |||||
data T1 a where | |||||
TI :: (a ~# Int) => T1 a | |||||
TB :: (a ~# Bool) => T1 a | |||||
foo :: T1 a -> a | |||||
foo x = case x of | |||||
TI phi -> 123 |> Sym phi | |||||
TB phi -> True |> Sym phi | |||||
``` | |||||
</div> | |||||
This program, which uses GADTs (see `data ... where`{.haskell}), has two non-trivial equalities between types. In the `TI -> 123`{.haskell} case, we used an `Int`{.haskell}[^1] literal where a value of type `a` was expected. But in that branch, `a` is equal to `Int`{.haskell}! In the elaborated output, this non-trivial local equality is explicitly witnessed by a _coercion variable_ `phi :: a ~# Int`{.haskell}, and the use of `123 :: Int`{.haskell} at type `a` has to be mediated by a _cast_. | |||||
[^1]: Actually, if you know how numeric literals desugar, you might know the actual elaboration produced here is different: `123` becomes `fromInteger @a ($NumInt |> Num (Sym phi)) (123 :: Integer)`{.haskell}. This is because it's totally legit to cast the `Num Int`{.haskell} dictionary to a `Num a`{.haskell} dictionary using the local equality, and, since `123`{.haskell} is sugar for `fromInteger @α (123 :: Integer)`{.haskell}, `α` gets solved to `a`, not `Int`{.haskell}. | |||||
The other kind of evidence is not specific to GADTs, type families, or any other type fanciness: _dictionaries_ witness the existence of a type class `instance`{.haskell}, but, unlike coercions (which only exist to make the second type checker happy), exist at runtime. Consider the program below and its elaboration: | |||||
<div class=mathpar> | |||||
```haskell | |||||
class S a where | |||||
s :: a -> String | |||||
instance S Int where | |||||
s = show | |||||
foo :: Int -> String | |||||
foo x = s (x + 123) | |||||
``` | |||||
```haskell | |||||
data $S a = | |||||
$MkS { s :: a -> String } | |||||
$dSInt :: $S Int
$dSInt = $MkS @Int (show @Int $dShowInt) | |||||
foo :: Int -> String | |||||
foo x = s @Int $dSInt ((+) @Int $dNumInt x 123) | |||||
``` | |||||
</div> | |||||
Type `class`{.haskell}es are elaborated to `data`{.haskell} types, and `instance`{.haskell}s are compiled to actual proper values of those data types. When you apply a function with overloaded type - like `s`, `show` and `(+)` - the compiler inserts the value corresponding to the `instance`{.haskell} that was selected to satisfy the class constraint. Further, `instance`{.haskell}s with superclass constraints become functions from dictionaries to dictionaries, and superclasses on `class`{.haskell}es become values embedded in the dictionary, just like class methods. | |||||
You'll also notice another artifact of elaboration here: the use of `s` at type `Int`{.haskell} became a _visible type application_ `s @Int`{.haskell}. This is, again, to satisfy the second type checker, but it can in principle be used as an actual implementation of polymorphism - one that doesn't box. See [Sixten](https://github.com/ollef/sixten) for a language that exploits this type passing to implement polymorphism without monomorphisation. Type applications are used in every polymorphic function application, not just those with class constraints. | |||||
## Why it's hard | |||||
GADTs complicate the problem of type inference in a way that's become rather famous: GADTs destroy the _principal types_ property. Recall: A **principal type** for a function $f$ is a type $\tau$ such that, $\Gamma \vdash f : \tau$ and, if $\Gamma \vdash f : \sigma$, then $\sigma$ is a substitution instance of $\tau$. Using less KaTeX, a principal type for a function is a _most general type_ for that function. For instance, the functions below are annotated with their principal types:
```haskell | |||||
id :: a -> a | |||||
id x = x | |||||
const :: a -> b -> a | |||||
const x _ = x | |||||
``` | |||||
But now consider this program using GADTs: | |||||
```haskell | |||||
data T a where | |||||
T1 :: Int -> T Bool | |||||
T2 :: T a | |||||
test x y = case x of | |||||
T1 n -> n > 0 | |||||
T2 -> y | |||||
``` | |||||
One can verify - and we will - that the function test types as either `test :: forall a. T a -> Bool -> Bool`{.haskell} or as `forall a. T a -> a -> a`{.haskell}, but neither of these types is an instance of the other! Let's look at why `test` checks with either of those types, with a _lot_ of detail - mimicking by hand the execution of the algorithm. Don't worry about all the terms I'll be throwing around: they'll all be explained later, I promise! | |||||
<details class=blockquote> | |||||
<summary> **`test :: forall a. T a -> Bool -> Bool`{.haskell}** </summary> | |||||
The algorithm is in checking mode, since we have a type signature. | |||||
1. Introduce a binding `x :: T a`{.haskell} into scope. We must check the body of the function against the type `Bool -> Bool`{.haskell} | |||||
2. Introduce a binding `y :: Bool` into scope. We must check the body of the function against the type `Bool`{.haskell}. | |||||
3. Check the `case`{.haskell} expression against the type `Bool`{.haskell}. There are two branches. | |||||
* `T1 n -> n > 0`{.haskell}: | |||||
* Instantiate the type of the constructor `T1 :: forall a. (a ~ Bool) => Int -> T a`{.haskell} with `a := X`{.haskell} to get the type `T1 :: a ~ Bool => Int -> T a`{.haskell}, where `a` is a _skolem_ type variable. The type variable `a` becomes a skolem and not a unification variable because it is an _existential_ of T1. | |||||
* Introduce the local equality assumption `phi :: a ~ Bool`{.haskell} and the variable `n :: Int`{.haskell}. | |||||
* Check that `n > 0 :: Bool`{.haskell}. For brevity, we'll take this to be one atomic step, which succeeds, but the real algorithm must treat all of those subexpressions independently. | |||||
* `T2 -> y`{.haskell}. We must check that `y :: Bool`{.haskell}, which succeeds. | |||||
Since all of these steps succeed (most of them are introducing variables and can't fail) - the program is type-correct. Note that in the branch with a local equality, our assumption that `a ~ Bool`{.haskell} wasn't used. | |||||
</details> | |||||
<details class=blockquote> | |||||
<summary> **`test :: forall a. T a -> a -> a`{.haskell}** </summary> | |||||
The algorithm is in checking mode, since we have a type signature. | |||||
1. Introduce a binding `x :: T a`{.haskell} into scope. We must check the body of the function against the type `a -> a`{.haskell} | |||||
2. Introduce a binding `y :: a` into scope. We must check the body of the function against the type `a`{.haskell}. | |||||
3. Check the `case`{.haskell} expression against the type `a`{.haskell}. There are two branches.
* `T1 n -> n > 0`{.haskell}: | |||||
* Instantiate the type of the constructor `T1 :: forall a. (a ~ Bool) => Int -> T a`{.haskell} with `a := X`{.haskell} to get the type `T1 :: a ~ Bool => Int -> T a`{.haskell}, where `a` is a _skolem_ type variable. The type variable `a` becomes a skolem and not a unification variable because it is an _existential_ of T1. | |||||
* Introduce the local equality assumption `phi :: a ~ Bool`{.haskell} and the variable `n :: Int`{.haskell}. | |||||
* Check that `n > 0 :: a`{.haskell}. We infer that `n > 0 :: Bool`{.haskell}, and we must unify `Bool ~ a`{.haskell}. This unification succeeds because of the given equality `phi :: a ~ Bool`{.haskell}, which we are free to invert. | |||||
* `T2 -> y`{.haskell}. We must check that `y :: a`{.haskell}, which succeeds. | |||||
Since all of these steps succeed (most of them are introducing variables and can't fail) - the program is type-correct. In this typing, compared with the previous, we made use of the assumption `phi :: a ~ Bool` brought into scope by the match against the constructor `T1 n`. | |||||
</details> | |||||
The execution trace for both cases is remarkably similar - the only difference is in that if the function is typed as `T a -> a -> a`{.haskell}, we must make use of the local equality brought into scope to justify that we're allowed to use a value nominally of type `Bool`{.haskell} as one of type `a`. We are free to do this, but it's not obvious if, without a type annotation to guide us, we should. Consider now the following very minor alteration to test: | |||||
```haskell | |||||
test x y = case x of | |||||
T1 n -> n > 0 | |||||
T2 -> not y | |||||
``` | |||||
The only possible type for this program is `T a -> Bool -> Bool`{.haskell}, and so, we can decide without any major complications that the GADT equality should _not_ be used. | |||||
# How To Check Types | |||||
In this section we'll solve the infinitely simpler problem of elaborating a language with rank-N types and type classes - including functional dependencies - but crucially, no GADTs. To do this we'll use a _bidirectional_, _constraint-based_ elaboration algorithm. | |||||
First, bidirectional means that, unlike in a type _inference_ system, type information flows both in and out of the algorithm. Practically speaking, we have two functions to implement the case where type information is an input to the algorithm (`check`{.haskell}) and one where type information is a return from the algorithm (`infer`{.haskell}). | |||||
```haskell | |||||
infer :: Raw.Expr -> Elab (Core.Term, Core.Type) | |||||
check :: Raw.Expr -> Core.Type -> Elab Core.Term | |||||
``` | |||||
If you know how to infer a type for an expression `e` but you need to check it against a known type `wanted_type`, you can do it by unification, whereas if you know how to check an expression `f` against a type but you need to infer a type for it, you can do it by inventing a new _metavariable_ and checking against that[^2]: | |||||
[^2]: In the OutsideIn(X) paper, metavariables are known as _unification variables_. The term _metavariable_ is common in the dependently-typed world, whereas _unification variable_ is more common among Haskell and ML researchers. | |||||
<div class=mathpar> | |||||
```haskell | |||||
check e wanted_type = do | |||||
  (elab, actual_type) <- infer e
unify wanted_type actual_type | |||||
pure elab | |||||
``` | |||||
```haskell | |||||
infer f = do | |||||
ty <- newMeta | |||||
elab <- check f ty | |||||
pure (elab, ty) | |||||
``` | |||||
</div> | |||||
Constraint-based means that, at least conceptually, the algorithm works by first generating constraints by walking through the AST (we do this bidirectionally), and only later solving the generated constraints. But, as a very fruitful optimisation, there are cases where the constraints need not be stashed away for later: If we want to solve a unification problem, for instance, where a metavariable is being compared against a concrete type, and we're free to solve the variable with that type, we might as well do it inline. | |||||
_Elaboration_ is a natural extension of "type checking" in which the program is both checked and transformed into a simpler intermediate representation in the same step. The name "type checker" sort-of implies that the output is a boolean (or, more realistically, a list of errors): this is rarely true in practice, but I still prefer the use of the name "elaborator" to make clear that the output is a different _language_ from the input, and not merely a type-annotated version of the input. | |||||
I'm going to start by talking about the intermediate language we'll elaborate into, _System $F_C$, first. This is because of an assumption I'm making: I'm assuming most of my readers are familiar with Haskell - at least in passing - but not very familiar with GHC's intermediate language. That's why we start there! | |||||
## Our Target Language | |||||
System $F_C$, as the name kind-of sort-of implies, is a superset of System $F$, the _second-order_ lambda calculus. For those not in the loop, System F has all the same features of a normal typed lambda calculus (variables, lambda abstraction, application, algebraic data types, and pattern matching[^3]), but additionally features _first class polymorphism_. Roughly, this means that in System F, a `forall`{.haskell} type can appear everywhere a "normal" type can appear - you could form the type `[forall a. a -> a]`{.haskell} of "lists of identity functions", for instance. | |||||
[^3]: If you disagree with the inclusion of algebraic data types and pattern matching in the list of features of a "normal typed lambda calculus"---there's nothing you can do about it, this is my blog, lol. | |||||
Now, this doesn't mean that first class polymorphism is available to languages that elaborate into System $F_C$ - GHC, for instance, struggled with what they call "impredicative polymorphism" for years, up until very recently. Amulet did a slightly better job because, being a research toy and not a production compiler (that happens to also be a research toy), there was less code to move around by implementing support for first-class polymorphism.
Since `forall`{.haskell} is a new type former, it also has a corresponding introduction form and elimination form. The introduction rule says that if you can build a term `e : t` in a context where `a` is a type variable of kind `k`, then the term `Λ (a :: k). e` has type `forall (a :: k). t`{.haskell}. To stick with ASCII for "control" symbols, I'm going to write `Λ (a :: k)` as `\ @(a :: k)`{.haskell}, omitting `k` if it is obvious - Also, I'm sticking with Haskell notation, even if `::` should really mean cons.
Similarly, the elimination rule says that to consume an expression `e :: forall (a :: k). t`{.haskell}, what we need to do is come up with a _type_ `s :: k`{.haskell}. Then we can _instantiate_ (using a different word so as to not overload "apply") `e`{.haskell} at `s`{.haskell} to get a term `e @s :: t[s/a]`{.haskell} - where `t[s/a]` denotes the substitution of `s` for `a` in `t`, avoiding capture. | |||||
Here's a simple Haskell program, and its translation into the notation I'll use for $F_C$. We'll go over it afterwards. | |||||
<div class=mathpar> | |||||
```haskell | |||||
data List a | |||||
= Nil | |||||
| Cons a (List a) | |||||
map :: (a -> b) -> List a -> List b
-- this line intentionally left blank | |||||
map f (Cons x xs) = Cons (f x) (map f xs) | |||||
map f Nil = Nil | |||||
``` | |||||
```haskell | |||||
data List :: * -> * where | |||||
Nil :: forall a. List a | |||||
Cons :: forall a. a -> List a -> List a | |||||
map :: forall a b. (a -> b) -> List a -> List b
map @a @b f x = case x of | |||||
Cons x xs -> Cons @b (f x) (map @a @b f xs) | |||||
Nil -> Nil @b | |||||
``` | |||||
</div> | |||||
Let's go over the differences: | |||||
* In Haskell, we allow datatype declarations using the Haskell 98 syntax, but in $F_C$ all data types are given in GADT syntax. Furthermore, `List`{.haskell} was given a kind annotation when it was elaborated - the kind of `List`{.haskell} says it maps ground types to ground types. By "ground type" I mean something that's potentially inhabited, e.g. `Int`{.haskell} or `Void`{.haskell}, but not `Maybe`. | |||||
Where does the kind annotation come from? Well, we know `List` will have a function kind since it has one argument, and we know its return kind will be `*`{.haskell} since all data types are in `*`{.haskell}. That means we kind-check the constructors with `List :: κ -> *`{.haskell} in scope, where `κ` is a fresh metavariable. The type of `Nil`{.haskell} doesn't fix `κ`, but the type of `Cons`{.haskell} does - `a` is used on the left of an arrow, so it must have kind `*`.
* Haskell has definition by equations, but in $F_C$ we simply have type signatures and definitions. We can translate the equations into a case tree using a rather involved - but mechanical - process, and, to avoid that complication, the subset of Haskell our elaborator works will not support equations. It's mostly immaterial to elaboration, anyway. | |||||
* In Haskell, the type signature `map :: (a -> b) -> List a -> List b`{.haskell} is written with implicit binders for the type variables `a` and `b`, so that they're seemingly free. This is not the case, of course, and so in $F_C$ we must write out what `forall`{.haskell}s we mean. This is less relevant in this case, where there are no free type variables in the environment, but specifying `forall`{.haskell}s is essential when we have `ScopedTypeVariables`. | |||||
* Finally, all of the polymorphism implicit in the Haskell version of the program was made explicit in its elaboration into $F_C$. For instance, the type of the `map`{.haskell} function has two `forall`{.haskell}s, so its definition must begin with a corresponding number of `\@`{.haskell}s (which I moved onto the RHS for presentation purposes - don't want lines getting too wide). | |||||
Similarly, the list `Cons`{.haskell}tructors were used as expressions of type `List a` in Haskell, but their $F_C$ types start with a `forall`{.haskell}, meaning we have to instantiate them - `Nil @b`{.haskell}, `Cons @b`{.haskell} - at the return type of the `map` function. | |||||
We represent the language using a data type. Syntax productions in the language become constructors of our data type. For clarity of presentation, I'll use `Text`{.haskell}s for variable names. This is a bad idea, and it'll make a lot of you very angry - for good reason! Dealing with binders is _hard_, and using strings for identifiers is quite possibly the worst solution. It'd be more principled to use de Bruijn indices, or locally nameless, or something. But - that's a needless complication, so, in the interest of clarity, I'll just use strings. | |||||
Since our language contains type applications, we "need" to define types before expressions. Well, this is a Haskell program, so we don't _need_ to - Haskell programs are not lists of definitions, but rather _directed graphs_ of definitions, so that source order doesn't matter - but for clarity, we define the type of types before the type of expressions. | |||||
```haskell | |||||
module Core where | |||||
import qualified Data.Text as T | |||||
import Data.Text (Text) | |||||
data Kind | |||||
= TypeKi | |||||
-- ^ The kind '*' | |||||
| ConstraintKi | |||||
-- ^ The kind 'Constraint' | |||||
| FunKi Kind Kind | |||||
-- ^ κ → κ | |||||
deriving (Eq, Show) | |||||
data Type | |||||
= VarTy Text Kind | |||||
-- ^ Type variables α | |||||
| AppTy Type Type | |||||
-- ^ The type being applied is never a constructor, | |||||
-- always another AppTy or a VarTy. | |||||
| ConTy Text [Type] | |||||
-- ^ Type constructor applied to some arguments. | |||||
| ForAllTy Text Kind Type | |||||
-- ^ Polymorphic types | |||||
| FunTy Type Type | |||||
-- ^ Function types | |||||
deriving (Eq, Show) | |||||
``` | |||||
Throughout the language, variables (resp. type variables) are annotated with the type (resp. kind) with which they are introduced. More, our type of expressions unifies `\ @a`{.haskell} and `\ x`{.haskell}, as well as both application forms, by delegating to `Binder`{.haskell} and `Arg`{.haskell} types. | |||||
```{.haskell style="padding-bottom: 0"} | |||||
data Binder = TypeBinder Text | ExprBinder Text | |||||
deriving (Eq, Show) | |||||
data Arg = TypeArg Type | ExprArg Expr | |||||
deriving (Eq, Show) | |||||
data Expr | |||||
= Var Text Type | |||||
| App Expr Arg | |||||
| Lam Binder Expr | |||||
-- continues | |||||
``` | |||||
For `Let`{.haskell}, we introduce yet another auxiliary type. A `Bind`{.haskell} represents a _binding group_, a group of mutually recursive definitions. Binding groups do not correspond 1:1 with `let`{.haskell}s in Haskell, for instance, the Haskell program on the left is elaborated into the Core expression on the right: | |||||
<div class="mathpar"> | |||||
```haskell | |||||
let quux x = bar (x - 1) | |||||
foo = 1 | |||||
bar x = quux x + foo | |||||
in foo | |||||
``` | |||||
```haskell | |||||
Let (NonRec "foo" (Lit 1)) $ | |||||
Let (Rec [ ("quux", Lam (ExprBinder ...) ...)
         , ("bar", Lam (ExprBinder ...) ...) ]) $
Var "foo" | |||||
``` | |||||
</div> | |||||
As you can probably imagine, the way I arrived at this definition involves.. Graphs. Yes, it's unfortunate, but it's the only way to correctly describe how Haskell declaration blocks - that includes the top level - are type checked. The Haskell report mandates that declaration groups - in the top level, a `let`{.haskell} expression or a `where`{.haskell} clause - should be sorted into strongly connected components, and type-checked in dependency order. Each of these connected components becomes a `Rec`{.haskell} binding! | |||||
We define the auxiliary `Bind`{.haskell} type.. somewhere else, since we still have cases to add to the `Expr`{.haskell}. It's either a connected graph of mutually recursive binders, containing a list of pairs of names and expressions, or a single binder - in which case we unpack the pair. | |||||
```{.haskell style="padding-top: 0; padding-bottom: 0;"} | |||||
-- continued | |||||
| Let [Bind] Expr | |||||
data Bind | |||||
= NonRec Text Expr | |||||
| Rec [(Text, Expr)] | |||||
deriving (Eq, Show) | |||||
-- continues | |||||
``` |
@ -0,0 +1,256 @@ | |||||
--- | |||||
title: The Complete History of isoToEquiv | |||||
date: December 17th, 2021 | |||||
--- | |||||
# What's isoToEquiv? | |||||
It's a standard fact in (higher) category theory and homotopy theory | |||||
that any equivalence of categories (homotopy equivalence) can be | |||||
improved to an _adjoint_ equivalence of categories (strong homotopy | |||||
equivalence). Adjoint equivalences (and strong homotopy equivalences) | |||||
are "more structured" notions in the sense that the data of an adjoint | |||||
equivalence is _contractible if inhabited_. | |||||
In Homotopy Type Theory, the notion of (half) adjoint equivalence lets | |||||
us split the type $A \simeq B$ into _structure_ and _property_: the | |||||
structure is a function $A \to B$, and the property is "being an | |||||
equivalence". This is in contrast to the type of _isomorphisms_ $A \cong | |||||
B$, which are the _structure_ of maps $f : A \to B$ and $g : B \to A$ | |||||
together with homotopies $f \circ g \sim \mathrm{id}$ and $g \circ f | |||||
\sim \mathrm{id}$. Notably, the type of "isomorphism data" for a | |||||
particular function is [not always a proposition], whereas [being an | |||||
equivalence is]. | |||||
[not always a proposition]: https://cubical.1lab.dev/1Lab.Counterexamples.IsIso.html | |||||
[being an equivalence is]: https://cubical.1lab.dev/1Lab.Equiv.html#isProp-isEquiv | |||||
Recently, I've been working on [the 1Lab], an open-source, formalised | |||||
and explorable resource for univalent mathematics. This means that, | |||||
among other things, I had to write an explanation for [the proof that | |||||
isomorphisms can be made into equivalences][1], i.e., the map | |||||
`isoToEquiv`. Our proof comes essentially from [the Cubical Agda | |||||
library], where it is presented with a single comment: | |||||
```agda | |||||
-- Any iso is an equivalence | |||||
``` | |||||
That's helpful. So where does it come from? | |||||
[the 1Lab]: https://cubical.1lab.dev | |||||
[1]: https://cubical.1lab.dev/1Lab.Equiv.html#equivalences-from-isomorphisms | |||||
[the Cubical Agda library]: https://github.com/agda/cubical/blob/22bea9586b67fa90cf90abea04360080d369c68c/Cubical/Foundations/Isomorphism.agda#L53-L112 | |||||
# Where it comes from | |||||
Since _I_ know where I got it from, I knew where to start looking. On | |||||
the Git log of the Cubical Agda library, we find that the Cubical Agda | |||||
version of `isoToEquiv` was added in [October 31, 2018] by Anders | |||||
Mörtberg, with the same handy comment and no attribution. | |||||
It's reasonable to assume, then, that Mörtberg proved it himself right | |||||
then and there. However, the HoTT book, published in 2013, already has a | |||||
proof that any isomorphism data can be "strengthened" into equivalence | |||||
data, so it seems unreasonable to assume that the first time this was | |||||
formalised was 2018. Fortunately, we're not dead in the water. | |||||
[October 31, 2018]: https://github.com/agda/cubical/commit/e139a6f09ea59da2032be8ebaf07e8b5fc8bc0c4 | |||||
The Cubical Agda implementation of `isoToEquiv` comes from [cubicaltt], | |||||
an older, prototype implementation of cubical type theory. Looking at | |||||
_that_ Git history, we find that it was added by.. Mörtberg again! This | |||||
time on [January 4, 2016], over two years prior. Again, there is no | |||||
attribution, and this is the oldest implementation of cubical type | |||||
theory, so it's reasonable to assume that this is where the proof | |||||
originates, right? Wrong. | |||||
[cubicaltt]: https://github.com/mortberg/cubicaltt/blob/a5c6f94bfc0da84e214641e0b87aa9649ea114ea/examples/equiv.ctt#L177-L225 | |||||
[January 4, 2016]: https://github.com/mortberg/cubicaltt/commit/26b70046ce7e45197f14ead82daae7e0354e9945 | |||||
# The name "gradLemma" | |||||
If you look at the commit that originally introduced a proof that `isIso | |||||
f → isEquiv f` to cubicaltt, you'll see that the map _wasn't_ named | |||||
`isoToEquiv`, it was named `gradLemma`. This name is _especially_ | |||||
unhelpful, [quoting Mike Shulman]: | |||||
[quoting Mike Shulman]: https://github.com/mortberg/cubicaltt/issues/72 | |||||
> Generally there are two ways that theorems and lemmas are named in | |||||
> mathematics: descriptively (giving some information about what the | |||||
> theorem says, e.g. "the intermediate value theorem") and attributively | |||||
> (giving credit to whoever proved it, e.g. "Cauchy's theorem"). Whatever | |||||
> your feelings about the relative merits of the two, the name "grad | |||||
> lemma" achieves neither: it conveys no information about what the lemma | |||||
> says, nor does it give any credit to the people it refers to, instead | |||||
> depersonalizing them as "some nameless graduate students". Moreover it | |||||
> is even factually incorrect, since some of the people in question were | |||||
> actually postdocs at the time. | |||||
Shulman is right! The name is depersonalizing, and it does not credit | |||||
the people who originally came up with the proof. So who _are_ they? | |||||
Where does this proof come from? Well, reading the rest of those GitHub | |||||
issues, we learn two things: | |||||
1. Mörtberg did not come up with the proof out of thin air, though as | |||||
far as I can tell he was the first to adapt it to a direct cubical | |||||
argument; | |||||
2. The name "gradLemma" comes from UniMath. | |||||
# UniMath | |||||
UniMath (**uni**valent **math**ematics) is the second oldest library for | |||||
Homotopy Type Theory in a proof assistant, and the oldest actively | |||||
maintained. It was, in fact, originally written by Voevodsky himself! | |||||
Thus, it comes with its own proof of `isoToEquiv`, which we find [in the | |||||
massive file `Foundations/PartA.v`][3]: | |||||
[3]: https://github.com/UniMath/UniMath/blob/f9645aeb354f34f0841cb796e33ccc0a5cba1d67/UniMath/Foundations/PartA.v | |||||
``` | |||||
(** This is kept to preserve compatibility with publications that use the | |||||
name "gradth" for the "grad theorem". *) | |||||
Definition gradth {X Y : UU} (f : X -> Y) (g : Y -> X) | |||||
(egf: ∏ x : X, g (f x) = x) | |||||
(efg: ∏ y : Y, f (g y) = y) : isweq f := isweq_iso f g egf efg. | |||||
``` | |||||
Good to know that at least the theorem isn't _called_ "grad theorem" | |||||
anymore, but there are still many references to `gradth` in the | |||||
codebase. By the way, the name `isweq_iso`? [Changed by Mörtberg]! It's | |||||
him we have to thank for introducing the _useful_ name `isweq_iso`, and | |||||
the corresponding `isoToEquiv`. Thank goodness we don't call it "grad | |||||
theorem" anymore. | |||||
[Changed by Mörtberg]: https://github.com/UniMath/UniMath/pull/848 | |||||
But wait, the name "grad theorem" refers to *grad*uate students… And
graduate students are _people_… So who are these people? Let's keep
digging. The README to UniMath mentions that it is based on a previous library, [Foundations]: | |||||
[Foundations]: https://github.com/UniMath/Foundations | |||||
> The UniMath project was started in 2014 by merging the repository | |||||
> Foundations, by Vladimir Voevodsky (written in 2010), \[...\] | |||||
I'll cut to the chase: The history of `gradth` ends with foundations; | |||||
It's been there since [the initial commit]. This means that the trail | |||||
has run cold. Voevodsky certainly wasn't a grad student in 2010, he was
a Fields medalist! Mörtberg didn't name the theorem either. And so, I
kept digging. I started looking for sources other than code: talks, | |||||
papers, theses, etc. | |||||
# An unlikely source | |||||
[the initial commit]: https://github.com/UniMath/Foundations/commit/d56271180c00a8a545c29db06001ae71a910c1b1#diff-ba65cad07cb794c06ab63e0e04dc95d785fee1374791ea974099e355ff20ff7bR433-R439 | |||||
I found a handful of papers ([here] [are] [some]) which refer to this
result as "the graduate theorem", but none of them mentioned _who_ the
graduate students are. I did, however, find a source — an unlikely one.
[here]: https://arxiv.org/pdf/1809.11168.pdf | |||||
[are]: https://arxiv.org/pdf/1401.0053.pdf | |||||
[some]: https://arxiv.org/pdf/1210.5658.pdf | |||||
In 2011, Mike Shulman wrote [a series of posts] introducing the | |||||
$n$-category café to the ideas of homotopy type theory. In this post, | |||||
Shulman mentions the notion of equivalence following Voevodsky, but he | |||||
also mentions the notion of [half-adjoint equivalence], and mentions | |||||
where it comes from: | |||||
> This other way to define $\mathrm{IsEquiv}$ should be attributed to a | |||||
> handful of people who came up with it a year ago at an informal | |||||
> gathering at CMU, but I don’t know the full list of names; maybe someone | |||||
> else can supply it. | |||||
[a series of posts]: https://golem.ph.utexas.edu/category/2011/03/homotopy_type_theory_ii.html | |||||
[half-adjoint equivalence]: https://cubical.1lab.dev/1Lab.Equiv.HalfAdjoint.html | |||||
Now, as Shulman says, he does not know the full list of names. However, | |||||
Steve Awodey does, [as is stated in a comment]: | |||||
> Let’s see: Mike Shulman, Peter Lumdaine, Michael Warren, Dan Licata – | |||||
> right? I think it’s called “gradlemma” in VV’s coq files (although only | |||||
> 2 of the 4 were actually still grad students at the time). | |||||
[as is stated in a comment]: https://golem.ph.utexas.edu/category/2011/03/homotopy_type_theory_ii.html#c037120 | |||||
However, note the use of "right?" - this comment isn't a primary source | |||||
(Awodey wasn't involved in coming up with the graduate lemma), and it's | |||||
not even certain on top of that. However, you know what would be a | |||||
primary source? | |||||
Someone who was there. | |||||
And you know who follows me on twitter? | |||||
[Dan Licata]. | |||||
[Dan Licata]: https://twitter.com/admitscut/status/1472014521673859072 | |||||
# The Complete History of isoToEquiv | |||||
With this, we have a full picture of the history of isoToEquiv, albeit | |||||
with a handful of details still fuzzy. Here's a timeline: | |||||
* (2010-??-??) Mike Shulman, Peter Lumsdaine, Michael Warren and Dan
Licata come up with the notion of half-adjoint equivalence in homotopy type
theory, and adapt a standard result from category theory to show that | |||||
any isomorphism improves to a half-adjoint equivalence. **This is the | |||||
origin of `isoToEquiv`**; The "grad" in "grad theorem" refers to Licata | |||||
and Lumsdaine, who were graduate students at the time. | |||||
* (2010-10-04) Vladimir Voevodsky makes the first commit of Foundations, | |||||
where `isoToEquiv` is present - under the name `gradth`. Nowhere in the | |||||
repository, its entire history, or anywhere in Voevodsky's works are the | |||||
grads from the th mentioned. There are no git logs that can trace the | |||||
history of `isoToEquiv` before this point. | |||||
* (2014-03-21) Foundations becomes UniMath, and the name `gradth` is | |||||
kept. This is the first “leap” in the history of `isoToEquiv`, the first | |||||
non-trivial historical path. | |||||
* (2016-01-04) Mörtberg adapts the proof of `gradth` from UniMath to his
experimental implementation of Cubical Type Theory, which at the time
was brand new. There, the result is called `gradLemma`.
As far as I can tell, this is the origin of the code for `isoToEquiv` | |||||
that can still be found, alive and kicking, in the Cubical library | |||||
(and the 1lab) to this day. I've emailed Mörtberg to set the record | |||||
straight, but I'm writing this at 11:00pm on a Friday, so I still | |||||
haven't heard back. I'll update the post when (and if) he replies. | |||||
* (2017-08-11) Mike Shulman files an issue in the cubicaltt repository | |||||
complaining about the name `gradLemma`. Mörtberg addresses the issue by | |||||
renaming it to `isoToEquiv`. | |||||
* (2018-10-30) Mörtberg ports `isoToEquiv` from cubicaltt to Cubical | |||||
Agda, which is where I stole it from. | |||||
I am in no position to speculate as to _why_ Voevodsky did not credit | |||||
Licata, Lumsdaine, Shulman and Warren with `gradth`, or why he chose a | |||||
name that mentions the existence of graduate students without naming the | |||||
students. It feels like a great misstep in the history of our community | |||||
that such a fundamental result was never properly accredited. While the | |||||
Wikipedia page for Homotopy Type Theory has mentioned Licata, Lumsdaine, | |||||
Shulman and Warren as the authors of the proof [since 2014], _none_ of | |||||
the primary sources I consulted - the Foundations repo, the UniMath | |||||
repo, the cubicaltt repo, the Cubical Agda repo, or _any_ of the | |||||
associated papers - do. | |||||
[since 2014]: https://en.wikipedia.org/w/index.php?title=Homotopy_type_theory&diff=prev&oldid=638938391 | |||||
I'm glad the name `gradth` is no longer in widespread usage (outside of | |||||
UniMath, which has neglected to remove the deprecated name). If we were | |||||
to name it after people, it should be called the | |||||
Licata-Lumsdaine-Shulman-Warren Theorem, and that's unwieldy. Let's | |||||
stick with `isoToEquiv`, but acknowledge where it comes from. | |||||
I know this post isn't what I usually write - I'm not a historian, after | |||||
all - so thanks for reading it. I wanted to chronicle how I spent the | |||||
afternoon and evening of a Friday in December 2021: Chasing the ghost of | |||||
proper attribution. I'm probably not going to write any technical | |||||
content on this blog for a while yet; I might write a short announcement | |||||
of the 1lab, which otherwise takes up all of my spoons. |
@ -0,0 +1,554 @@ | |||||
{-# LANGUAGE BangPatterns #-} | |||||
{-# LANGUAGE MultiWayIf #-} | |||||
{-# LANGUAGE OverloadedStrings #-} | |||||
{-# LANGUAGE BlockArguments #-} | |||||
{-# LANGUAGE LambdaCase #-} | |||||
{-# LANGUAGE StandaloneDeriving #-} | |||||
{-# LANGUAGE DeriveAnyClass #-} | |||||
import Control.DeepSeq (rnf) | |||||
import Control.Concurrent | |||||
import Control.Exception | |||||
import Control.Monad | |||||
import qualified Data.ByteString.Lazy.Char8 as BS | |||||
import Data.ByteString.Lazy.Char8 (pack, unpack) | |||||
import qualified Data.HashMap.Strict as HMap | |||||
import qualified Data.Text.Encoding as T | |||||
import qualified Data.Map.Strict as Map | |||||
import qualified Data.Text as T | |||||
import Data.Functor | |||||
import Data.Monoid | |||||
import Data.Binary | |||||
import Data.Maybe | |||||
import Data.Aeson | |||||
import Data.List | |||||
import Data.Char | |||||
import Hakyll.Core.Compiler.Internal | |||||
import Hakyll.Core.Compiler | |||||
import Hakyll.Web.Sass | |||||
import Hakyll | |||||
import qualified Network.URI.Encode as URI (encode) | |||||
import qualified Skylighting as Sky | |||||
import System.Directory | |||||
import System.Environment | |||||
import System.Process | |||||
import System.Exit | |||||
import System.IO | |||||
import Text.Pandoc.Walk (query, walkM, walk) | |||||
import Text.Pandoc.Highlighting | |||||
import Text.Pandoc.Definition | |||||
import Text.Pandoc.Options | |||||
import Text.Sass.Functions | |||||
import Data.Text (Text) | |||||
import Data.IORef | |||||
import Data.Hashable (Hashable (hashWithSalt)) | |||||
import GHC.Stack | |||||
import Text.Read (readMaybe) | |||||
import GHC.Show (showCommaSpace) | |||||
import Data.Traversable | |||||
import qualified Data.Text.Lazy as LT | |||||
-- | Pandoc reader settings: enable the full set of Pandoc extensions,
-- and treat indented (classless) code blocks as @amulet@ code.
readerOpts :: ReaderOptions
readerOpts =
  def { readerExtensions          = pandocExtensions
      , readerIndentedCodeClasses = ["amulet"]
      }
-- | Pandoc writer settings.  Built inside 'Compiler' because the
-- syntax-highlighting map is assembled from the "syntax/*.xml"
-- snapshots produced by the rules in 'main'.
writerOptions :: Compiler WriterOptions
writerOptions = do
  -- Fold every loaded syntax definition onto Skylighting's defaults.
  syntaxMap <- loadAllSnapshots "syntax/*.xml" "syntax"
           <&> foldr (Sky.addSyntaxDefinition . itemBody) Sky.defaultSyntaxMap

  pure $ defaultHakyllWriterOptions
    { -- Keep Hakyll's defaults, plus TeX math and LaTeX macro support.
      writerExtensions = extensionsFromList
      [ Ext_tex_math_dollars
      , Ext_tex_math_double_backslash
      , Ext_latex_macros
      ] <> writerExtensions defaultHakyllWriterOptions
    , writerSyntaxMap = syntaxMap
    , writerHighlightStyle = Just kate
    }
-- | Metadata for the RSS feed rendered at @feed.xml@.
rssfeed :: FeedConfiguration
rssfeed = FeedConfiguration
  { feedTitle       = "Amelia's Blag: Latest articles"
  , feedDescription = ""
  , feedAuthorName  = "Amélia"
  , feedAuthorEmail = "[email protected]"
  , feedRoot        = "https://amelia.how"
  }
-- | Hakyll configuration: build into hidden dot-directories and deploy
-- with the local @sync@ script.
conf :: Configuration
conf = def
  { destinationDirectory = ".site"
  , storeDirectory       = ".store"
  , tmpDirectory         = ".store/tmp"
  , deployCommand        = "./sync"
  }
-- | Render every TeX math inline through the external @katex@ CLI,
-- memoising the rendered HTML in a persistent 'KatexCache' keyed by
-- (item id, math kind, TeX source).
katexFilter :: MVar KatexCache -> Pandoc -> Compiler Pandoc
katexFilter cacheVar (Pandoc meta doc) =
  do
    -- Prune stale entries for this item before rendering: spans whose
    -- abbreviation expansions changed since they were cached.
    id <- compilerUnderlying <$> compilerAsk
    t <- getMetadata id
    invalidateCache id (abbrevs t) cacheVar

    doc <- Pandoc meta <$> walkM (go (show id) (abbrevs t)) doc
    unsafeCompiler $ flushCache cacheVar
    pure doc
  where
    -- Turn the "abbreviations" metadata object into katex @-m@ macro
    -- flags, one @\name:expansion@ per entry.
    abbrevs :: HMap.HashMap Text Value -> [String]
    abbrevs x =
      case HMap.lookup "abbreviations" x of
        Just (Object map) -> concat $ mapMaybe oneAbbrev (HMap.toList map)
        _ -> []

    oneAbbrev (x, String t) = Just ["-m", '\\':T.unpack x ++ ':':T.unpack t]
    oneAbbrev _ = Nothing

    -- Replace a Math inline with raw HTML: either a cache hit, or a
    -- fresh run of the katex binary whose output is then recorded.
    go :: String -> [String] -> Inline -> Compiler Inline
    go id abbrevs (Math kind math) = unsafeCompiler $ do
      cache <- readMVar cacheVar
      case HMap.lookup (id, kind, math) (spanMap cache) of
        Just x -> pure (RawInline "html" x)
        Nothing -> do
          let args = flip (:) abbrevs $ case kind of { DisplayMath -> "-td"; InlineMath -> "-t" }
          (contents, _) <- readProcessBS "katex" args . BS.fromStrict . T.encodeUtf8 $ math
          -- Double T.init: drop the trailing newline(s) katex prints.
          let text = T.init . T.init . T.decodeUtf8 . BS.toStrict $ contents
          -- NOTE(review): the update is computed from the earlier
          -- readMVar snapshot `cache`, not from `m`; a concurrent
          -- insert between the read and this write would be dropped —
          -- confirm whether Hakyll runs these compilers concurrently.
          modifyMVar cacheVar (\m -> pure (bumpCacheEntry cache id abbrevs kind text math, ()))
          pure $ RawInline "html" text
    go id _ x = pure x

    -- Record a rendered span; if the output mentions any abbreviation,
    -- index the span under each (item, abbreviation) pair so it can be
    -- invalidated when that abbreviation's expansion changes.
    bumpCacheEntry (KatexCache spans depends abbrevVars) id abbrevs kind text math =
      let
        str = T.unpack text
        -- (name, expansion) pairs for abbreviations whose name occurs
        -- in the rendered output; "-m" flags themselves are skipped.
        usedAbbrevs = map (\x -> (T.pack (takeWhile (/= ':') (tail x)), T.pack (tail (dropWhile (/= ':') (tail x)))))
                    $ filter (\x -> (takeWhile (/= ':') (tail x)) `isInfixOf` str)
                    $ filter (not . isPrefixOf "-") abbrevs

        addDeps [] x = x
        addDeps ((k, _):xs) vl = HMap.alter (\v -> Just (maybe [(id, kind, math)] ((id, kind, math):) v)) (id, k) $ addDeps xs vl

        recordVars [] x = x
        recordVars ((k, v):xs) x = HMap.insert (id, k) v (recordVars xs x)
      in
        case usedAbbrevs of
          [] -> KatexCache (HMap.insert (id, kind, math) text spans) depends abbrevVars
          xs -> KatexCache (HMap.insert (id, kind, math) text spans) (addDeps xs depends) (recordVars xs abbrevVars)
-- | Expand abbreviation references in the document text.  An inline
-- string of the shape @&name;rest@ is replaced by the expansion of
-- @name@ from the item's "abbreviations" metadata object, with @rest@
-- appended.  Documents without an abbreviation table pass through
-- unchanged.
abbreviationFilter :: Pandoc -> Compiler Pandoc
abbreviationFilter (Pandoc meta doc) =
  do
    id <- compilerUnderlying <$> compilerAsk
    t <- getMetadata id
    case HMap.lookup "abbreviations" t of
      Just (Object map) -> do
        pure (Pandoc meta (walk (replace map) doc))
      _ -> pure (Pandoc meta doc)
  where
    replace map x =
      case x of
        Str t | Just (e, r) <- entity t -> fromMaybe (Str t) (toInline r =<< HMap.lookup e map)
        x -> x

    -- Only string-valued abbreviation entries are substituted.
    toInline r (String t) = Just (Str (t <> r))
    toInline _ _ = Nothing

    -- Split "&name;rest" into (name, rest).  Using T.uncons instead of
    -- the partial T.tail means a string like "&amp" with no closing
    -- semicolon is left alone instead of crashing the compiler
    -- (T.tail on the empty leftover used to throw).
    entity x
      | T.isPrefixOf "&" x && T.length x >= 3 =
        let (name, rest') = T.span (/= ';') (T.tail x)
        in case T.uncons rest' of
             Just (_, rest) -> pure (name, rest)
             Nothing        -> Nothing
      | otherwise = Nothing
-- | Wrap each top-level code block in a @code-container@ div carrying a
-- visible language tag, taken from the block's @tag@ attribute or else
-- capitalised from its first class.  Blocks classed @notag@ get no tag;
-- classless blocks get an @empty-code-tag@ placeholder instead.
addLanguageTag :: Pandoc -> Pandoc
addLanguageTag (Pandoc meta blocks) = Pandoc meta (map go blocks) where
  go :: Block -> Block
  go block@(CodeBlock (identifier, classes@(language:classes'), kv) text) =
    Div
      ( mempty
      , "code-container":if haskv then "custom-tag":classes' else classes'
      , []
      )
      $ [block] ++ maybe [Div (mempty, ["code-tag"], []) [Plain [Span (mempty, [], []) [Str tag]]]]
    where
      -- First class with its initial letter capitalised, for display.
      language' = case T.uncons language of
        Nothing -> mempty
        Just (c, cs) -> T.cons (toUpper c) cs

      tag = fromMaybe language' (lookup "tag" kv)
      haskv = fromMaybe False (True <$ lookup "tag" kv)

      -- NOTE: this local binding shadows Prelude.maybe.  It drops the
      -- tag div entirely when the block is classed "notag", and is the
      -- identity otherwise.
      maybe
        | "notag" `elem` classes' = const []
        | otherwise = id
  go block@(CodeBlock (identifier, [], kv) text) =
    Div (mempty, ["code-container"], [])
      [block, Div (mempty, ["empty-code-tag"], []) []]
  go x = x
-- | Save the post's first paragraphs (count from the "synopsys"
-- metadata field, default 1) under the "synopsys-block" snapshot, with
-- footnotes stripped.  The document itself is returned unchanged.
saveSynopsys :: Pandoc -> Compiler Pandoc
saveSynopsys (Pandoc meta doc) =
  do
    id <- getUnderlying
    n <- fromMaybe (1 :: Int) . readMaybe . fromMaybe "" . lookupString "synopsys" <$> getMetadata id

    -- Skip any leading non-paragraph blocks (headers, raw HTML, ...)
    -- before taking the synopsis paragraphs.
    case dropWhile (not . isParagraph) doc of
      p:ps -> do
        saveSnapshot "synopsys-block" =<< makeItem (map removeFootnotes (take n (p:ps)))
        pure ()
      [] -> pure ()
    pure $ Pandoc meta doc
  where
    isParagraph Para{} = True
    isParagraph _ = False

    -- Footnote references make no sense outside the post body, so drop
    -- them from the synopsis.
    removeFootnotes (Para xs) = Para $ filter (\case { Note _ -> False; _ -> True }) xs
    removeFootnotes x = x
-- | Store the document's word count (words inside Str inlines) under
-- the "wc" snapshot; the document passes through unchanged.
saveWordCount :: Pandoc -> Compiler Pandoc
saveWordCount (Pandoc meta doc) = do
  _ <- saveSnapshot "wc" =<< makeItem countWords
  pure (Pandoc meta doc)
  where
    countWords = show (getSum (query perInline doc))

    perInline (Str s) = Sum (length (T.words s))
    perInline _       = mempty
-- | Build a nested bullet-list table of contents from the document's
-- headers, save it under the "table-of-contents" snapshot, and return
-- the document with every header given a stable anchor ("h0", "h1",
-- ...) and a self-link.
saveTableOfContents :: Pandoc -> Compiler Pandoc
saveTableOfContents (Pandoc meta input) =
  do
    saveSnapshot "table-of-contents" =<< makeItem toc
    -- NOTE(review): fixHeaders is applied here AND in `doc` below, so
    -- the emitted document has anchors assigned twice (and a second
    -- self-link prepended); `into` compensates with `tail x`.  Confirm
    -- the double application is intentional.
    pure $ Pandoc meta (fixHeaders 0 doc)
  where
    headers = filter (\case { Header _ _ _ -> True; _ -> False }) doc
    doc = fixHeaders 0 input

    -- Assign "h<n>" anchors in document order and prepend a self-link
    -- to each header's inlines.
    fixHeaders n (Header l (_, ms, mt) x:bs) =
      Header l (anchor, ms, mt) (Link (anchor, ms, mt) [] (T.singleton '#' <> anchor, mempty):x):fixHeaders (n + 1) bs where
        anchor = T.pack ("h" ++ show n)
    fixHeaders k (x:bs) = x:fixHeaders k bs
    fixHeaders _ [] = []

    -- Group each header with the strictly-deeper headers that follow
    -- it, recursively.  The `undefined` branches are unreachable:
    -- `headers` contains only Header constructors.
    into :: [Block] -> [[Block]]
    into (Header l m@(anchor, _, _) x:ms) =
      let
        contained (Header l' _ _) = l' > l
        contained _ = undefined
        (ours, rest) = span contained ms
      in [Para [Link (mempty, mempty, mempty) (tail x) (T.singleton '#' <> anchor, mempty)], list (into ours)]:into rest
    into [] = []
    into _ = undefined

    list = BulletList

    toc :: Block
    toc = list (into headers)
-- | One-time start-up work: point the Amulet compiler at its library
-- path, then load the persisted KaTeX render cache.
setup :: IO (MVar KatexCache)
setup = setEnv "AMC_LIBRARY_PATH" "/usr/lib/amuletml/lib/" >> loadCache
-- | The full pandoc pipeline for a page: KaTeX rendering, abbreviation
-- expansion, then synopsis, word-count and table-of-contents snapshots.
compiler :: MVar KatexCache -> Compiler (Item String)
compiler katexCache = do
  opts <- writerOptions
  let pipeline doc =
        katexFilter katexCache doc
          >>= abbreviationFilter
          >>= saveSynopsys
          >>= saveWordCount
          >>= saveTableOfContents
  pandocCompilerWithTransformM readerOpts opts pipeline
-- | Entry point: load the KaTeX cache, then register all site rules.
main :: IO ()
main = setup >>= \katexCache -> hakyllWith conf $ do
  -- Static assets and plain CSS are copied through verbatim.
  match "static/*" do
    route idRoute
    compile copyFileCompiler

  match "static/**/*" $ do
    route idRoute
    compile copyFileCompiler

  match "css/**/*" $ do
    route idRoute
    compile copyFileCompiler

  match "css/*.css" $ do
    route idRoute
    compile copyFileCompiler

  -- SCSS compiles to compressed CSS.  A custom importer records every
  -- @import, and each imported file is `load`ed afterwards so Hakyll
  -- tracks it as a dependency of this stylesheet.
  match "css/*.scss" $ do
    route $ setExtension "css"
    compile $ do
      imports <- unsafeCompiler $ newMVar ([] :: [(String, String)])
      let add f p = modifyMVar imports (\x -> pure ((f, p):x, []))
      body <- sassCompilerWith def
        { sassOutputStyle = SassStyleCompressed
        , sassImporters = Just [ SassImporter 0 add ]
        }
      list <- unsafeCompiler $ takeMVar imports
      for list $ \(req, path) -> do
        -- Rewrite "foo.<ext>" to "css/foo.scss" and load it.
        load (fromFilePath ("css/" ++ reverse (dropWhile (/= '.') (reverse req)) ++ "scss"))
          :: Compiler (Item String)
      pure body

  -- TikZ diagrams: LaTeX -> PDF (rubber-pipe) -> SVG (pdftocairo).
  match "diagrams/**/*.tex" $ do
    route $ setExtension "svg"
    compile $ getResourceBody
      >>= loadAndApplyTemplate "templates/tikz.tex" (bodyField "body")
      >>= withItemBody (return . pack
                    >=> unixFilterLBS "rubber-pipe" ["--pdf"]
                    >=> unixFilterLBS "pdftocairo" ["-svg", "-", "-"]
                    >=> return . unpack)

  -- Posts: full pipeline, then re-render the saved synopsis blocks to
  -- HTML under "synopsys-text" for use by the archive page and feed.
  match "pages/posts/*" do
    route $ metadataRoute pathFromTitle
    compile $ do
      wops <- writerOptions
      id <- getUnderlying
      r <- compiler katexCache
        >>= loadAndApplyTemplate "templates/post.html" postCtx
        >>= saveSnapshot "content"
        >>= loadAndApplyTemplate "templates/default.html" postCtx
        >>= relativizeUrls

      loadSnapshot id "synopsys-block"
        >>= saveSnapshot "synopsys-text"
          . writePandocWith wops
          . fmap (Pandoc mempty)

      pure r

  -- Literate Haskell posts are additionally published verbatim.
  match "pages/posts/*.lhs" $ version "raw" $ do
    route idRoute
    compile copyFileCompiler

  create ["archive.html"] $ do
    route idRoute
    compile $ do
      posts <- recentFirst =<< onlyPublic =<< loadAll ("pages/posts/*" .&&. hasNoVersion)
      let archiveCtx =
            listField "posts" postCtx (return posts) <>
            constField "title" "Archives" <>
            defaultContext

      makeItem ""
        >>= loadAndApplyTemplate "templates/archive.html" archiveCtx
        >>= loadAndApplyTemplate "templates/default.html" archiveCtx
        >>= relativizeUrls

  -- Top-level HTML pages, templated with the five most recent posts.
  match "pages/*.html" $ do
    route $ gsubRoute "pages/" (const "")
    compile $ do
      posts <- fmap (take 5) . recentFirst =<< onlyPublic =<< loadAll ("pages/posts/*" .&&. hasNoVersion)
      let indexCtx =
            listField "posts" postCtx (return posts) <>
            constField "title" "Home" <>
            defaultContext

      getResourceBody
        >>= applyAsTemplate indexCtx
        >>= loadAndApplyTemplate "templates/default.html" indexCtx
        >>= relativizeUrls

  match "pages/*.md" $ do
    route $ setExtension "html" <> gsubRoute "pages/" (const "")
    compile $ compiler katexCache
      >>= loadAndApplyTemplate "templates/page.html" defaultContext
      >>= loadAndApplyTemplate "templates/default.html" defaultContext
      >>= relativizeUrls

  -- Skylighting syntax definitions, parsed once and snapshotted for
  -- writerOptions to pick up.
  match "syntax/*.xml" $ compile $ do
    path <- toFilePath <$> getUnderlying
    contents <- itemBody <$> getResourceBody
    debugCompiler ("Loaded syntax definition from " ++ show path)
    let res = Sky.parseSyntaxDefinitionFromText path (LT.pack contents)
    _ <- saveSnapshot "syntax" =<< case res of
      Left e -> fail e
      Right x -> makeItem x
    makeItem contents

  match "templates/*" $ compile templateBodyCompiler

  -- RSS feed of the ten most recent public posts, bodied by synopsis.
  create ["feed.xml"] $ do
    route idRoute
    compile $ do
      let feedCtx = postCtx <> bodyField "description"
      posts <- fmap (take 10) . recentFirst =<< onlyPublic =<< loadAllSnapshots ("pages/posts/*" .&&. hasNoVersion) "synopsys-text"
      renderRss rssfeed feedCtx posts
-- | Keep only items whose metadata does not set @public: false@.
-- Items with no "public" field at all count as public.
onlyPublic :: [Item String] -> Compiler [Item String]
onlyPublic = filterM keep where
  keep item = do
    meta <- getMetadata (itemIdentifier item)
    pure $ case HMap.lookup "public" meta of
      Just (Bool False) -> False
      _                 -> True
-- | Template context for posts: publication date plus the synopsis,
-- word-count and table-of-contents snapshots saved by the pipeline.
postCtx :: Context String
postCtx =
  dateField "date" "%B %e, %Y"
    <> snapshotField "synopsys" "synopsys-text"
    <> snapshotField "words" "wc"
    <> snapshotField' render "toc" "table-of-contents"
    <> defaultContext
  where
    snapshotField = snapshotField' pure

    -- A field whose value is the named snapshot of the current item,
    -- post-processed by f.
    snapshotField' f key snap = field key $ \x -> do
      let id = itemIdentifier x
      fmap itemBody . f =<< loadSnapshot id snap

    -- Render a saved Block back to an HTML string.
    render x = do
      wops <- writerOptions
      pure . writePandocWith wops . fmap (Pandoc mempty . pure) $ x
-- | Run an external command, feeding the given lazy ByteString to its
-- stdin and returning (stdout bytes, stderr text).  Fails if the
-- process exits non-zero, including stderr in the error message.
--
-- Both output pipes are drained on background threads while stdin is
-- being written, so a full pipe buffer cannot deadlock the child.
readProcessBS :: FilePath -> [String] -> BS.ByteString -> IO (BS.ByteString, String)
readProcessBS path args input =
  let process = (proc path args)
                  { std_in = CreatePipe
                  , std_out = CreatePipe
                  , std_err = CreatePipe
                  }
  in withCreateProcess process $ \stdin stdout stderr ph ->
    case (stdin, stdout, stderr) of
      (Nothing, _, _) -> fail "Failed to get a stdin handle."
      (_, Nothing, _) -> fail "Failed to get a stdout handle."
      (_, _, Nothing) -> fail "Failed to get a stderr handle."
      (Just stdin, Just stdout, Just stderr) -> do
        out <- BS.hGetContents stdout
        err <- hGetContents stderr

        -- Force both (lazy) outputs to normal form in the background so
        -- the pipes keep draining while we write stdin.
        withForkWait (evaluate $ rnf out) $ \waitOut ->
          withForkWait (evaluate $ rnf err) $ \waitErr -> do
            -- Write input and close.
            BS.hPutStr stdin input
            hClose stdin

            -- wait on the output
            waitOut
            waitErr
            hClose stdout
            hClose stderr

        -- wait on the process
        ex <- waitForProcess ph
        case ex of
          ExitSuccess -> pure (out, err)
          ExitFailure ex -> fail (err ++ "Exited with " ++ show ex)
  where
    -- Fork an action and hand the continuation a "wait" that rethrows
    -- any exception the action raised; the forked thread is killed if
    -- the continuation dies first (same shape as System.Process's
    -- internal helper).
    withForkWait :: IO () -> (IO () -> IO a) -> IO a
    withForkWait async body = do
      waitVar <- newEmptyMVar :: IO (MVar (Either SomeException ()))
      mask $ \restore -> do
        tid <- forkIO $ try (restore async) >>= putMVar waitVar
        let wait = takeMVar waitVar >>= either throwIO return
        restore (body wait) `onException` killThread tid
-- | Derive a post's output path from its metadata.  An explicit "path"
-- field wins; otherwise the title is slugified (lowercased alphanumeric
-- words joined by dashes).  Titles containing the word "or" keep only
-- the part before it; titles starting "A quickie" are routed under
-- quick/ instead of the declared (or default "posts/") category.
pathFromTitle :: Metadata -> Routes
pathFromTitle meta =
  let
    declaredCategory =
      case lookupString "category" meta of
        Just s -> ((s ++ "/") ++)
        Nothing -> ("posts/" <>)

    -- Strict so a missing title errors immediately, not mid-route.
    !titleString =
      case lookupString "title" meta of
        Just s -> s
        Nothing -> error "post has no title?"

    -- Slug words: lowercase, alphanumeric only, empties dropped.
    title = filter (/= "") . map (filter isAlphaNum . map toLower) . words $ titleString

    (category, title') =
      if | "or" `elem` title -> (declaredCategory, takeWhile (/= "or") title)
         | ["a", "quickie"] `isPrefixOf` title -> (("quick/" ++), drop 2 title)
         | otherwise -> (declaredCategory, title)
  in
    case lookupString "path" meta of
      Just p -> constRoute (category (p <> ".html"))
      Nothing -> constRoute (category (intercalate "-" title' <> ".html"))
-- | Monadic 'foldMap': map each element to a monoidal value inside a
-- monad and combine the results left-to-right.
foldMapM :: (Monad w, Monoid m, Foldable f) => (a -> w m) -> f a -> w m
foldMapM k = foldr step (pure mempty)
  where step x acc = (<>) <$> k x <*> acc
-- | Load the persisted KaTeX cache from @.katex_cache@, falling back to
-- an empty cache when the file is missing or undecodable (e.g. after
-- the on-disk format changed); a decode failure is printed, not fatal.
loadCache :: HasCallStack => IO (MVar KatexCache)
loadCache = do
  t <- doesFileExist ".katex_cache"
  -- The file stores three association lists; rebuild the hash maps.
  let fixup (a, b, c) = KatexCache (HMap.fromList a) (HMap.fromList b) (HMap.fromList c)
  map <- if t
    then (fixup <$> decodeFile ".katex_cache") `catch` \e ->
      const (print e *> pure (KatexCache mempty mempty mempty)) (e :: SomeException)
    else pure (KatexCache mempty mempty mempty)
  var <- newMVar map
  pure var
-- | Persist the KaTeX cache to @.katex_cache@ as three association
-- lists (the inverse of the fixup performed by 'loadCache').
flushCache :: MVar KatexCache -> IO ()
flushCache var =
  withMVar var $ \(KatexCache spans keys texts) ->
    Data.Binary.encodeFile ".katex_cache"
      (HMap.toList spans, HMap.toList keys, HMap.toList texts)
-- | Drop cached KaTeX spans for this item that were rendered with an
-- abbreviation whose expansion has since changed.  Returns the pruned
-- cache (and stores it back into the MVar).
invalidateCache :: Identifier -> [String] -> MVar KatexCache -> Compiler KatexCache
invalidateCache id abbrevs cacheVar = unsafeCompiler $ modifyMVar cacheVar (\x -> pure (go x, go x)) where
  -- Current (name, expansion) pairs, recovered from the "\name:expansion"
  -- arguments built by katexFilter ("-m" flags are filtered out).
  currentValues = map (\x -> (T.pack (takeWhile (/= ':') (tail x)), T.pack (tail (dropWhile (/= ':') (tail x)))))
                $ filter (not . isPrefixOf "-") abbrevs

  ident = show id

  go (KatexCache spanMap abbrKeys abbrText) =
    let
      -- If the stored expansion differs from the current one, delete
      -- every span indexed under that (item, abbreviation) pair.
      go (abbr, val) (spanMap, abbrKeys, abbrText) =
        case HMap.lookup (ident, abbr) abbrText of
          Just vl | vl /= val ->
            let l = HMap.lookupDefault [] (ident, abbr) abbrKeys
            in (foldr HMap.delete spanMap l, abbrKeys, abbrText)
          _ -> (spanMap, abbrKeys, abbrText)
      (a, b, c) = foldr go (spanMap, abbrKeys, abbrText) currentValues
    in KatexCache a b c
-- | Persistent cache of KaTeX renders and the abbreviation state they
-- were rendered under.
data KatexCache
  = KatexCache { spanMap :: HMap.HashMap (String, MathType, Text) Text
                 -- ^ (item, math kind, TeX source) -> rendered HTML
               , abbreviationKeys :: HMap.HashMap (String, Text) [(String, MathType, Text)]
                 -- ^ (item, abbreviation) -> spans that used it
               , abbreviationText :: HMap.HashMap (String, Text) Text
                 -- ^ (item, abbreviation) -> expansion used at render time
               }
-- | Hash a 'MathType' by tagging its two constructors with 0 and 1, so
-- it can be used in the cache's HashMap keys.
instance Hashable MathType where
  hashWithSalt salt kind = hashWithSalt salt (tag :: Int) where
    tag = case kind of
      DisplayMath -> 0
      InlineMath  -> 1
-- Orphan Binary instances for the Pandoc AST (via DeriveAnyClass), so
-- Block/Inline snapshots can be (de)serialised by Hakyll's store.
deriving instance Binary Block
deriving instance Binary Inline
deriving instance Binary Format
deriving instance Binary ListNumberStyle
deriving instance Binary ListNumberDelim
deriving instance Binary Caption
deriving instance Binary Alignment
deriving instance Binary ColWidth
deriving instance Binary TableHead
deriving instance Binary TableBody
deriving instance Binary TableFoot
deriving instance Binary QuoteType
deriving instance Binary Citation
deriving instance Binary Row
deriving instance Binary MathType
deriving instance Binary RowHeadColumns
deriving instance Binary CitationMode
deriving instance Binary Cell
deriving instance Binary RowSpan
deriving instance Binary ColSpan
@ -0,0 +1,140 @@ | |||||
{-# LANGUAGE LambdaCase #-} | |||||
module Parser | |||||
( Parser() | |||||
, module X | |||||
, Parser.any | |||||
, satisfy | |||||
, string | |||||
, digit | |||||
, number | |||||
, spaces | |||||
, reserved | |||||
, lexeme | |||||
, (<?>) | |||||
, runParser | |||||
, between ) where | |||||
import Control.Applicative as X | |||||
import Control.Monad as X | |||||
import Data.Char | |||||
-- | A simple backtracking parser: a function from the remaining input
-- to either an error message or a result plus the unconsumed input.
newtype Parser a
  = Parser { parse :: String -> Either String (a, String) }
-- | Run a parser on an input string, discarding any leftover input.
runParser :: Parser a -> String -> Either String a
runParser (Parser p) input = fmap fst (p input)
-- | Replace a parser's failure message: try @p@, and if it fails,
-- report @expected err@ instead.
(<?>) :: Parser a -> String -> Parser a
p <?> err = p <|> fail err
infixl 2 <?>
-- | Map over the parse result, leaving the leftover input untouched.
instance Functor Parser where
  fmap f (Parser p) = Parser $ \input ->
    case p input of
      Left err        -> Left err
      Right (a, rest) -> Right (f a, rest)
-- | Sequential application: parse the function, then the argument, and
-- thread the remaining input through.  Written with Either's Monad.
instance Applicative Parser where
  pure v = Parser $ \input -> Right (v, input)

  Parser pf <*> Parser pv = Parser $ \input -> do
    (f, rest)  <- pf input
    (v, rest') <- pv rest
    Right (f v, rest')
-- | Choice with full backtracking: if the first parser fails, the
-- second is tried on the ORIGINAL input; the first error is discarded.
instance Alternative Parser where
  empty = fail "nothing"

  Parser p <|> Parser q = Parser $ \input ->
    either (const (q input)) Right (p input)
-- | Sequencing: run the first parser, feed its leftover input to the
-- continuation.
instance Monad Parser where
  return = pure

  (Parser p) >>= f = Parser go where
    go s = case p s of
      Left e -> Left e
      Right (x, s') -> parse (f x) s'

  -- Fail with an "expected M, got C" message built from the head of the
  -- remaining input.
  -- NOTE(review): 'fail' was removed from the Monad class in base 4.13
  -- (GHC >= 8.8); on a modern toolchain this method belongs in a
  -- MonadFail instance instead — confirm the targeted compiler version.
  fail m = Parser go where
    go = Left . go'

    go' [] = "expected " ++ m ++ ", got to the end of stream"
    go' (x:xs) = "expected " ++ m ++ ", got '" ++ x:"'"
-- | Consume and return the next character; fails at end of input.
any :: Parser Char
any = Parser $ \case
  []     -> Left "any: end of file"
  (c:cs) -> Right (c, cs)
-- | Consume one character, succeeding only if it satisfies the
-- predicate.
satisfy :: (Char -> Bool) -> Parser Char
satisfy p = Parser.any >>= check where
  check c
    | p c       = return c
    | otherwise = fail "a solution to the function"
-- | Match one specific character.
char :: Char -> Parser Char
char c = satisfy (== c) <?> ("literal " ++ [c])
-- | Match any single character from the given set.
oneOf :: String -> Parser Char
oneOf cs = satisfy (\c -> c `elem` cs) <?> ("one of '" ++ cs ++ "'")
-- | Match a literal string, character by character.  Matching each
-- char with 'char' and collecting the results is exactly 'traverse'.
string :: String -> Parser String
string = traverse char
-- | Parse one or more decimal digits as a non-negative 'Integer'.
natural :: Parser Integer
natural = read <$> some (satisfy isDigit)
-- | Run a parser, then skip any trailing whitespace.
lexeme :: Parser a -> Parser a
lexeme p = p <* spaces
-- | Match a literal keyword and skip trailing whitespace.
reserved :: String -> Parser String
reserved word = lexeme (string word)
-- | Skip zero or more whitespace characters (space, newline, CR).
spaces :: Parser String
spaces = many (oneOf " \n\r")
-- | Match a single decimal digit.
digit :: Parser Char
digit = satisfy isDigit
-- | Parse an 'Int' with an optional leading minus sign.
--
-- Bug fix: the sign was previously parsed with @string "-" <|> empty@.
-- Since 'empty' always fails, the whole parser failed on any input
-- that did not start with @-@, i.e. every unsigned number.  Falling
-- back to the empty string lets unsigned numbers parse as well.
number :: Parser Int
number = do
  sign   <- string "-" <|> pure ""
  digits <- some digit
  return $ read (sign ++ digits)
-- | Parse @inner@ surrounded by @open@ and @close@, keeping only the
-- inner result.
between :: Parser b -> Parser c -> Parser a -> Parser a
between open close inner = open *> inner <* close
-- | Run a parser surrounded by optional whitespace on both sides.
contents :: Parser a -> Parser a
contents = between spaces spaces
-- | Zero or more occurrences of @c@, separated by @s@.
sep :: Parser b -> Parser a -> Parser [a]
sep s c = sep1 s c <|> pure []
-- | One or more occurrences of @c@, separated by @s@.
sep1 :: Parser b -> Parser a -> Parser [a]
sep1 s c = (:) <$> c <*> many (s >> c)
-- | Try a parser, yielding a fallback value if it fails.
option :: a -> Parser a -> Parser a
option fallback p = p <|> pure fallback
-- | Try a parser, yielding 'Nothing' instead of failing.
optionMaybe :: Parser a -> Parser (Maybe a)
optionMaybe = option Nothing . fmap Just
-- | Run a parser for effect only, succeeding whether or not it matched.
optional :: Parser a -> Parser ()
optional p = void p <|> pure ()
-- | Succeed only at the end of the input.
eof :: Parser ()
eof = Parser $ \case
  []    -> Right ((), [])
  (c:_) -> Left $ "expected eof, got '" ++ c:"'"
@ -0,0 +1,239 @@ | |||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> | |||||
<html> | |||||
<head> | |||||
<meta http-equiv="content-type" content="text/html; charset=UTF-8"> | |||||
<title>~/Projects/blag/static/Parser.hs.html</title> | |||||
<meta name="Generator" content="Vim/7.4"> | |||||
<meta name="plugin-version" content="vim7.4_v2"> | |||||
<meta name="syntax" content="haskell"> | |||||
<meta name="settings" content="use_css,pre_wrap,no_foldcolumn,expand_tabs,prevent_copy=t"> | |||||
<meta name="colorscheme" content="onedark"> | |||||
<style type="text/css"> | |||||
<!-- | |||||
pre { white-space: pre-wrap; font-family: 'Iosevka', monospace; color: #ABB2BF; background-color: #282C34; } | |||||
body { font-family: 'Iosevka', monospace; color: #ABB2BF; background-color: #282C34; } | |||||
* { font-size: 1em; } | |||||
input { border: none; margin: 0; padding: 0; font-family: 'Iosevka', monospace; } | |||||
input[size='1'] { width: 1em; width: 1ch; } | |||||
input[size='2'] { width: 2em; width: 2ch; } | |||||
input[size='3'] { width: 3em; width: 3ch; } | |||||
input[size='4'] { width: 4em; width: 4ch; } | |||||
input[size='5'] { width: 5em; width: 5ch; } | |||||
input[size='6'] { width: 6em; width: 6ch; } | |||||
input[size='7'] { width: 7em; width: 7ch; } | |||||
input[size='8'] { width: 8em; width: 8ch; } | |||||
input[size='9'] { width: 9em; width: 9ch; } | |||||
input[size='10'] { width: 10em; width: 10ch; } | |||||
input[size='11'] { width: 11em; width: 11ch; } | |||||
input[size='12'] { width: 12em; width: 12ch; } | |||||
input[size='13'] { width: 13em; width: 13ch; } | |||||
input[size='14'] { width: 14em; width: 14ch; } | |||||
input[size='15'] { width: 15em; width: 15ch; } | |||||
input[size='16'] { width: 16em; width: 16ch; } | |||||
input[size='17'] { width: 17em; width: 17ch; } | |||||
input[size='18'] { width: 18em; width: 18ch; } | |||||
input[size='19'] { width: 19em; width: 19ch; } | |||||
input[size='20'] { width: 20em; width: 20ch; } | |||||
#oneCharWidth, #oneEmWidth, #oneInputWidth { padding: 0; margin: 0; position: absolute; left: -999999px; visibility: hidden; } | |||||
.em5 input[size='1'] { width: 0.05em; } .em5 input[size='2'] { width: 0.1em; } .em5 input[size='3'] { width: 0.15em; } .em5 input[size='4'] { width: 0.2em; } .em5 input[size='5'] { width: 0.25em; } .em5 input[size='6'] { width: 0.3em; } .em5 input[size='7'] { width: 0.35em; } .em5 input[size='8'] { width: 0.4em; } .em5 input[size='9'] { width: 0.45em; } .em5 input[size='10'] { width: 0.5em; } .em5 input[size='11'] { width: 0.55em; } .em5 input[size='12'] { width: 0.6em; } .em5 input[size='13'] { width: 0.65em; } .em5 input[size='14'] { width: 0.7em; } .em5 input[size='15'] { width: 0.75em; } .em5 input[size='16'] { width: 0.8em; } .em5 input[size='17'] { width: 0.85em; } .em5 input[size='18'] { width: 0.9em; } .em5 input[size='19'] { width: 0.95em; } .em5 input[size='20'] { width: 1.0em; } | |||||
.em10 input[size='1'] { width: 0.1em; } .em10 input[size='2'] { width: 0.2em; } .em10 input[size='3'] { width: 0.3em; } .em10 input[size='4'] { width: 0.4em; } .em10 input[size='5'] { width: 0.5em; } .em10 input[size='6'] { width: 0.6em; } .em10 input[size='7'] { width: 0.7em; } .em10 input[size='8'] { width: 0.8em; } .em10 input[size='9'] { width: 0.9em; } .em10 input[size='10'] { width: 1.0em; } .em10 input[size='11'] { width: 1.1em; } .em10 input[size='12'] { width: 1.2em; } .em10 input[size='13'] { width: 1.3em; } .em10 input[size='14'] { width: 1.4em; } .em10 input[size='15'] { width: 1.5em; } .em10 input[size='16'] { width: 1.6em; } .em10 input[size='17'] { width: 1.7em; } .em10 input[size='18'] { width: 1.8em; } .em10 input[size='19'] { width: 1.9em; } .em10 input[size='20'] { width: 2.0em; } | |||||
.em15 input[size='1'] { width: 0.15em; } .em15 input[size='2'] { width: 0.3em; } .em15 input[size='3'] { width: 0.45em; } .em15 input[size='4'] { width: 0.6em; } .em15 input[size='5'] { width: 0.75em; } .em15 input[size='6'] { width: 0.9em; } .em15 input[size='7'] { width: 1.05em; } .em15 input[size='8'] { width: 1.2em; } .em15 input[size='9'] { width: 1.35em; } .em15 input[size='10'] { width: 1.5em; } .em15 input[size='11'] { width: 1.65em; } .em15 input[size='12'] { width: 1.8em; } .em15 input[size='13'] { width: 1.95em; } .em15 input[size='14'] { width: 2.1em; } .em15 input[size='15'] { width: 2.25em; } .em15 input[size='16'] { width: 2.4em; } .em15 input[size='17'] { width: 2.55em; } .em15 input[size='18'] { width: 2.7em; } .em15 input[size='19'] { width: 2.85em; } .em15 input[size='20'] { width: 3.0em; } | |||||
.em20 input[size='1'] { width: 0.2em; } .em20 input[size='2'] { width: 0.4em; } .em20 input[size='3'] { width: 0.6em; } .em20 input[size='4'] { width: 0.8em; } .em20 input[size='5'] { width: 1.0em; } .em20 input[size='6'] { width: 1.2em; } .em20 input[size='7'] { width: 1.4em; } .em20 input[size='8'] { width: 1.6em; } .em20 input[size='9'] { width: 1.8em; } .em20 input[size='10'] { width: 2.0em; } .em20 input[size='11'] { width: 2.2em; } .em20 input[size='12'] { width: 2.4em; } .em20 input[size='13'] { width: 2.6em; } .em20 input[size='14'] { width: 2.8em; } .em20 input[size='15'] { width: 3.0em; } .em20 input[size='16'] { width: 3.2em; } .em20 input[size='17'] { width: 3.4em; } .em20 input[size='18'] { width: 3.6em; } .em20 input[size='19'] { width: 3.8em; } .em20 input[size='20'] { width: 4.0em; } | |||||
.em25 input[size='1'] { width: 0.25em; } .em25 input[size='2'] { width: 0.5em; } .em25 input[size='3'] { width: 0.75em; } .em25 input[size='4'] { width: 1.0em; } .em25 input[size='5'] { width: 1.25em; } .em25 input[size='6'] { width: 1.5em; } .em25 input[size='7'] { width: 1.75em; } .em25 input[size='8'] { width: 2.0em; } .em25 input[size='9'] { width: 2.25em; } .em25 input[size='10'] { width: 2.5em; } .em25 input[size='11'] { width: 2.75em; } .em25 input[size='12'] { width: 3.0em; } .em25 input[size='13'] { width: 3.25em; } .em25 input[size='14'] { width: 3.5em; } .em25 input[size='15'] { width: 3.75em; } .em25 input[size='16'] { width: 4.0em; } .em25 input[size='17'] { width: 4.25em; } .em25 input[size='18'] { width: 4.5em; } .em25 input[size='19'] { width: 4.75em; } .em25 input[size='20'] { width: 5.0em; } | |||||
.em30 input[size='1'] { width: 0.3em; } .em30 input[size='2'] { width: 0.6em; } .em30 input[size='3'] { width: 0.9em; } .em30 input[size='4'] { width: 1.2em; } .em30 input[size='5'] { width: 1.5em; } .em30 input[size='6'] { width: 1.8em; } .em30 input[size='7'] { width: 2.1em; } .em30 input[size='8'] { width: 2.4em; } .em30 input[size='9'] { width: 2.7em; } .em30 input[size='10'] { width: 3.0em; } .em30 input[size='11'] { width: 3.3em; } .em30 input[size='12'] { width: 3.6em; } .em30 input[size='13'] { width: 3.9em; } .em30 input[size='14'] { width: 4.2em; } .em30 input[size='15'] { width: 4.5em; } .em30 input[size='16'] { width: 4.8em; } .em30 input[size='17'] { width: 5.1em; } .em30 input[size='18'] { width: 5.4em; } .em30 input[size='19'] { width: 5.7em; } .em30 input[size='20'] { width: 6.0em; } | |||||
.em35 input[size='1'] { width: 0.35em; } .em35 input[size='2'] { width: 0.7em; } .em35 input[size='3'] { width: 1.05em; } .em35 input[size='4'] { width: 1.4em; } .em35 input[size='5'] { width: 1.75em; } .em35 input[size='6'] { width: 2.1em; } .em35 input[size='7'] { width: 2.45em; } .em35 input[size='8'] { width: 2.8em; } .em35 input[size='9'] { width: 3.15em; } .em35 input[size='10'] { width: 3.5em; } .em35 input[size='11'] { width: 3.85em; } .em35 input[size='12'] { width: 4.2em; } .em35 input[size='13'] { width: 4.55em; } .em35 input[size='14'] { width: 4.9em; } .em35 input[size='15'] { width: 5.25em; } .em35 input[size='16'] { width: 5.6em; } .em35 input[size='17'] { width: 5.95em; } .em35 input[size='18'] { width: 6.3em; } .em35 input[size='19'] { width: 6.65em; } .em35 input[size='20'] { width: 7.0em; } | |||||
.em40 input[size='1'] { width: 0.4em; } .em40 input[size='2'] { width: 0.8em; } .em40 input[size='3'] { width: 1.2em; } .em40 input[size='4'] { width: 1.6em; } .em40 input[size='5'] { width: 2.0em; } .em40 input[size='6'] { width: 2.4em; } .em40 input[size='7'] { width: 2.8em; } .em40 input[size='8'] { width: 3.2em; } .em40 input[size='9'] { width: 3.6em; } .em40 input[size='10'] { width: 4.0em; } .em40 input[size='11'] { width: 4.4em; } .em40 input[size='12'] { width: 4.8em; } .em40 input[size='13'] { width: 5.2em; } .em40 input[size='14'] { width: 5.6em; } .em40 input[size='15'] { width: 6.0em; } .em40 input[size='16'] { width: 6.4em; } .em40 input[size='17'] { width: 6.8em; } .em40 input[size='18'] { width: 7.2em; } .em40 input[size='19'] { width: 7.6em; } .em40 input[size='20'] { width: 8.0em; } | |||||
.em45 input[size='1'] { width: 0.45em; } .em45 input[size='2'] { width: 0.9em; } .em45 input[size='3'] { width: 1.35em; } .em45 input[size='4'] { width: 1.8em; } .em45 input[size='5'] { width: 2.25em; } .em45 input[size='6'] { width: 2.7em; } .em45 input[size='7'] { width: 3.15em; } .em45 input[size='8'] { width: 3.6em; } .em45 input[size='9'] { width: 4.05em; } .em45 input[size='10'] { width: 4.5em; } .em45 input[size='11'] { width: 4.95em; } .em45 input[size='12'] { width: 5.4em; } .em45 input[size='13'] { width: 5.85em; } .em45 input[size='14'] { width: 6.3em; } .em45 input[size='15'] { width: 6.75em; } .em45 input[size='16'] { width: 7.2em; } .em45 input[size='17'] { width: 7.65em; } .em45 input[size='18'] { width: 8.1em; } .em45 input[size='19'] { width: 8.55em; } .em45 input[size='20'] { width: 9.0em; } | |||||
.em50 input[size='1'] { width: 0.5em; } .em50 input[size='2'] { width: 1.0em; } .em50 input[size='3'] { width: 1.5em; } .em50 input[size='4'] { width: 2.0em; } .em50 input[size='5'] { width: 2.5em; } .em50 input[size='6'] { width: 3.0em; } .em50 input[size='7'] { width: 3.5em; } .em50 input[size='8'] { width: 4.0em; } .em50 input[size='9'] { width: 4.5em; } .em50 input[size='10'] { width: 5.0em; } .em50 input[size='11'] { width: 5.5em; } .em50 input[size='12'] { width: 6.0em; } .em50 input[size='13'] { width: 6.5em; } .em50 input[size='14'] { width: 7.0em; } .em50 input[size='15'] { width: 7.5em; } .em50 input[size='16'] { width: 8.0em; } .em50 input[size='17'] { width: 8.5em; } .em50 input[size='18'] { width: 9.0em; } .em50 input[size='19'] { width: 9.5em; } .em50 input[size='20'] { width: 10.0em; } | |||||
.em55 input[size='1'] { width: 0.55em; } .em55 input[size='2'] { width: 1.1em; } .em55 input[size='3'] { width: 1.65em; } .em55 input[size='4'] { width: 2.2em; } .em55 input[size='5'] { width: 2.75em; } .em55 input[size='6'] { width: 3.3em; } .em55 input[size='7'] { width: 3.85em; } .em55 input[size='8'] { width: 4.4em; } .em55 input[size='9'] { width: 4.95em; } .em55 input[size='10'] { width: 5.5em; } .em55 input[size='11'] { width: 6.05em; } .em55 input[size='12'] { width: 6.6em; } .em55 input[size='13'] { width: 7.15em; } .em55 input[size='14'] { width: 7.7em; } .em55 input[size='15'] { width: 8.25em; } .em55 input[size='16'] { width: 8.8em; } .em55 input[size='17'] { width: 9.35em; } .em55 input[size='18'] { width: 9.9em; } .em55 input[size='19'] { width: 10.45em; } .em55 input[size='20'] { width: 11.0em; } | |||||
.em60 input[size='1'] { width: 0.6em; } .em60 input[size='2'] { width: 1.2em; } .em60 input[size='3'] { width: 1.8em; } .em60 input[size='4'] { width: 2.4em; } .em60 input[size='5'] { width: 3.0em; } .em60 input[size='6'] { width: 3.6em; } .em60 input[size='7'] { width: 4.2em; } .em60 input[size='8'] { width: 4.8em; } .em60 input[size='9'] { width: 5.4em; } .em60 input[size='10'] { width: 6.0em; } .em60 input[size='11'] { width: 6.6em; } .em60 input[size='12'] { width: 7.2em; } .em60 input[size='13'] { width: 7.8em; } .em60 input[size='14'] { width: 8.4em; } .em60 input[size='15'] { width: 9.0em; } .em60 input[size='16'] { width: 9.6em; } .em60 input[size='17'] { width: 10.2em; } .em60 input[size='18'] { width: 10.8em; } .em60 input[size='19'] { width: 11.4em; } .em60 input[size='20'] { width: 12.0em; } | |||||
.em65 input[size='1'] { width: 0.65em; } .em65 input[size='2'] { width: 1.3em; } .em65 input[size='3'] { width: 1.95em; } .em65 input[size='4'] { width: 2.6em; } .em65 input[size='5'] { width: 3.25em; } .em65 input[size='6'] { width: 3.9em; } .em65 input[size='7'] { width: 4.55em; } .em65 input[size='8'] { width: 5.2em; } .em65 input[size='9'] { width: 5.85em; } .em65 input[size='10'] { width: 6.5em; } .em65 input[size='11'] { width: 7.15em; } .em65 input[size='12'] { width: 7.8em; } .em65 input[size='13'] { width: 8.45em; } .em65 input[size='14'] { width: 9.1em; } .em65 input[size='15'] { width: 9.75em; } .em65 input[size='16'] { width: 10.4em; } .em65 input[size='17'] { width: 11.05em; } .em65 input[size='18'] { width: 11.7em; } .em65 input[size='19'] { width: 12.35em; } .em65 input[size='20'] { width: 13.0em; } | |||||
.em70 input[size='1'] { width: 0.7em; } .em70 input[size='2'] { width: 1.4em; } .em70 input[size='3'] { width: 2.1em; } .em70 input[size='4'] { width: 2.8em; } .em70 input[size='5'] { width: 3.5em; } .em70 input[size='6'] { width: 4.2em; } .em70 input[size='7'] { width: 4.9em; } .em70 input[size='8'] { width: 5.6em; } .em70 input[size='9'] { width: 6.3em; } .em70 input[size='10'] { width: 7.0em; } .em70 input[size='11'] { width: 7.7em; } .em70 input[size='12'] { width: 8.4em; } .em70 input[size='13'] { width: 9.1em; } .em70 input[size='14'] { width: 9.8em; } .em70 input[size='15'] { width: 10.5em; } .em70 input[size='16'] { width: 11.2em; } .em70 input[size='17'] { width: 11.9em; } .em70 input[size='18'] { width: 12.6em; } .em70 input[size='19'] { width: 13.3em; } .em70 input[size='20'] { width: 14.0em; } | |||||
.em75 input[size='1'] { width: 0.75em; } .em75 input[size='2'] { width: 1.5em; } .em75 input[size='3'] { width: 2.25em; } .em75 input[size='4'] { width: 3.0em; } .em75 input[size='5'] { width: 3.75em; } .em75 input[size='6'] { width: 4.5em; } .em75 input[size='7'] { width: 5.25em; } .em75 input[size='8'] { width: 6.0em; } .em75 input[size='9'] { width: 6.75em; } .em75 input[size='10'] { width: 7.5em; } .em75 input[size='11'] { width: 8.25em; } .em75 input[size='12'] { width: 9.0em; } .em75 input[size='13'] { width: 9.75em; } .em75 input[size='14'] { width: 10.5em; } .em75 input[size='15'] { width: 11.25em; } .em75 input[size='16'] { width: 12.0em; } .em75 input[size='17'] { width: 12.75em; } .em75 input[size='18'] { width: 13.5em; } .em75 input[size='19'] { width: 14.25em; } .em75 input[size='20'] { width: 15.0em; } | |||||
.em80 input[size='1'] { width: 0.8em; } .em80 input[size='2'] { width: 1.6em; } .em80 input[size='3'] { width: 2.4em; } .em80 input[size='4'] { width: 3.2em; } .em80 input[size='5'] { width: 4.0em; } .em80 input[size='6'] { width: 4.8em; } .em80 input[size='7'] { width: 5.6em; } .em80 input[size='8'] { width: 6.4em; } .em80 input[size='9'] { width: 7.2em; } .em80 input[size='10'] { width: 8.0em; } .em80 input[size='11'] { width: 8.8em; } .em80 input[size='12'] { width: 9.6em; } .em80 input[size='13'] { width: 10.4em; } .em80 input[size='14'] { width: 11.2em; } .em80 input[size='15'] { width: 12.0em; } .em80 input[size='16'] { width: 12.8em; } .em80 input[size='17'] { width: 13.6em; } .em80 input[size='18'] { width: 14.4em; } .em80 input[size='19'] { width: 15.2em; } .em80 input[size='20'] { width: 16.0em; } | |||||
.em85 input[size='1'] { width: 0.85em; } .em85 input[size='2'] { width: 1.7em; } .em85 input[size='3'] { width: 2.55em; } .em85 input[size='4'] { width: 3.4em; } .em85 input[size='5'] { width: 4.25em; } .em85 input[size='6'] { width: 5.1em; } .em85 input[size='7'] { width: 5.95em; } .em85 input[size='8'] { width: 6.8em; } .em85 input[size='9'] { width: 7.65em; } .em85 input[size='10'] { width: 8.5em; } .em85 input[size='11'] { width: 9.35em; } .em85 input[size='12'] { width: 10.2em; } .em85 input[size='13'] { width: 11.05em; } .em85 input[size='14'] { width: 11.9em; } .em85 input[size='15'] { width: 12.75em; } .em85 input[size='16'] { width: 13.6em; } .em85 input[size='17'] { width: 14.45em; } .em85 input[size='18'] { width: 15.3em; } .em85 input[size='19'] { width: 16.15em; } .em85 input[size='20'] { width: 17.0em; } | |||||
.em90 input[size='1'] { width: 0.9em; } .em90 input[size='2'] { width: 1.8em; } .em90 input[size='3'] { width: 2.7em; } .em90 input[size='4'] { width: 3.6em; } .em90 input[size='5'] { width: 4.5em; } .em90 input[size='6'] { width: 5.4em; } .em90 input[size='7'] { width: 6.3em; } .em90 input[size='8'] { width: 7.2em; } .em90 input[size='9'] { width: 8.1em; } .em90 input[size='10'] { width: 9.0em; } .em90 input[size='11'] { width: 9.9em; } .em90 input[size='12'] { width: 10.8em; } .em90 input[size='13'] { width: 11.7em; } .em90 input[size='14'] { width: 12.6em; } .em90 input[size='15'] { width: 13.5em; } .em90 input[size='16'] { width: 14.4em; } .em90 input[size='17'] { width: 15.3em; } .em90 input[size='18'] { width: 16.2em; } .em90 input[size='19'] { width: 17.1em; } .em90 input[size='20'] { width: 18.0em; } | |||||
.em95 input[size='1'] { width: 0.95em; } .em95 input[size='2'] { width: 1.9em; } .em95 input[size='3'] { width: 2.85em; } .em95 input[size='4'] { width: 3.8em; } .em95 input[size='5'] { width: 4.75em; } .em95 input[size='6'] { width: 5.7em; } .em95 input[size='7'] { width: 6.65em; } .em95 input[size='8'] { width: 7.6em; } .em95 input[size='9'] { width: 8.55em; } .em95 input[size='10'] { width: 9.5em; } .em95 input[size='11'] { width: 10.45em; } .em95 input[size='12'] { width: 11.4em; } .em95 input[size='13'] { width: 12.35em; } .em95 input[size='14'] { width: 13.3em; } .em95 input[size='15'] { width: 14.25em; } .em95 input[size='16'] { width: 15.2em; } .em95 input[size='17'] { width: 16.15em; } .em95 input[size='18'] { width: 17.1em; } .em95 input[size='19'] { width: 18.05em; } .em95 input[size='20'] { width: 19.0em; } | |||||
.em100 input[size='1'] { width: 1.0em; } .em100 input[size='2'] { width: 2.0em; } .em100 input[size='3'] { width: 3.0em; } .em100 input[size='4'] { width: 4.0em; } .em100 input[size='5'] { width: 5.0em; } .em100 input[size='6'] { width: 6.0em; } .em100 input[size='7'] { width: 7.0em; } .em100 input[size='8'] { width: 8.0em; } .em100 input[size='9'] { width: 9.0em; } .em100 input[size='10'] { width: 10.0em; } .em100 input[size='11'] { width: 11.0em; } .em100 input[size='12'] { width: 12.0em; } .em100 input[size='13'] { width: 13.0em; } .em100 input[size='14'] { width: 14.0em; } .em100 input[size='15'] { width: 15.0em; } .em100 input[size='16'] { width: 16.0em; } .em100 input[size='17'] { width: 17.0em; } .em100 input[size='18'] { width: 18.0em; } .em100 input[size='19'] { width: 19.0em; } .em100 input[size='20'] { width: 20.0em; } | |||||
input.Folded { cursor: default; } | |||||
.Conditional { color: #C678DD; } | |||||
.Operator { color: #C678DD; } | |||||
.Keyword { color: #E06C75; } | |||||
.String { color: #98C379; } | |||||
.Identifier { color: #E06C75; } | |||||
.haskellType { color: #61AFEF; } | |||||
.Structure { color: #E5C07B; } | |||||
.Number { color: #D19A66; } | |||||
--> | |||||
</style> | |||||
<script type='text/javascript'> | |||||
<!-- | |||||
/* simulate a "ch" unit by asking the browser how big a zero character is */ | |||||
/* Simulate a "ch" unit for browsers that lack it: measure a rendered "0"
 * against the em-sized probe elements, then switch the code container to
 * the closest .emNN class (5% steps, capped at 100%) so the sized <input>
 * elements line up with the surrounding monospace text. */
function FixCharWidth() {
  /* width of a single character, taken from the hidden "0" element */
  const goodWidth = document.getElementById("oneCharWidth").clientWidth;
  const inputWidth = document.getElementById("oneInputWidth").clientWidth;
  const emWidth = document.getElementById("oneEmWidth").clientWidth;
  /* only adjust when the browser renders size-N inputs wider than N chars
   * (i.e. the em-based fallback widths are in effect) */
  if (inputWidth > goodWidth) {
    /* pick the largest 5% step not exceeding the char-to-em ratio */
    let ratio = 5;
    while (ratio < 100 * goodWidth / emWidth && ratio < 100) {
      ratio += 5;
    }
    document.getElementById("vimCodeElement").className = "em" + ratio;
  }
}
--> | |||||
</script> | |||||
</head> | |||||
<body onload='FixCharWidth();'> | |||||
<!-- hidden divs used by javascript to get the width of a char --> | |||||
<div id='oneCharWidth'>0</div> | |||||
<div id='oneInputWidth'><input size='1' value='0'></div> | |||||
<div id='oneEmWidth' style='width: 1em;'></div> | |||||
<pre id='vimCodeElement'> | |||||
{-# LANGUAGE LambdaCase #-} | |||||
<span class="Structure">module</span> <span class="haskellType">Parser</span> | |||||
( <span class="haskellType">Parser</span>() | |||||
, <span class="Structure">module</span> <span class="haskellType">X</span> | |||||
, <span class="haskellType">Parser</span><span class="Operator">.</span>any | |||||
, satisfy | |||||
, string | |||||
, digit | |||||
, number | |||||
, spaces | |||||
, reserved | |||||
, lexeme | |||||
, (<span class="Operator"><?></span>) | |||||
, runParser | |||||
, between ) <span class="Structure">where</span> | |||||
<span class="Structure">import</span> <span class="haskellType">Control</span><span class="Operator">.</span><span class="haskellType">Applicative</span> <span class="Structure">as</span> <span class="haskellType">X</span> | |||||
<span class="Structure">import</span> <span class="haskellType">Control</span><span class="Operator">.</span><span class="haskellType">Monad</span> <span class="Structure">as</span> <span class="haskellType">X</span> | |||||
<span class="Structure">import</span> <span class="haskellType">Data</span><span class="Operator">.</span><span class="haskellType">Char</span> | |||||
<span class="Structure">newtype</span> <span class="haskellType">Parser</span> a | |||||
<span class="Operator">=</span> <span class="haskellType">Parser</span> { <span class="Identifier">parse</span> <span class="Operator">::</span> <span class="haskellType">String</span> <span class="Operator">-></span> <span class="haskellType">Either</span> <span class="haskellType">String</span> (a, <span class="haskellType">String</span>) } | |||||
<span class="Identifier">runParser</span> <span class="Operator">::</span> <span class="haskellType">Parser</span> a <span class="Operator">-></span> <span class="haskellType">String</span> <span class="Operator">-></span> <span class="haskellType">Either</span> <span class="haskellType">String</span> a | |||||
runParser (<span class="haskellType">Parser</span> p) s <span class="Operator">=</span> fst <span class="Operator"><$></span> p s | |||||
(<span class="Operator"><?></span>) <span class="Operator">::</span> <span class="haskellType">Parser</span> a <span class="Operator">-></span> <span class="haskellType">String</span> <span class="Operator">-></span> <span class="haskellType">Parser</span> a | |||||
p <span class="Operator"><?></span> err <span class="Operator">=</span> p <span class="Operator"><|></span> fail err | |||||
<span class="Keyword">infixl</span> <span class="Number">2</span> <span class="Operator"><?></span> | |||||
<span class="Structure">instance</span> <span class="haskellType">Functor</span> <span class="haskellType">Parser</span> <span class="Structure">where</span> | |||||
fn <span class="Operator">`fmap`</span> (<span class="haskellType">Parser</span> p) <span class="Operator">=</span> <span class="haskellType">Parser</span> go <span class="Structure">where</span> | |||||
go st <span class="Operator">=</span> <span class="Keyword">case</span> p st <span class="Keyword">of</span> | |||||
<span class="haskellType">Left</span> e <span class="Operator">-></span> <span class="haskellType">Left</span> e | |||||
<span class="haskellType">Right</span> (res, str') <span class="Operator">-></span> <span class="haskellType">Right</span> (fn res, str') | |||||
<span class="Structure">instance</span> <span class="haskellType">Applicative</span> <span class="haskellType">Parser</span> <span class="Structure">where</span> | |||||
pure x <span class="Operator">=</span> <span class="haskellType">Parser</span> <span class="Operator">$</span> <span class="Operator">\</span>str <span class="Operator">-></span> <span class="haskellType">Right</span> (x, str) | |||||
(<span class="haskellType">Parser</span> p) <span class="Operator"><*></span> (<span class="haskellType">Parser</span> p') <span class="Operator">=</span> <span class="haskellType">Parser</span> go <span class="Structure">where</span> | |||||
go st <span class="Operator">=</span> <span class="Keyword">case</span> p st <span class="Keyword">of</span> | |||||
<span class="haskellType">Left</span> e <span class="Operator">-></span> <span class="haskellType">Left</span> e | |||||
<span class="haskellType">Right</span> (fn, st') <span class="Operator">-></span> <span class="Keyword">case</span> p' st' <span class="Keyword">of</span> | |||||
<span class="haskellType">Left</span> e' <span class="Operator">-></span> <span class="haskellType">Left</span> e' | |||||
<span class="haskellType">Right</span> (v, st'') <span class="Operator">-></span> <span class="haskellType">Right</span> (fn v, st'') | |||||
<span class="Structure">instance</span> <span class="haskellType">Alternative</span> <span class="haskellType">Parser</span> <span class="Structure">where</span> | |||||
empty <span class="Operator">=</span> fail <span class="String">"nothing"</span> | |||||
(<span class="haskellType">Parser</span> p) <span class="Operator"><|></span> (<span class="haskellType">Parser</span> p') <span class="Operator">=</span> <span class="haskellType">Parser</span> go <span class="Structure">where</span> | |||||
go st <span class="Operator">=</span> <span class="Keyword">case</span> p st <span class="Keyword">of</span> | |||||
<span class="haskellType">Left</span> <span class="Operator">_</span> <span class="Operator">-></span> p' st | |||||
<span class="haskellType">Right</span> x <span class="Operator">-></span> <span class="haskellType">Right</span> x | |||||
<span class="Structure">instance</span> <span class="haskellType">Monad</span> <span class="haskellType">Parser</span> <span class="Structure">where</span> | |||||
return <span class="Operator">=</span> pure | |||||
(<span class="haskellType">Parser</span> p) <span class="Operator">>>=</span> f <span class="Operator">=</span> <span class="haskellType">Parser</span> go <span class="Structure">where</span> | |||||
go s <span class="Operator">=</span> <span class="Keyword">case</span> p s <span class="Keyword">of</span> | |||||
<span class="haskellType">Left</span> e <span class="Operator">-></span> <span class="haskellType">Left</span> e | |||||
<span class="haskellType">Right</span> (x, s') <span class="Operator">-></span> parse (f x) s' | |||||
fail m <span class="Operator">=</span> <span class="haskellType">Parser</span> go <span class="Structure">where</span> | |||||
go <span class="Operator">=</span> <span class="haskellType">Left</span> <span class="Operator">.</span> go' | |||||
go' [] <span class="Operator">=</span> <span class="String">"expected "</span> <span class="Operator">++</span> m <span class="Operator">++</span> <span class="String">", got to the end of stream"</span> | |||||
go' (x<span class="Operator">:</span>xs) <span class="Operator">=</span> <span class="String">"expected "</span> <span class="Operator">++</span> m <span class="Operator">++</span> <span class="String">", got '"</span> <span class="Operator">++</span> x<span class="Operator">:</span><span class="String">"'"</span> | |||||
<span class="Identifier">any</span> <span class="Operator">::</span> <span class="haskellType">Parser</span> <span class="haskellType">Char</span> | |||||
any <span class="Operator">=</span> <span class="haskellType">Parser</span> go <span class="Structure">where</span> | |||||
go [] <span class="Operator">=</span> <span class="haskellType">Left</span> <span class="String">"any: end of file"</span> | |||||
go (x<span class="Operator">:</span>xs) <span class="Operator">=</span> <span class="haskellType">Right</span> (x,xs) | |||||
<span class="Identifier">satisfy</span> <span class="Operator">::</span> (<span class="haskellType">Char</span> <span class="Operator">-></span> <span class="haskellType">Bool</span>) <span class="Operator">-></span> <span class="haskellType">Parser</span> <span class="haskellType">Char</span> | |||||
satisfy f <span class="Operator">=</span> <span class="Keyword">do</span> x <span class="Operator"><-</span> <span class="haskellType">Parser</span><span class="Operator">.</span>any | |||||
<span class="Conditional">if</span> f x | |||||
<span class="Conditional">then</span> return x | |||||
<span class="Conditional">else</span> fail <span class="String">"a solution to the function"</span> | |||||
<span class="Identifier">char</span> <span class="Operator">::</span> <span class="haskellType">Char</span> <span class="Operator">-></span> <span class="haskellType">Parser</span> <span class="haskellType">Char</span> | |||||
char c <span class="Operator">=</span> satisfy (c <span class="Operator">==</span>) <span class="Operator"><?></span> <span class="String">"literal "</span> <span class="Operator">++</span> [c] | |||||
<span class="Identifier">oneOf</span> <span class="Operator">::</span> <span class="haskellType">String</span> <span class="Operator">-></span> <span class="haskellType">Parser</span> <span class="haskellType">Char</span> | |||||
oneOf s <span class="Operator">=</span> satisfy (<span class="Operator">`elem`</span> s) <span class="Operator"><?></span> <span class="String">"one of '"</span> <span class="Operator">++</span> s <span class="Operator">++</span> <span class="String">"'"</span> | |||||
<span class="Identifier">string</span> <span class="Operator">::</span> <span class="haskellType">String</span> <span class="Operator">-></span> <span class="haskellType">Parser</span> <span class="haskellType">String</span> | |||||
string [] <span class="Operator">=</span> return [] | |||||
string (x<span class="Operator">:</span>xs) <span class="Operator">=</span> <span class="Keyword">do</span> char x | |||||
string xs | |||||
return <span class="Operator">$</span> x<span class="Operator">:</span>xs | |||||
<span class="Identifier">natural</span> <span class="Operator">::</span> <span class="haskellType">Parser</span> <span class="haskellType">Integer</span> | |||||
natural <span class="Operator">=</span> read <span class="Operator"><$></span> some (satisfy isDigit) | |||||
<span class="Identifier">lexeme</span> <span class="Operator">::</span> <span class="haskellType">Parser</span> a <span class="Operator">-></span> <span class="haskellType">Parser</span> a | |||||
lexeme <span class="Operator">=</span> (<span class="Operator"><*</span> spaces) | |||||
<span class="Identifier">reserved</span> <span class="Operator">::</span> <span class="haskellType">String</span> <span class="Operator">-></span> <span class="haskellType">Parser</span> <span class="haskellType">String</span> | |||||
reserved <span class="Operator">=</span> lexeme <span class="Operator">.</span> string | |||||
<span class="Identifier">spaces</span> <span class="Operator">::</span> <span class="haskellType">Parser</span> <span class="haskellType">String</span> | |||||
spaces <span class="Operator">=</span> many <span class="Operator">$</span> oneOf <span class="String">" \n\r"</span> | |||||
<span class="Identifier">digit</span> <span class="Operator">::</span> <span class="haskellType">Parser</span> <span class="haskellType">Char</span> | |||||
digit <span class="Operator">=</span> satisfy isDigit | |||||
<span class="Identifier">number</span> <span class="Operator">::</span> <span class="haskellType">Parser</span> <span class="haskellType">Int</span> | |||||
number <span class="Operator">=</span> <span class="Keyword">do</span> | |||||
s <span class="Operator"><-</span> string <span class="String">"-"</span> <span class="Operator"><|></span> empty | |||||
cs <span class="Operator"><-</span> some digit | |||||
return <span class="Operator">$</span> read (s <span class="Operator">++</span> cs) | |||||
<span class="Identifier">between</span> <span class="Operator">::</span> <span class="haskellType">Parser</span> b <span class="Operator">-></span> <span class="haskellType">Parser</span> c <span class="Operator">-></span> <span class="haskellType">Parser</span> a <span class="Operator">-></span> <span class="haskellType">Parser</span> a | |||||
between o c x <span class="Operator">=</span> o <span class="Operator">*></span> x <span class="Operator"><*</span> c | |||||
<span class="Identifier">contents</span> <span class="Operator">::</span> <span class="haskellType">Parser</span> a <span class="Operator">-></span> <span class="haskellType">Parser</span> a | |||||
contents x <span class="Operator">=</span> spaces <span class="Operator">*></span> x <span class="Operator"><*</span> spaces | |||||
<span class="Identifier">sep</span> <span class="Operator">::</span> <span class="haskellType">Parser</span> b <span class="Operator">-></span> <span class="haskellType">Parser</span> a <span class="Operator">-></span> <span class="haskellType">Parser</span> [a] | |||||
sep s c <span class="Operator">=</span> sep1 s c <span class="Operator"><|></span> return [] | |||||
<span class="Identifier">sep1</span> <span class="Operator">::</span> <span class="haskellType">Parser</span> b <span class="Operator">-></span> <span class="haskellType">Parser</span> a <span class="Operator">-></span> <span class="haskellType">Parser</span> [a] | |||||
sep1 s c <span class="Operator">=</span> <span class="Keyword">do</span> x <span class="Operator"><-</span> c | |||||
xs <span class="Operator"><-</span> many <span class="Operator">$</span> s <span class="Operator">>></span> c | |||||
return <span class="Operator">$</span> x<span class="Operator">:</span>xs | |||||
<span class="Identifier">option</span> <span class="Operator">::</span> a <span class="Operator">-></span> <span class="haskellType">Parser</span> a <span class="Operator">-></span> <span class="haskellType">Parser</span> a | |||||
option x p <span class="Operator">=</span> p <span class="Operator"><|></span> return x | |||||
<span class="Identifier">optionMaybe</span> <span class="Operator">::</span> <span class="haskellType">Parser</span> a <span class="Operator">-></span> <span class="haskellType">Parser</span> (<span class="haskellType">Maybe</span> a) | |||||
optionMaybe p <span class="Operator">=</span> option <span class="haskellType">Nothing</span> <span class="Operator">$</span> <span class="haskellType">Just</span> <span class="Operator"><$></span> p | |||||
<span class="Identifier">optional</span> <span class="Operator">::</span> <span class="haskellType">Parser</span> a <span class="Operator">-></span> <span class="haskellType">Parser</span> () | |||||
optional p <span class="Operator">=</span> void p <span class="Operator"><|></span> return () | |||||
<span class="Identifier">eof</span> <span class="Operator">::</span> <span class="haskellType">Parser</span> () | |||||
eof <span class="Operator">=</span> <span class="haskellType">Parser</span> go <span class="Structure">where</span> | |||||
go (x<span class="Operator">:_</span>) <span class="Operator">=</span> <span class="haskellType">Left</span> <span class="Operator">$</span> <span class="String">"expected eof, got '"</span> <span class="Operator">++</span> x<span class="Operator">:</span><span class="String">"'"</span> | |||||
go [] <span class="Operator">=</span> <span class="haskellType">Right</span> ((), []) | |||||
</pre> | |||||
</body> | |||||
</html> | |||||
<!-- vim: set foldmethod=manual : --> |
@ -0,0 +1,100 @@ | |||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> | |||||
<html> | |||||
<head> | |||||
<meta http-equiv="content-type" content="text/html; charset=UTF-8"> | |||||
<title>~/Projects/blag/demorgan-1.ml.html</title> | |||||
<meta name="Generator" content="Vim/8.0"> | |||||
<meta name="plugin-version" content="vim8.1_v1"> | |||||
<meta name="syntax" content="amulet"> | |||||
<meta name="settings" content="use_css,pre_wrap,no_foldcolumn,expand_tabs,prevent_copy=t"> | |||||
<meta name="colorscheme" content="onedark"> | |||||
<style type="text/css"> | |||||
<!-- | |||||
pre { white-space: pre-wrap; font-family: 'Iosevka','Iosevka Term', monospace; color: #ABB2BF; background-color: #282C34; } | |||||
body { font-family: 'Iosevka','Iosevka Term', monospace; color: #ABB2BF; background-color: #282C34; } | |||||
* { font-size: 1em; } | |||||
input { border: none; margin: 0; padding: 0; font-family: 'Iosevka','Iosevka Term', monospace; } | |||||
input[size='1'] { width: 1em; width: 1ch; } | |||||
input[size='2'] { width: 2em; width: 2ch; } | |||||
input[size='3'] { width: 3em; width: 3ch; } | |||||
input[size='4'] { width: 4em; width: 4ch; } | |||||
input[size='5'] { width: 5em; width: 5ch; } | |||||
input[size='6'] { width: 6em; width: 6ch; } | |||||
input[size='7'] { width: 7em; width: 7ch; } | |||||
input[size='8'] { width: 8em; width: 8ch; } | |||||
input[size='9'] { width: 9em; width: 9ch; } | |||||
input[size='10'] { width: 10em; width: 10ch; } | |||||
input[size='11'] { width: 11em; width: 11ch; } | |||||
input[size='12'] { width: 12em; width: 12ch; } | |||||
input[size='13'] { width: 13em; width: 13ch; } | |||||
input[size='14'] { width: 14em; width: 14ch; } | |||||
input[size='15'] { width: 15em; width: 15ch; } | |||||
input[size='16'] { width: 16em; width: 16ch; } | |||||
input[size='17'] { width: 17em; width: 17ch; } | |||||
input[size='18'] { width: 18em; width: 18ch; } | |||||
input[size='19'] { width: 19em; width: 19ch; } | |||||
input[size='20'] { width: 20em; width: 20ch; } | |||||
#oneCharWidth, #oneEmWidth, #oneInputWidth { padding: 0; margin: 0; position: absolute; left: -999999px; visibility: hidden; } | |||||
.em5 input[size='1'] { width: 0.05em; } .em5 input[size='2'] { width: 0.1em; } .em5 input[size='3'] { width: 0.15em; } .em5 input[size='4'] { width: 0.2em; } .em5 input[size='5'] { width: 0.25em; } .em5 input[size='6'] { width: 0.3em; } .em5 input[size='7'] { width: 0.35em; } .em5 input[size='8'] { width: 0.4em; } .em5 input[size='9'] { width: 0.45em; } .em5 input[size='10'] { width: 0.5em; } .em5 input[size='11'] { width: 0.55em; } .em5 input[size='12'] { width: 0.6em; } .em5 input[size='13'] { width: 0.65em; } .em5 input[size='14'] { width: 0.7em; } .em5 input[size='15'] { width: 0.75em; } .em5 input[size='16'] { width: 0.8em; } .em5 input[size='17'] { width: 0.85em; } .em5 input[size='18'] { width: 0.9em; } .em5 input[size='19'] { width: 0.95em; } .em5 input[size='20'] { width: 1.0em; } | |||||
.em10 input[size='1'] { width: 0.1em; } .em10 input[size='2'] { width: 0.2em; } .em10 input[size='3'] { width: 0.3em; } .em10 input[size='4'] { width: 0.4em; } .em10 input[size='5'] { width: 0.5em; } .em10 input[size='6'] { width: 0.6em; } .em10 input[size='7'] { width: 0.7em; } .em10 input[size='8'] { width: 0.8em; } .em10 input[size='9'] { width: 0.9em; } .em10 input[size='10'] { width: 1.0em; } .em10 input[size='11'] { width: 1.1em; } .em10 input[size='12'] { width: 1.2em; } .em10 input[size='13'] { width: 1.3em; } .em10 input[size='14'] { width: 1.4em; } .em10 input[size='15'] { width: 1.5em; } .em10 input[size='16'] { width: 1.6em; } .em10 input[size='17'] { width: 1.7em; } .em10 input[size='18'] { width: 1.8em; } .em10 input[size='19'] { width: 1.9em; } .em10 input[size='20'] { width: 2.0em; } | |||||
.em15 input[size='1'] { width: 0.15em; } .em15 input[size='2'] { width: 0.3em; } .em15 input[size='3'] { width: 0.45em; } .em15 input[size='4'] { width: 0.6em; } .em15 input[size='5'] { width: 0.75em; } .em15 input[size='6'] { width: 0.9em; } .em15 input[size='7'] { width: 1.05em; } .em15 input[size='8'] { width: 1.2em; } .em15 input[size='9'] { width: 1.35em; } .em15 input[size='10'] { width: 1.5em; } .em15 input[size='11'] { width: 1.65em; } .em15 input[size='12'] { width: 1.8em; } .em15 input[size='13'] { width: 1.95em; } .em15 input[size='14'] { width: 2.1em; } .em15 input[size='15'] { width: 2.25em; } .em15 input[size='16'] { width: 2.4em; } .em15 input[size='17'] { width: 2.55em; } .em15 input[size='18'] { width: 2.7em; } .em15 input[size='19'] { width: 2.85em; } .em15 input[size='20'] { width: 3.0em; } | |||||
.em20 input[size='1'] { width: 0.2em; } .em20 input[size='2'] { width: 0.4em; } .em20 input[size='3'] { width: 0.6em; } .em20 input[size='4'] { width: 0.8em; } .em20 input[size='5'] { width: 1.0em; } .em20 input[size='6'] { width: 1.2em; } .em20 input[size='7'] { width: 1.4em; } .em20 input[size='8'] { width: 1.6em; } .em20 input[size='9'] { width: 1.8em; } .em20 input[size='10'] { width: 2.0em; } .em20 input[size='11'] { width: 2.2em; } .em20 input[size='12'] { width: 2.4em; } .em20 input[size='13'] { width: 2.6em; } .em20 input[size='14'] { width: 2.8em; } .em20 input[size='15'] { width: 3.0em; } .em20 input[size='16'] { width: 3.2em; } .em20 input[size='17'] { width: 3.4em; } .em20 input[size='18'] { width: 3.6em; } .em20 input[size='19'] { width: 3.8em; } .em20 input[size='20'] { width: 4.0em; } | |||||
.em25 input[size='1'] { width: 0.25em; } .em25 input[size='2'] { width: 0.5em; } .em25 input[size='3'] { width: 0.75em; } .em25 input[size='4'] { width: 1.0em; } .em25 input[size='5'] { width: 1.25em; } .em25 input[size='6'] { width: 1.5em; } .em25 input[size='7'] { width: 1.75em; } .em25 input[size='8'] { width: 2.0em; } .em25 input[size='9'] { width: 2.25em; } .em25 input[size='10'] { width: 2.5em; } .em25 input[size='11'] { width: 2.75em; } .em25 input[size='12'] { width: 3.0em; } .em25 input[size='13'] { width: 3.25em; } .em25 input[size='14'] { width: 3.5em; } .em25 input[size='15'] { width: 3.75em; } .em25 input[size='16'] { width: 4.0em; } .em25 input[size='17'] { width: 4.25em; } .em25 input[size='18'] { width: 4.5em; } .em25 input[size='19'] { width: 4.75em; } .em25 input[size='20'] { width: 5.0em; } | |||||
.em30 input[size='1'] { width: 0.3em; } .em30 input[size='2'] { width: 0.6em; } .em30 input[size='3'] { width: 0.9em; } .em30 input[size='4'] { width: 1.2em; } .em30 input[size='5'] { width: 1.5em; } .em30 input[size='6'] { width: 1.8em; } .em30 input[size='7'] { width: 2.1em; } .em30 input[size='8'] { width: 2.4em; } .em30 input[size='9'] { width: 2.7em; } .em30 input[size='10'] { width: 3.0em; } .em30 input[size='11'] { width: 3.3em; } .em30 input[size='12'] { width: 3.6em; } .em30 input[size='13'] { width: 3.9em; } .em30 input[size='14'] { width: 4.2em; } .em30 input[size='15'] { width: 4.5em; } .em30 input[size='16'] { width: 4.8em; } .em30 input[size='17'] { width: 5.1em; } .em30 input[size='18'] { width: 5.4em; } .em30 input[size='19'] { width: 5.7em; } .em30 input[size='20'] { width: 6.0em; } | |||||
.em35 input[size='1'] { width: 0.35em; } .em35 input[size='2'] { width: 0.7em; } .em35 input[size='3'] { width: 1.05em; } .em35 input[size='4'] { width: 1.4em; } .em35 input[size='5'] { width: 1.75em; } .em35 input[size='6'] { width: 2.1em; } .em35 input[size='7'] { width: 2.45em; } .em35 input[size='8'] { width: 2.8em; } .em35 input[size='9'] { width: 3.15em; } .em35 input[size='10'] { width: 3.5em; } .em35 input[size='11'] { width: 3.85em; } .em35 input[size='12'] { width: 4.2em; } .em35 input[size='13'] { width: 4.55em; } .em35 input[size='14'] { width: 4.9em; } .em35 input[size='15'] { width: 5.25em; } .em35 input[size='16'] { width: 5.6em; } .em35 input[size='17'] { width: 5.95em; } .em35 input[size='18'] { width: 6.3em; } .em35 input[size='19'] { width: 6.65em; } .em35 input[size='20'] { width: 7.0em; } | |||||
.em40 input[size='1'] { width: 0.4em; } .em40 input[size='2'] { width: 0.8em; } .em40 input[size='3'] { width: 1.2em; } .em40 input[size='4'] { width: 1.6em; } .em40 input[size='5'] { width: 2.0em; } .em40 input[size='6'] { width: 2.4em; } .em40 input[size='7'] { width: 2.8em; } .em40 input[size='8'] { width: 3.2em; } .em40 input[size='9'] { width: 3.6em; } .em40 input[size='10'] { width: 4.0em; } .em40 input[size='11'] { width: 4.4em; } .em40 input[size='12'] { width: 4.8em; } .em40 input[size='13'] { width: 5.2em; } .em40 input[size='14'] { width: 5.6em; } .em40 input[size='15'] { width: 6.0em; } .em40 input[size='16'] { width: 6.4em; } .em40 input[size='17'] { width: 6.8em; } .em40 input[size='18'] { width: 7.2em; } .em40 input[size='19'] { width: 7.6em; } .em40 input[size='20'] { width: 8.0em; } | |||||
.em45 input[size='1'] { width: 0.45em; } .em45 input[size='2'] { width: 0.9em; } .em45 input[size='3'] { width: 1.35em; } .em45 input[size='4'] { width: 1.8em; } .em45 input[size='5'] { width: 2.25em; } .em45 input[size='6'] { width: 2.7em; } .em45 input[size='7'] { width: 3.15em; } .em45 input[size='8'] { width: 3.6em; } .em45 input[size='9'] { width: 4.05em; } .em45 input[size='10'] { width: 4.5em; } .em45 input[size='11'] { width: 4.95em; } .em45 input[size='12'] { width: 5.4em; } .em45 input[size='13'] { width: 5.85em; } .em45 input[size='14'] { width: 6.3em; } .em45 input[size='15'] { width: 6.75em; } .em45 input[size='16'] { width: 7.2em; } .em45 input[size='17'] { width: 7.65em; } .em45 input[size='18'] { width: 8.1em; } .em45 input[size='19'] { width: 8.55em; } .em45 input[size='20'] { width: 9.0em; } | |||||
.em50 input[size='1'] { width: 0.5em; } .em50 input[size='2'] { width: 1.0em; } .em50 input[size='3'] { width: 1.5em; } .em50 input[size='4'] { width: 2.0em; } .em50 input[size='5'] { width: 2.5em; } .em50 input[size='6'] { width: 3.0em; } .em50 input[size='7'] { width: 3.5em; } .em50 input[size='8'] { width: 4.0em; } .em50 input[size='9'] { width: 4.5em; } .em50 input[size='10'] { width: 5.0em; } .em50 input[size='11'] { width: 5.5em; } .em50 input[size='12'] { width: 6.0em; } .em50 input[size='13'] { width: 6.5em; } .em50 input[size='14'] { width: 7.0em; } .em50 input[size='15'] { width: 7.5em; } .em50 input[size='16'] { width: 8.0em; } .em50 input[size='17'] { width: 8.5em; } .em50 input[size='18'] { width: 9.0em; } .em50 input[size='19'] { width: 9.5em; } .em50 input[size='20'] { width: 10.0em; } | |||||
.em55 input[size='1'] { width: 0.55em; } .em55 input[size='2'] { width: 1.1em; } .em55 input[size='3'] { width: 1.65em; } .em55 input[size='4'] { width: 2.2em; } .em55 input[size='5'] { width: 2.75em; } .em55 input[size='6'] { width: 3.3em; } .em55 input[size='7'] { width: 3.85em; } .em55 input[size='8'] { width: 4.4em; } .em55 input[size='9'] { width: 4.95em; } .em55 input[size='10'] { width: 5.5em; } .em55 input[size='11'] { width: 6.05em; } .em55 input[size='12'] { width: 6.6em; } .em55 input[size='13'] { width: 7.15em; } .em55 input[size='14'] { width: 7.7em; } .em55 input[size='15'] { width: 8.25em; } .em55 input[size='16'] { width: 8.8em; } .em55 input[size='17'] { width: 9.35em; } .em55 input[size='18'] { width: 9.9em; } .em55 input[size='19'] { width: 10.45em; } .em55 input[size='20'] { width: 11.0em; } | |||||
.em60 input[size='1'] { width: 0.6em; } .em60 input[size='2'] { width: 1.2em; } .em60 input[size='3'] { width: 1.8em; } .em60 input[size='4'] { width: 2.4em; } .em60 input[size='5'] { width: 3.0em; } .em60 input[size='6'] { width: 3.6em; } .em60 input[size='7'] { width: 4.2em; } .em60 input[size='8'] { width: 4.8em; } .em60 input[size='9'] { width: 5.4em; } .em60 input[size='10'] { width: 6.0em; } .em60 input[size='11'] { width: 6.6em; } .em60 input[size='12'] { width: 7.2em; } .em60 input[size='13'] { width: 7.8em; } .em60 input[size='14'] { width: 8.4em; } .em60 input[size='15'] { width: 9.0em; } .em60 input[size='16'] { width: 9.6em; } .em60 input[size='17'] { width: 10.2em; } .em60 input[size='18'] { width: 10.8em; } .em60 input[size='19'] { width: 11.4em; } .em60 input[size='20'] { width: 12.0em; } | |||||
.em65 input[size='1'] { width: 0.65em; } .em65 input[size='2'] { width: 1.3em; } .em65 input[size='3'] { width: 1.95em; } .em65 input[size='4'] { width: 2.6em; } .em65 input[size='5'] { width: 3.25em; } .em65 input[size='6'] { width: 3.9em; } .em65 input[size='7'] { width: 4.55em; } .em65 input[size='8'] { width: 5.2em; } .em65 input[size='9'] { width: 5.85em; } .em65 input[size='10'] { width: 6.5em; } .em65 input[size='11'] { width: 7.15em; } .em65 input[size='12'] { width: 7.8em; } .em65 input[size='13'] { width: 8.45em; } .em65 input[size='14'] { width: 9.1em; } .em65 input[size='15'] { width: 9.75em; } .em65 input[size='16'] { width: 10.4em; } .em65 input[size='17'] { width: 11.05em; } .em65 input[size='18'] { width: 11.7em; } .em65 input[size='19'] { width: 12.35em; } .em65 input[size='20'] { width: 13.0em; } | |||||
.em70 input[size='1'] { width: 0.7em; } .em70 input[size='2'] { width: 1.4em; } .em70 input[size='3'] { width: 2.1em; } .em70 input[size='4'] { width: 2.8em; } .em70 input[size='5'] { width: 3.5em; } .em70 input[size='6'] { width: 4.2em; } .em70 input[size='7'] { width: 4.9em; } .em70 input[size='8'] { width: 5.6em; } .em70 input[size='9'] { width: 6.3em; } .em70 input[size='10'] { width: 7.0em; } .em70 input[size='11'] { width: 7.7em; } .em70 input[size='12'] { width: 8.4em; } .em70 input[size='13'] { width: 9.1em; } .em70 input[size='14'] { width: 9.8em; } .em70 input[size='15'] { width: 10.5em; } .em70 input[size='16'] { width: 11.2em; } .em70 input[size='17'] { width: 11.9em; } .em70 input[size='18'] { width: 12.6em; } .em70 input[size='19'] { width: 13.3em; } .em70 input[size='20'] { width: 14.0em; } | |||||
.em75 input[size='1'] { width: 0.75em; } .em75 input[size='2'] { width: 1.5em; } .em75 input[size='3'] { width: 2.25em; } .em75 input[size='4'] { width: 3.0em; } .em75 input[size='5'] { width: 3.75em; } .em75 input[size='6'] { width: 4.5em; } .em75 input[size='7'] { width: 5.25em; } .em75 input[size='8'] { width: 6.0em; } .em75 input[size='9'] { width: 6.75em; } .em75 input[size='10'] { width: 7.5em; } .em75 input[size='11'] { width: 8.25em; } .em75 input[size='12'] { width: 9.0em; } .em75 input[size='13'] { width: 9.75em; } .em75 input[size='14'] { width: 10.5em; } .em75 input[size='15'] { width: 11.25em; } .em75 input[size='16'] { width: 12.0em; } .em75 input[size='17'] { width: 12.75em; } .em75 input[size='18'] { width: 13.5em; } .em75 input[size='19'] { width: 14.25em; } .em75 input[size='20'] { width: 15.0em; } | |||||
.em80 input[size='1'] { width: 0.8em; } .em80 input[size='2'] { width: 1.6em; } .em80 input[size='3'] { width: 2.4em; } .em80 input[size='4'] { width: 3.2em; } .em80 input[size='5'] { width: 4.0em; } .em80 input[size='6'] { width: 4.8em; } .em80 input[size='7'] { width: 5.6em; } .em80 input[size='8'] { width: 6.4em; } .em80 input[size='9'] { width: 7.2em; } .em80 input[size='10'] { width: 8.0em; } .em80 input[size='11'] { width: 8.8em; } .em80 input[size='12'] { width: 9.6em; } .em80 input[size='13'] { width: 10.4em; } .em80 input[size='14'] { width: 11.2em; } .em80 input[size='15'] { width: 12.0em; } .em80 input[size='16'] { width: 12.8em; } .em80 input[size='17'] { width: 13.6em; } .em80 input[size='18'] { width: 14.4em; } .em80 input[size='19'] { width: 15.2em; } .em80 input[size='20'] { width: 16.0em; } | |||||
.em85 input[size='1'] { width: 0.85em; } .em85 input[size='2'] { width: 1.7em; } .em85 input[size='3'] { width: 2.55em; } .em85 input[size='4'] { width: 3.4em; } .em85 input[size='5'] { width: 4.25em; } .em85 input[size='6'] { width: 5.1em; } .em85 input[size='7'] { width: 5.95em; } .em85 input[size='8'] { width: 6.8em; } .em85 input[size='9'] { width: 7.65em; } .em85 input[size='10'] { width: 8.5em; } .em85 input[size='11'] { width: 9.35em; } .em85 input[size='12'] { width: 10.2em; } .em85 input[size='13'] { width: 11.05em; } .em85 input[size='14'] { width: 11.9em; } .em85 input[size='15'] { width: 12.75em; } .em85 input[size='16'] { width: 13.6em; } .em85 input[size='17'] { width: 14.45em; } .em85 input[size='18'] { width: 15.3em; } .em85 input[size='19'] { width: 16.15em; } .em85 input[size='20'] { width: 17.0em; } | |||||
.em90 input[size='1'] { width: 0.9em; } .em90 input[size='2'] { width: 1.8em; } .em90 input[size='3'] { width: 2.7em; } .em90 input[size='4'] { width: 3.6em; } .em90 input[size='5'] { width: 4.5em; } .em90 input[size='6'] { width: 5.4em; } .em90 input[size='7'] { width: 6.3em; } .em90 input[size='8'] { width: 7.2em; } .em90 input[size='9'] { width: 8.1em; } .em90 input[size='10'] { width: 9.0em; } .em90 input[size='11'] { width: 9.9em; } .em90 input[size='12'] { width: 10.8em; } .em90 input[size='13'] { width: 11.7em; } .em90 input[size='14'] { width: 12.6em; } .em90 input[size='15'] { width: 13.5em; } .em90 input[size='16'] { width: 14.4em; } .em90 input[size='17'] { width: 15.3em; } .em90 input[size='18'] { width: 16.2em; } .em90 input[size='19'] { width: 17.1em; } .em90 input[size='20'] { width: 18.0em; } | |||||
.em95 input[size='1'] { width: 0.95em; } .em95 input[size='2'] { width: 1.9em; } .em95 input[size='3'] { width: 2.85em; } .em95 input[size='4'] { width: 3.8em; } .em95 input[size='5'] { width: 4.75em; } .em95 input[size='6'] { width: 5.7em; } .em95 input[size='7'] { width: 6.65em; } .em95 input[size='8'] { width: 7.6em; } .em95 input[size='9'] { width: 8.55em; } .em95 input[size='10'] { width: 9.5em; } .em95 input[size='11'] { width: 10.45em; } .em95 input[size='12'] { width: 11.4em; } .em95 input[size='13'] { width: 12.35em; } .em95 input[size='14'] { width: 13.3em; } .em95 input[size='15'] { width: 14.25em; } .em95 input[size='16'] { width: 15.2em; } .em95 input[size='17'] { width: 16.15em; } .em95 input[size='18'] { width: 17.1em; } .em95 input[size='19'] { width: 18.05em; } .em95 input[size='20'] { width: 19.0em; } | |||||
.em100 input[size='1'] { width: 1.0em; } .em100 input[size='2'] { width: 2.0em; } .em100 input[size='3'] { width: 3.0em; } .em100 input[size='4'] { width: 4.0em; } .em100 input[size='5'] { width: 5.0em; } .em100 input[size='6'] { width: 6.0em; } .em100 input[size='7'] { width: 7.0em; } .em100 input[size='8'] { width: 8.0em; } .em100 input[size='9'] { width: 9.0em; } .em100 input[size='10'] { width: 10.0em; } .em100 input[size='11'] { width: 11.0em; } .em100 input[size='12'] { width: 12.0em; } .em100 input[size='13'] { width: 13.0em; } .em100 input[size='14'] { width: 14.0em; } .em100 input[size='15'] { width: 15.0em; } .em100 input[size='16'] { width: 16.0em; } .em100 input[size='17'] { width: 17.0em; } .em100 input[size='18'] { width: 18.0em; } .em100 input[size='19'] { width: 19.0em; } .em100 input[size='20'] { width: 20.0em; } | |||||
input.Folded { cursor: default; } | |||||
.Operator { color: #C678DD; } | |||||
.Constant { color: #56B6C2; } | |||||
.Keyword { color: #E06C75; } | |||||
--> | |||||
</style> | |||||
<script type='text/javascript'> | |||||
<!-- | |||||
/* simulate a "ch" unit by asking the browser how big a zero character is */ | |||||
/* Work around browsers without a reliable "ch" unit: measure how wide a
 * rendered '0' is and, if <input> boxes render wider than that, switch the
 * code element onto one of the .emNN scaling classes defined in the
 * stylesheet above. Invoked once from body onload. */
function FixCharWidth() {
/* get the hidden element which gives the width of a single character */
var goodWidth = document.getElementById("oneCharWidth").clientWidth;
var ratio = 5;
var inputWidth = document.getElementById("oneInputWidth").clientWidth;
var emWidth = document.getElementById("oneEmWidth").clientWidth;
if (inputWidth > goodWidth) {
/* pick the smallest 5%-step em multiple that still fits one character */
while (ratio < 100*goodWidth/emWidth && ratio < 100) {
ratio += 5;
}
document.getElementById("vimCodeElement").className = "em"+ratio;
}
}
--> | |||||
</script> | |||||
</head> | |||||
<body onload='FixCharWidth();'> | |||||
<!-- hidden divs used by javascript to get the width of a char --> | |||||
<div id='oneCharWidth'>0</div> | |||||
<div id='oneInputWidth'><input size='1' value='0'></div> | |||||
<div id='oneEmWidth' style='width: 1em;'></div> | |||||
<pre id='vimCodeElement'> | |||||
<span class="Constant">Equiv</span> <span class="Keyword">(fun</span> <span class="Keyword">(</span><span class="Constant">Not</span> g<span class="Keyword">)</span> <span class="Keyword">-></span> <span class="Keyword">(</span><span class="Constant">Not</span> <span class="Keyword">(fun</span> b <span class="Keyword">-></span> g <span class="Keyword">(</span><span class="Constant">L</span> b<span class="Keyword">))</span><span class="Operator">,</span> <span class="Constant">Not</span> <span class="Keyword">(fun</span> c <span class="Keyword">-></span> g <span class="Keyword">(</span><span class="Constant">R</span> c<span class="Keyword">)))</span><span class="Operator">,</span> <span class="Keyword">fun</span> <span class="Keyword">((</span><span class="Constant">Not</span> h<span class="Keyword">)</span><span class="Operator">,</span> <span class="Keyword">(</span><span class="Constant">Not</span> g<span class="Keyword">))</span> <span class="Keyword">-></span> <span class="Constant">Not</span> <span class="Keyword">function</span> | |||||
<span class="Operator">|</span> <span class="Keyword">(</span><span class="Constant">L</span> y<span class="Keyword">)</span> <span class="Keyword">-></span> h y | |||||
<span class="Operator">|</span> <span class="Keyword">(</span><span class="Constant">R</span> a<span class="Keyword">)</span> <span class="Keyword">-></span> h <span class="Keyword">(</span>g a<span class="Keyword">))</span> | |||||
</pre> | |||||
</body> | |||||
</html> | |||||
<!-- vim: set foldmethod=manual : --> |
@ -0,0 +1,119 @@ | |||||
// Root svg.js drawing surface, mounted into the #forth element and sized
// to fill its container.
let draw = SVG().addTo('#forth').size('100%', '100%')
// Build one visual stack cell: a bordered 100x25 rectangle with its label,
// grouped so the whole cell can be moved/animated as a single unit.
// container: an svg.js container to draw into.
// text: the label string rendered inside the cell.
// Returns the svg.js group holding rect + text.
let stack_element = (container, text) => {
  let group = container.group()
  group.add(
    container.rect()
      .size(100, 25)
      .stroke('#000').fill('#ddd')
      .attr('stroke-width', 2));
  // Rough horizontal centring: nudge the label left in proportion to its
  // length. (Debug console.log removed.)
  group.add(container.text(text).dmove((65 - text.length) / 2, -2));
  return group;
}
// The demo program: computes 2 + (3 * 4) on the stack machine.
// Each instruction is [mnemonic, ...operands].
let the_code = [
  [ 'push', 2 ],
  [ 'push', 3 ],
  [ 'push', 4 ],
  [ 'mul' ],
  [ 'add' ]
]
// Machine state: the_stack holds { svg, val } pairs (top of stack = last
// element), pc is the next instruction index, final marks a halted machine.
let the_stack = [], pc = 0, final = false;
// NOTE(review): draw.width() - '10%' coerces both operands to Number; if
// width() yields a percentage string this is NaN — confirm intended offset.
let stack_container = draw.nested().move(draw.width() - '10%', 0)
// Target element for the rendered instruction listing.
let code_node = document.getElementById('code');
// Push a value onto the machine stack: draw a new cell at the top slot and
// slide every existing cell down by one row (25px), with a short animation.
let push_val = (value) => {
  const cell =
    stack_element(stack_container, value.toString()).move(10, 0);
  cell.animate(100, 0, 'now').move(10, 10);
  for (const entry of the_stack) {
    entry.svg.animate(100, 0, 'now').dy(25);
  }
  the_stack.push({ svg: cell, val: value });
}
// Pop the top cell off the machine stack: delete its SVG node, shift the
// remaining cells up one row, and return the stored value.
let pop_val = () => {
  const top = the_stack.pop()
  top.svg.remove();
  for (const entry of the_stack) {
    entry.svg.dy(-25);
  }
  return top.val;
}
// Re-render the instruction listing into #code, marking the instruction at
// index `pc` with a '> ' cursor. The whole list is rebuilt on every call.
let render_code = (code, pc) => {
  // Drop any previously rendered listing.
  while (code_node.firstChild) {
    code_node.removeChild(code_node.firstChild);
  }
  const list = document.createElement('ul');
  list.style = 'list-style-type: none;';
  code.forEach((instruction, idx) => {
    const [mnemonic, ...operands] = instruction;
    const line = document.createElement('li');
    // Current-instruction cursor.
    if (idx === pc) {
      const cursor = document.createElement('span')
      cursor.innerText = '> ';
      cursor.classList.add('instruction-cursor');
      line.appendChild(cursor);
    }
    const mnemonicSpan = document.createElement('span');
    mnemonicSpan.innerText = mnemonic;
    mnemonicSpan.classList.add('instruction');
    line.appendChild(mnemonicSpan);
    // Operands follow the mnemonic, space-separated.
    for (const operand of operands) {
      line.append(' ');
      const operandSpan = document.createElement('span');
      operandSpan.innerText = operand;
      operandSpan.classList.add('operand');
      line.appendChild(operandSpan);
    }
    list.appendChild(line);
  });
  code_node.appendChild(list);
};
// Restore the machine to its initial state: remove every stack cell from
// the canvas, rewind the program counter, clear the halted flag, re-enable
// the step button, and repaint the listing with the cursor on line 0.
let reset = () => {
  for (const entry of the_stack) {
    entry.svg.remove();
  }
  the_stack = [];
  pc = 0;
  final = false;
  document.getElementById('step').disabled = false;
  render_code(the_code, 0);
}
// Execute one instruction of `the_code` and refresh the display.
// On stack underflow the machine logs "machine error" and disables the
// step button; unknown opcodes are silently skipped. When the program
// counter runs off the end, the machine is marked final.
let step = () => {
  // Shared handler for two-operand arithmetic: pop the top two values
  // (x = top, y = next) and push op(x, y). Previously duplicated verbatim
  // in the 'add' and 'mul' cases.
  let binop = (op) => {
    if (the_stack.length < 2) {
      console.error("machine error");
      document.getElementById('step').disabled = true;
    } else {
      let x = pop_val(), y = pop_val();
      push_val(op(x, y));
    }
  };
  if (!final) {
    const insn = the_code[pc++];
    switch (insn[0]) {
      case 'push':
        push_val(insn[1]);
        break;
      case 'add':
        binop((x, y) => x + y);
        break;
      case 'mul':
        binop((x, y) => x * y);
        break;
    }
  }
  render_code(the_code, pc);
  if (pc >= the_code.length) {
    console.log("final state");
    document.getElementById('step').disabled = true;
    final = true;
  }
}
// Initial paint of the program listing with the cursor on instruction 0.
render_code(the_code, pc);