
 = 

 ∈ 
 
 ⊂ 

 = 

 = 

\(
, \)
special characters make it possible to include parentheses in expressions in a way that does not cause them to interpreted as regular expression grouping symbols;\s
matches a single whitespace character;[09]
matches a single numeric digit;[az]
matches a single lowercase letter;[AZ]
matches a single uppercase letter;[azAZ]
matches a single letter;[azAZ09]
matches a single alphanumeric character.re
module provides functionality for automatically checking whether a string matches a particular regular expression. In order to check whether a string exactly matches a regular expression, it is necessary to wrap the regular expression in parentheses and then add special markers to ensure that the regular expression matches from the beginning of the string to the end of the string.
None
is returned.
>>> import re
>>> re.match(r"^(abc)$", "a") # Succeeds.
<_sre.SRE_Match object; span=(0, 1), match='a'>
>>> re.match(r"^(abc)$", "def") # Fails.
None
>>> re.match(r"^((redgreenblue)+)$", "redgreenblueredblue")
<_sre.SRE_Match object; span=(0, 19), match='redgreenblueredblue'>
>>> re.match(r"^([azAZ09]+)$", "redgreenblueredblue")
<_sre.SRE_Match object; span=(0, 19), match='redgreenblueredblue'>
>>> re.match(r"^([azAZ09]+)$", "!@#$")
None
 = 

 = 

 = 

 
 
 
 
 

 ::= 
 
 
 
⋮  
 

 = 

 ::= 

 = 

 = 

 ::= 
 
 

 = 

 = 

 ::= 
 
 
 
 
 
 
 
 

 = 

 
 
 
 
 

 = 

 ::= 
 
 
 
 
 
 ::= 
 
 ::= 
 
 ::= 
 
 ::= 

 = 

 ::= 
 
 
 
 ::= 
 
 
 
 ::= 
 
 
 
 
 
 
 
 

 ::= 
 
 

and (or (and (true, false), not(false)), or (true, false))
{ "And": [
{ "Or": [
{ "And": ["True","False"]},
{ "Not": ["False"]}
]
},
{ "Or": ["True","False"]}
]
}
 ::= 
 
 

import re
def tokenize(s):
# Use a regular expression to split the string into
# tokens or sequences of zero or more spaces.
tokens = [t for t in re.split(r"(\s+truefalseandornot,\(\))", s)]
# Throw out the spaces and return the result.
return [t for t in tokens if not t.isspace() and not t == ""]
>>> tokenize("and (or (and (true, false), not(false)), or (true, false))")
['and', '(', 'or', '(', 'and', '(', 'true', ',', 'false', ')', ',', 'not',
'(', 'false', ')', ')', ',', 'or', '(', 'true', ',', 'false', ')', ')']
character string (concrete syntax) 
⇒  lexer/ tokenizer 
⇒  token sequence 
⇒  parser  ⇒  parse tree (abstract syntax) 
character string (concrete syntax) 
⇒ 
parser 
⇒  parse tree (abstract syntax) 
 ::= 
 
 

def parse(tokens):
if tokens[0] == 'true':
return ('True', tokens[1:])
if tokens[0] == 'false':
return ('False', tokens[1:])
if tokens[0] == 'not' and tokens[1] == '(':
(e1, tokens) = parse(tokens[2:])
if tokens[0] == ')':
return ({'Not':[e1]}, tokens[1:])
if tokens[0] == 'or' and tokens[1] == '(':
(e1, tokens) = parse(tokens[2:])
if tokens[0] == ',':
(e2, tokens) = parse(tokens[1:])
if tokens[0] == ')':
return ({'Or':[e1,e2]}, tokens[1:])
if tokens[0] == 'and' and tokens[1] == '(':
(e1, tokens) = parse(tokens[2:])
if tokens[0] == ',':
(e2, tokens) = parse(tokens[1:])
if tokens[0] == ')':
return ({'And':[e1,e2]}, tokens[1:])
>>> import json
>>> (tree, tokens) =\
parse(tokenize(\
"and (or (and (true, false), not(false)), or (true, false))"\
)\
)
>>> print(json.dumps(tree, sort_keys=True, indent=2))
{
"And": [
{
"Or": [
{
"And": [
"True",
"False"
]
},
{
"Not": [
"False"
]
}
]
},
{
"Or": [
"True",
"False"
]
}
]
}
>>> print(tokens)
[]
 ::= 
 
 

def parse(tmp, top = True):
tokens = tmp[0:]
if tokens[0] == 'true':
tokens = tokens[1:]
if not top or len(tokens) == 0:
return ('True', tokens)
tokens = tmp[0:]
if tokens[0] == 'false':
tokens = tokens[1:]
if not top or len(tokens) == 0:
return ('False', tokens)
tokens = tmp[0:]
if tokens[0] == 'not' and tokens[1] == '(':
tokens = tokens[2:]
r = parse(tokens, False)
if not r is None:
(e1, tokens) = r
if tokens[0] == ')':
tokens = tokens[1:]
if not top or len(tokens) == 0:
return ({'Not':[e1]}, tokens)
tokens = tmp[0:]
if tokens[0] == '(':
tokens = tokens[1:]
r = parse(tokens, False)
if not r is None:
(e1, tokens) = r
if tokens[0] == 'or':
tokens = tokens[1:]
r = parse(tokens, False)
if not r is None:
(e2, tokens) = r
if tokens[0] == ')':
tokens = tokens[1:]
if not top or len(tokens) == 0:
return ({'Or':[e1,e2]}, tokens)
tokens = tmp[0:]
if tokens[0] == '(':
tokens = tokens[1:]
r = parse(tokens, False)
if not r is None:
(e1, tokens) = r
if tokens[0] == 'and':
tokens = tokens[1:]
r = parse(tokens, False)
if not r is None:
(e2, tokens) = r
if tokens[0] == ')':
tokens = tokens[1:]
if not top or len(tokens) == 0:
return ({'And':[e1,e2]}, tokens)
def parse(tmp, top = True):
seqs = [\
('True', ['true']), \
('False', ['false']), \
('Not', ['not', '(', parse, ')']), \
('And', ['(', parse, 'and', parse, ')']), \
('Or', ['(', parse, 'or', parse, ')']) \
]
# Try each choice sequence.
for (label, seq) in seqs:
tokens = tmp[0:]
ss = [] # To store matched terminals.
es = [] # To collect parse trees from recursive calls.
# Walk through the sequence and either
# match terminals to tokens or make
# recursive calls depending on whether
# the sequence entry is a terminal or
# parsing function.
for x in seq:
if type(x) == type(""): # Terminal.
if tokens[0] == x: # Does terminal match token?
tokens = tokens[1:]
ss = ss + [x]
else:
break # Terminal did not match token.
else: # Parsing function.
# Call parsing function recursively
r = x(tokens, False)
if not r is None:
(e, tokens) = r
es = es + [e]
# Check that we got either a matched token
# or a parse tree for each sequence entry.
if len(ss) + len(es) == len(seq):
if not top or len(tokens) == 0:
return ({label:es} if len(es) > 0 else label, tokens)
 ::= 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 ::= 
 
 
 
 
 
 

def command(tokens, top = True):
seqs = {\
("Start", "start"),\
("Suspend", "suspend"),\
("Wake", "wake"),\
("Terminate", "terminate"),\
("Reboot", "reboot")\
}
for (key, value) in seqs:
if tokens[0] == value:
tokens = tokens[1:]
if not top or len(tokens) == 0:
return (key, tokens)
if tokens[0] == 'repeat':
r = command(tokens[1:], False)
if not r is None:
(e, tokens) = r
if not top or len(tokens) == 0:
return ({"Repeat": [e]}, tokens)
if tokens[0] == 'while':
r = condition(tokens[1:], False)
if not r is None:
(e1, tokens) = r
if tokens[0] == 'then':
r = command(tokens[1:], False)
if not r is None:
(e2, tokens) = r
if not top or len(tokens) == 0:
return ({"While": [e1,e2]}, tokens)
def condition(tokens, top = True):
seqs = [\
("PowerLow", ["power", "low"]),\
("TempHigh", ["temperature", "high"]),\
("UserInput", ["user", "input"])\
]
for (key, seq) in seqs:
if tokens[0] == seq[0]:
if tokens[1] == seq[1]:
tokens = tokens[2:]
if not top or len(tokens) == 0:
return (key, tokens)
condition()
function with a generic parser for base cases that contain an arbitrary number of terminals in each sequence.
Any call to condition(...)
can then be replaced with a call to parseBaseCases(seqsCondition, ...)
.
seqsCondition = [\
("PowerLow", ["power", "low"]),\
("TempHigh", ["temperature", "high"]),\
("TempVeryLow", ["temperature", "very", "low"]),\
("UserInput", ["user", "input"])\
]
def parseBaseCases(seqs, tokens, top = True):
for (key, seq) in seqs:
# Check if token sequence matches sequence.
i = 0
for terminal in seq:
if terminal == tokens[i]:
pass
else:
break
i = i + 1
# Check if the previous loop succeeded.
if i == len(seq):
tokens = tokens[len(seq):]
if not top or len(tokens) == 0:
return (key, tokens)
 ::= 
 
 
 
 
 
 
 
 
 
 

def parse(tokens):
if tokens[0] == 'true':
return ('True', tokens[1:])
if tokens[0] == 'false':
return ('False', tokens[1:])
# ...
# Recursive call, but no tokens consumed.
(e1, tokens) = parse(tokens)
if tokens[0] == 'and':
(e2, tokens) = parse(tokens[1:])
return ({'And':[e1,e2]}, tokens[1:])
# ...
 ::= 
 
 
 
 
 
 ::= 
 
 
 
 
 
 

def formula(tmp):
tokens = tmp[0:]
(e1, tokens) = left(tokens)
if tokens[0] == 'and':
(e2, tokens) = formula(tokens[1:])
return ({'And':[e1,e2]}, tokens[1:])
# ...
def left(tokens):
if tokens[0] == 'true':
return ('True', tokens[1:])
if tokens[0] == 'false':
return ('False', tokens[1:])
# ...
formula()
uses backtracking because the formula production rule has three choices that all begin with the same nonterminal left. It would be possible to perform left factoring on the formula production rule as follows, which would then make it possible to implement formula()
as a predictive parser:
 ::= 
 
 ::= 
 
 
 
 
 ::= 

[NameofInferenceRule] 

[Example] 

[AlgorithmCase] 

[AlgorithmCase] 

expressions (abstract syntax) 
⇒  evaluation algorithm 
⇒  values (abstract syntax) 
 ::= 
 
 ::= 

[True] 

[False] 

[NotTrue] 

[NotFalse] 

[AndTrueTrue] 

[AndTrueFalse] 

[AndFalseTrue] 

[AndFalseFalse] 

[OrTrueTrue] 

[OrTrueFalse] 

[OrFalseTrue] 

[OrFalseFalse] 

 = 
 
 = 
 
 = 
 
 = 
 
 = 
 
 = 
 
 = 
 
 = 
 
 = 
 
 = 

[True] 

[False] 

[Not] 

[And] 

[Or] 

vnot
, vand
, and vor
correspond to ¬, ∧, and ∨, respectively, in the operational semantics.
def vnot(v):
if v == 'True': return 'False'
if v == 'False': return 'True'
def vand(v1, v2):
if v1 == 'True' and v2 == 'True': return 'True'
if v1 == 'True' and v2 == 'False': return 'False'
if v1 == 'False' and v2 == 'True': return 'False'
if v1 == 'False' and v2 == 'False': return 'False'
def vor(v1, v2):
if v1 == 'True' and v2 == 'True': return 'True'
if v1 == 'True' and v2 == 'False': return 'True'
if v1 == 'False' and v2 == 'True': return 'True'
if v1 == 'False' and v2 == 'False': return 'False'
Node = dict
Leaf = str
def evaluate(e):
if type(e) == Node:
for label in e:
children = e[label]
if label == 'Not':
f = children[0]
v = evaluate(f)
return vnot(v)
elif label == 'And':
# Notice that we can make the recursive calls
# below in any order we like; the order of the
# calls is not specified by the operational
# semantics.
f1 = children[0]
v1 = evaluate(f1)
f2 = children[1]
v2 = evaluate(f2)
return vand(v1, v2)
elif label == 'Or':
f2 = children[1]
v2 = evaluate(f2)
f1 = children[0]
v1 = evaluate(f1)
return vor(v1, v2)
elif type(e) == Leaf:
if e == 'True':
return 'True'
if e == 'False':
return 'False'
def evaluate(e):
if type(e) == Node:
for label in e:
children = e[label]
if label == 'Not':
f = children[0]
v = evaluate(f)
return not v # Use the Python not operator.
elif label == 'And':
f1 = children[0]
v1 = evaluate(f1)
f2 = children[1]
v2 = evaluate(f2)
return v1 and v2 # Use the Python and operator.
elif label == 'Or':
f2 = children[1]
v2 = evaluate(f2)
f1 = children[0]
v1 = evaluate(f1)
return v1 or v2 # Use the Python or operator.
elif type(e) == Leaf:
if e == 'True':
return True # Use the Python True constant.
if e == 'False':
return False # Use the Python False constant.
statement sequence (abstract syntax) 
⇒  execution algorithm 
⇒  output (record of side effects) 
 ::= 
 
 ::= 
 
 ::= 

[StatementPrint] 

[StatementEnd] 

[FormulaTrue] 

[FormulaFalse] 

[FormulaNot] 

[FormulaAnd] 

[FormulaOr] 

evaluate()
from a previous example.
def execute(s):
if type(s) == Leaf:
if s == 'End':
return []
elif type(s) == Node:
for label in s:
if label == 'Print':
children = s[label]
f = children[0]
p = children[1]
v = evaluate(f) # Implemented elsewhere.
o = execute(p)
return [v] + o
print()
function and turn this into an interpreter that uses the Python runtime system directly to interpret the program. However, we must be careful about where we put the call to print()
: it must be invoked in a way that conforms to a preorder traversal of the syntax tree.
def execute(s):
if type(s) == Leaf:
if s == 'End':
return []
elif type(s) == Node:
for label in s:
if label == 'Print':
children = s[label]
f = children[0]
p = children[1]
v = evaluate(f)
print(v) # Must appear before recursive call.
o = execute(p)
return [v] + o

⇒  execution algorithm 
⇒  output (record of side effects) 
 ::= 
 
 ::= 
 
 ::= 
 
 ::= 

[StatementAssign] 

[StatementPrint] 

[StatementEnd] 

[FormulaTrue] 

[FormulaFalse] 

[FormulaNot] 

[FormulaAnd] 

[FormulaOr] 

[FormulaVariable] 

vnot
, vand
, and vor
are the same as those in a previous example.
def evaluate(env, e):
if type(e) == Node:
for label in e:
children = e[label]
if label == 'Not':
f = children[0]
v = evaluate(env, f)
return vnot(v)
elif label == 'And':
f1 = children[0]
v1 = evaluate(env, f1)
f2 = children[1]
v2 = evaluate(env, f2)
return vand(v1, v2)
elif label == 'Or':
f2 = children[1]
v2 = evaluate(env, f2)
f1 = children[0]
v1 = evaluate(env, f1)
return vor(v1, v2)
elif label == 'Variable':
x = children[0]
if x in env:
return env[x]
else:
print(x + " is unbound.")
exit()
elif type(e) == Leaf:
if e == 'True':
return 'True'
if e == 'False':
return 'False'
def execute(env, s):
if type(s) == Leaf:
if s == 'End':
return []
elif type(s) == Node:
for label in s:
if label == 'Print':
children = s[label]
f = children[0]
p = children[1]
v = evaluate(env, f)
o = execute(env, p)
return [v] + o
if label == 'Assign':
children = s[label]
x = children[0]['Variable'][0]
f = children[1]
p = children[2]
v = evaluate(env, f)
env[x] = v
o = execute(env, p)
return o

⇒  execution algorithm 
⇒ 

 ::= 
 
 
 
 
 
 
 
⋮ 
print true;
repeat twice {
print false;
end;
}
print true;
end;
[StatementAssign] 

[StatementPrint] 

[StatementRepeatTwice] 

[StatementEnd] 

evaluate()
is the same as the one in a previous example, while the implementation of execute()
from that example must be modified so that it also returns an updated version of the environment data structure.
def execute(env, s):
if type(s) == Leaf:
if s == 'End':
return (env, [])
elif type(s) == Node:
for label in s:
if label == 'Print':
children = s[label]
f = children[0]
p = children[1]
v = evaluate(env, f)
(env, o) = execute(env, p)
return (env, [v] + o)
if label == 'Assign':
children = s[label]
x = children[0]['Variable'][0]
f = children[1]
p = children[2]
v = evaluate(env, f)
env[x] = v
(env, o) = execute(env, p)
return (env, o)
if label == 'RepeatTwice':
children = s[label]
body = children[0]
rest = children[1]
env1 = env
(env2, o1) = execute(env1, body)
(env3, o2) = execute(env2, body)
(env4, o3) = execute(env3, rest)
return (env4, o1 + o2 + o3)
[StatementWhileFalse] 

[StatementWhileTrue] 

[StatementUntilTrue] 

[StatementUntilFalse] 

if label == 'While':
[cond, body, rest] = s[label]
env1 = env
v = evaluate(env1, cond)
if v == 'False':
(env2, o1) = execute(env1, rest)
return (env2, o1)
if v == 'True':
(env2, o1) = execute(env1, body)
(env3, o2) = execute(env2, {'While':[cond, body, rest]})
return (env3, o1 + o2)
program (abstract syntax) 
⇒ 

⇒  output and/or value(s) 
character string (concrete syntax) 
⇒ 
interpreter 
⇒  output and/or value(s) 


program (e.g., input/configuration, instructions, etc.) 
computing device (e.g., difference engine, modern CPU, quantum computer, etc.) 
physical laws (mechanics, electricity, chemistry, biology, quantum mechanics, etc.) 
memory


central processing unit (CPU) 
machine program  
machine languauge  

FORTRAN program  
FORTRAN compiler  
machine program  
machine languauge  

application  
highlevel program  
compiler  
machine program  
machine languauge  

user A

user B


highlevel program 
highlevel program 
highlevel program 
highlevel program 

compiler  
machine program 
machine program 
machine program 
machine program 

machine languauge  
operating system  

application  application  application  application 
operating system  
hardware 
app.  app.  
web app. platform  
browser  app.  app.  
API  API  OS  OS  
web service  web service  virtual machine  virtual machine  
dist. platform  web server  virtualization layer  
controller  controller  OS  host operating system  
node  node  node  node  node  node  node  node  server  hardware (web/cloud server) 
source language abstract syntax 
⇒  compiler  ⇒  target language abstract syntax 
source language concrete syntax 
libraries  
⇓  ⇓  
parser  linker  ⇒  target language concrete syntax 

⇓  ⇑  
source language abstract syntax 
⇒  compilation algorithm 
⇒  target language abstract syntax 
source language abstract syntax 
⇒  comp. alg. A 
⇒  IR #1 
⇒  comp. alg. B 
⇒  IR #2 
⇒  comp. alg. C 
⇒  target language abstract syntax 
 ::= 
 
  
 ::= 
 
 
 
 
 
 
 
 
 
 
 
 ::= 
 
 ::= 
 
 ::= 

# Simulate a machine running the program represented by the string s.
def simulate(s):
instructions = [l.split(" ") for l in s.split("\n")]
mem = {0: 0}
control = 0
while control < len(instructions):
inst = instructions[control]
if inst[0] == 'label':
pass
# For two arguments, test the memory address in the second
# argument; given one argument, branch unconditionally.
if inst[0] == 'branch':
if len(inst) == 2 or mem[int(inst[2])] != 0:
control = instructions.index(['label', inst[1]])
continue
if inst[0] == 'set':
mem[int(inst[1])] = int(inst[2])
if inst[0] == 'copy':
mem[int(inst[2])] = mem[int(inst[1])]
if inst[0] == 'add':
mem[0] = mem[1] + mem[2]
control = control + 1
return mem[0]
>>> p = '''
label start
set 1 2
set 2 3
branch skip 1
set 2 40
label skip
add
'''
>>> simulate(p)
5
 ::= 
 
  
 ::= 
 
 
 
 
 
 
 
 
 
 
 
 
 
 ::= 
 
 ::= 
 
 ::= 

set 3 10
set 4 20
copy
copy()
function below can be viewed as a macro that generates a specific machine language program.
def copy(frm, to):
return [\
'set 3 ' + str(frm),\
'set 4 ' + str(to),\
'copy'\
]
increment()
that takes a single argument, a memory address, and generates a machine program that increments the value at the address specified memory address. We could then implement the following macro, which will set an entire region of memory to a particular constant.
def setAllToInRegion(constant, startAddr, endAddr):
# Memory location 8 contains the constant.
# Memory location 9 contains the counter.
return [\
'set 8 ' + str(constant),\
'set 9 ' + str(startAddr),\
'label loop'\
] \
# Set second argument (destination) of copy.
+ copy(9,4)\
+ [\
# Set first argument (source) for copy.
'set 3 8',\
'copy'\
]\
# Increment the counter.
+ increment(9) \
# If counter is endAddr, break out of loop;
# otherwise, go back to the beginning of the loop.
# We check by adding endAddr to the counter.
# First argument of add.
+ copy(9,1) \
+ [\
# Second argument of add.
'set 2 ' + str(endAddr),\
'add',\
'branch loop 0'\
]
setAllToInRegion()
on specific inputs generates a machine language program for a particular region.
>>> setAllToInRegion(3, 10, 100)
['set 8 3',
'set 9 10',
'label loop',
'set 3 9',
'set 4 4',
'copy',
'set 3 8',
'copy',
'# increment() done as homework.',
'set 3 9',
'set 4 1',
'copy',
'set 2 100',
'add',
'branch loop 0']
15
). Note that #increment#
is a macro provided by the simulator and not part of the machine language itself; you cannot use #increment#
when working on an assignment because simulator provided to you will not support such macros.
 ::= 

compileFormula()
that takes as its input the parse tree, and a variable heap
that represents the highest address in memory that contains a computed value (in our case, 1
can represent true and 0
can represent false). The function returns a list of machine language instructions that computes that value, as well as the memory address where that value would be stored if anyone actually executed (or simulated) the machine language program.
def compileFormula(f, heap = 3, fresh = 0):
if type(f) == Leaf:
if f == 'True':
# Find a new memory address on the heap.
heap = heap + 1
# Generate instruction to store the integer representing 'True' on the heap.
inst = 'set ' + str(heap) + ' 1'
# Return the instruction list and "top" of the heap.
return ([inst], heap, fresh)
if f == 'False':
# Find a new memory address on the heap.
heap = heap + 1
# Generate instruction to store the integer representing 'False' on the heap.
inst = 'set ' + str(heap) + ' 0'
# Return the instruction list and "top" of the heap.
return ([inst], heap, fresh)
if type(f) == Node:
for label in f:
children = f[label]
if label == 'Not':
# Compile the subtree f to obtain the list of
# instructions that computes the value represented
# by f.
f = children[0]
(insts, heap, fresh) = compileFormula(f, heap, fresh)
# Generate more instructions to change the memory
# location in accordance with the definition of the
# 'Not' operation.
instsNot = \
["branch setZero" + str(fresh) + " " + str(heap),\
"set " + str(heap) + " 1",\
"branch finish" + str(fresh),\
"label setZero" + str(fresh),\
"set " + str(heap) + " 0",\
"label finish" + str(fresh)\
]
return (insts + instsNot, heap, fresh + 1)
if label == 'Or':
# Compile the two subtrees and get the instructions
# lists as well as the addresses in which the results
# of computing the two subtrees would be stored if someone
# were to run those machine instructions.
f1 = children[0]
f2 = children[1]
(insts1, heap2, fresh1) = compileFormula(f1, heap, fresh)
(insts2, heap3, fresh2) = compileFormula(f2, heap2, fresh1)
# Increment the heap counter so we store the
# result of computing 'Or' in a new location.
heap4 = heap3 + 1
# Add instructions that compute the result of the
# 'Or' operation.
instsOr = \
["copy " + str(heap2) + " 1",\
"copy " + str(heap3) + " 2",\
"add",\
"branch setOne" + str(fresh2) + " 0",\
"branch finish" + str(fresh2),\
"label setOne" + str(fresh2),\
"set 0 1",\
"label finish" + str(fresh2),\
"copy 0 " + str(heap4)\
]
return (insts1 + insts2 + instsOr, heap4, fresh2 + 1)
x := true;
while x {
x := false;
}
print false;
true
is represented as 1, then our compiler would first emit the instruction set 8 1 and would store inside the environment a mapping {x ↦ 8}. It would then use this information to compile the expression condition and assignment statement in the body of the loop.
 ::= 
 
 
 
 
 
 
 
  
 ::= 

procedure printProc {
print true;
print false;
}
call printProc;
print false;
call printProc;
print true;
call printProc;
statement, or to the point corresponding to the second call printProc;
statement?
call printProc;
are compiled into machine code that stores the index of the current instruction at each call point at a particular memory location (in this example, suppose it is memory address 7
). Then, the block of machine code into which the procedure body is compiled can always consult the same memory location to determine to which point it should pass control back after the machine code for the procedure body finishes executing. Because control should not go back to the goto instruction but beyond it, it may be necessary to increment the value representing the program location before using the jump instruction. We provide machine language pseudocode below that does just that.
# The procedure.
goto printProcEnd # Skip procedure body.
label printProc
set 5 1 # print true;
set 5 0 # print false;
# Return control to the last calling point.
< increment integer stored in address 7 by 2 >
jump 7
label printProcEnd
# The first call.
< copy integer from address 6 to address 7 >
goto printProc
set 5 0 # print false;
# The second call.
< copy integer from address 6 to address 7 >
goto printProc
set 5 1 # print true;
7
, making it impossible to determine the program location to which control must return after the outermost procedure body finishes. To address this, it is possible to have the machine language program maintain a stack starting at memory location 7
that will keep track of all the program locations to which control must return after each nested procedure call finishes. We do so in the machine language pseudocode below.
# Set stack pointer to top of stack at address 8.
set 7 8
# The procedure.
goto printProcEnd # Skip procedure body.
label printProc
set 5 1 # print true;
set 5 0 # print false;
# Return control to the last calling point.
< copy integer from top of stack
(i.e., integer/address contained in address 7) to address 1 >
< set address 2 to some constant
(to skip the calling machine instructions) >
< add contents of address 1 and address 2 >
jump 0
label printProcEnd
# The first call.
< increment integer in address 7 (the stack pointer) >
< copy integer from address 6 to top of stack
(i.e., address/integer specified in address 7) >
goto printProc
set 5 0 # print false;
# The second call.
< increment integer in address 7 (the stack pointer) >
< copy integer from address 6 to top of stack
(i.e., address/integer specified in address 7) >
goto printProc
set 5 1 # print true;
source language abstract syntax 
⇒ ⇐ 
optimization algorithm #1 
⇓  
compilation algorithm A 

⇓  
intermediate representation (IR) #1 
⇒ ⇐ 
optimization algorithm #2 
⇓  
compilation algoritm B 

⇓  
intermediate representation (IR) #2 
⇒ ⇐ 
optimization algorithm #3 
⇓  
compilation algorithm C 

⇓  
target language abstract syntax 
⇒ ⇐ 
optimization algorithm #4 
i = 0;
while (i < 1000000) {
i++;
}
i
corresponds to memory address 123
).
label whileLoop
< increment integer in address 123 >
< compare integer in address 123 to integer 1000000 and store result in address 200 >
branch whileLoop 200
i = 0;
while (i < 1000000) {
i++;
i++;
i++;
i++;
i++;
i++;
i++;
i++;
i++;
i++;
}
label whileLoop
< increment integer in address 123 >
< increment integer in address 123 >
< increment integer in address 123 >
< increment integer in address 123 >
< increment integer in address 123 >
< increment integer in address 123 >
< increment integer in address 123 >
< increment integer in address 123 >
< increment integer in address 123 >
< increment integer in address 123 >
< compare integer in address 123 to integer 1000000 and store result in address 200 >
branch whileLoop 200
{'Print': [
{'Plus': [
{'Mult':[{'Number':[3]}, {'Number':[4]}]},
{'Variable':['x']}
]}
]}
{'Mult':[{'Number':[3]}, {'Number':[4]}]}
can be evaluated in a finite amount of time, and then replace it with the value to which it evaluates. The overall tree would then look as follows.
{'Print': [
{'Plus': [
{'Number':[12]},
{'Variable':['x']}
]}
]}
a := 2;
b := 5;
print a + b;
f := true and false;
print f;
a := 7;
g := true;
h := false;
b := 0;
print b;
print h;
print g;
print a;
a_0 := 2;
b_0 := 5;
print a_0 + b_0;
f_0 := true and false;
print f_0;
a_1 := 7;
g_0 := true;
h_0 := false;
b_1 := 0;
print b_1;
print h_0;
print g_0;
print a_1;
[StmtFor] 

[ExpComprehension] 

highlevel source language abstract syntax 
⇒ ⇐ 
loop unrolling 
⇓  
SSA conversion  
⇓  
IR in SSA form  ⇒ ⇐ 
constant folding 
⇓  
compilation  
⇓  
lowlevel IR  ⇒ ⇐ 
register allocation 
⇓  
instruction selection 

⇓  
lowlevel target machine language abstract syntax 
source language abstract syntax 
⇒  compilation algorithm 
⇒  target language abstract syntax 
⇓  ⇓  
source language operational semantics (interpreter) 
target language operational semantics (simulator) 

⇓  ⇓  
source language abstract syntax for values/outputs 
⇒  simple conversion algorithm for values/outputs 
⇒  target language abstract syntax for values/outputs 
 = 

 ::= 

Node = dict
Leaf = str
def metric(f):
if type(f) == Leaf:
return 1
if type(f) == Node:
for label in f:
return 1 + max([metric(child) for child in f[label]])
def formulas(n):
if n <= 0:
return []
elif n == 1:
return ['True', 'False']
else:
fs = formulas(n1)
fsN = []
fsN += [{'Not':[f]} for f in fs]
fsN += [{'And':[f1,f2]} for f1 in fs for f2 in fs]
fsN += [{'Or':[f1,f2]} for f1 in fs for f2 in fs]
return fs + fsN
source language concrete syntax 
target language abstract syntax 

⇓  ⇑  
parser  ⇒  source language abstract syntax 
⇒  static analysis algorithms 
⇒  abstract syntax or IR 
⇒  compilation algorithms 
⇓  ⇓  
syntax errors 
other errors (unbound variables, type errors, etc.) 
 ::= 
 
 
 
 
 
 
 
 

 ::= 
 
 

[FormulaTrue] 

[FormulaFalse] 

[FormulaNot] 

[FormulaAnd] 

[FormulaOr] 

[TermNumber] 

[TermPlus] 

def typeCheck(e):
if type(e) == Leaf:
if e == 'True' or e == 'False':
return 'boolean'
if type(e) == Node:
for label in e:
if label == 'Number':
return 'integer'
if label == 'Not':
[e1] = e[label]
t1 = typeCheck(e1)
if t1 != 'boolean':
print("not must be applied to a boolean")
return None
return 'boolean'
# ...
if label == 'Plus':
[e1,e2] = e[label]
t1 = typeCheck(e1)
t2 = typeCheck(e2)
if t1 != 'integer' or t2 != 'integer':
print("+ requires integer arguments")
return None
return 'integer'
 ::= 
 
  
 ::= 
 
 
 
 
 
 
 
 
 
 
 
 
 
 ::= 
 
 ::= 
 
 ::= 
 
 ::= 
 
 
 
 

[StatementAssign] 

[StatementEnd] 

[Variable] 

[FormulaTrue] 

[FormulaFalse] 

[FormulaNot] 

[FormulaAnd] 

[FormulaOr] 

[TermNumber] 

[TermPlus] 

[TermMult] 

[StatementWhileEvalTrue] 

[StatementWhileType] 

 ::= 
 
 
 
  
 ::= 
 
 ::= 
 
 ::= 
 
 ::= 
 
 
 
 

[StatementAssign] 

[StatementFunction] 

[StatementEnd] 

[TermVariable] 

[TermNumber] 

[TermPlus] 

[TermApply] 

def evaluate(env, e):
if type(e) == Node:
for label in e:
children = e[label]
if label == 'Number':
n = children[0]
return n
elif label == 'Plus':
[e1,e2] = children
return evaluate(env, e1) + evaluate(env, e2)
elif label == 'Variable':
x = children[0]
value = env[x]
return value
elif label == 'Apply':
[f, eArg] = children
f = f['Variable'][0] # Unpack.
x = env[f]['Function'][2]['Variable'][0]
eBody = env[f]['Function'][3] # Body.
vArg = evaluate(env, eArg)
envF = env.copy()
envF[x] = vArg
vResult = evaluate(envF, eBody)
return vResult
def execute(env, s):
if type(s) == Leaf:
if s == 'End':
return (env, [])
elif type(s) == Node:
for label in s:
if label == 'Assign':
[var, e, p] = s[label]
x = var['Variable'][0]
env[x] = evaluate(env, e)
(env, o) = execute(env, p)
return (env, o)
if label == 'Function':
[f, ty, x, e, p] = s[label]
f = f['Variable'][0] # Unpack.
env[f] = s
(env, o) = execute(env, p)
return (env, o)
def tyExpr(env, e):
if type(e) == Node:
for label in e:
children = e[label]
if label == 'Number':
return 'TyInt'
elif label == 'Variable':
x = children[0]
return env[x]
elif label == 'Apply':
[f, eArg] = children
f = f['Variable'][0] # Unpack.
tyArg = tyExpr(env, eArg)
tyFunArg = env[f]['TyArrow'][0]
if tyArg == tyFunArg:
return env[f]['TyArrow'][1]
def tyProg(env, s):
if type(s) == Leaf:
if s == 'End':
return 'TyVoid'
elif type(s) == Node:
for label in s:
if label == 'Assign':
[x,e,p] = s[label]
x = x['Variable'][0] # Unpack.
tExpr = tyExpr(env, e)
env[x] = tExpr
tProg = tyProg(env, p)
if tExpr == 'TyInt' and tProg == 'Void':
return 'TyVoid'
if label == 'Function':
[f, tyArg, x, e, p] = s[label]
name = f['Variable'][0]
x = x['Variable'][0]
envF = env.copy()
envF[x] = tyArg
tBody = tyExpr(envF, e)
if tBody == 'TyInt':
env[name] = {'TyArrow':[tyArg, tBody]}
tProg = tyProg(env, p)
return tProg
 ::= 
 
 
 
  
 ::= 
 
 ::= 
 
 ::= 
 
 ::= 
 
 
 
 
 
 ::= 
 
 
 
[StatementAssign] 

[StatementFunction] 

[StatementEnd] 

[TermVariable] 

[TermString] 

[TermConcat] 

[TermApply] 

 ::= 
 
 
 
 
 
 
 
  
 ::= 
 
 ::= 
 
 ::= 
 
 ::= 

[ExpressionNumber] 

[ExpressionMult] 

[StatementPrint] 

[StatementFor] 

[StatementProcedure] 

[StatementCall] 

[StatementEnd] 

def timeExpr(e):
if type(e) == Node:
for label in e:
children = e[label]
if label == 'Number':
return 1
elif label == 'Mult':
[e1,e2] = children
return 1 + timeExpr(e1) + timeExpr(e2)
def timeProg(env, s):
if type(s) == Leaf:
if s == 'End':
return 0
elif type(s) == Node:
for label in s:
if label == 'Print':
[e,p] = s[label]
return 1 + timeExpr(e) + timeProg(env, p)
if label == 'For':
[var, num, p1, p2] = s[label]
n = num['Number'][0]
return n * timeProg(env, p1) + timeProg(env, p2)
# ...
 ::= 
 
 
 
 
 
 
 
  
 ::= 
 
 
 
 
 
 ::= 
 
 ::= 
 
 ::= 
 
 ::= 

[DurationMinutes] 

[DurationHours] 

[DurationDays] 

[StatementReserve] 

[StatementFor] 

[StatementProcedure] 

[StatementCall] 

[StatementEnd] 

def costDuration(e):
if type(e) == Node:
for label in e:
children = e[label]
if label == 'Minutes':
[num] = children
n = num['Number'][0]
return n * 0.01
if label == 'Hours':
[num] = children
n = num['Number'][0]
return n * 0.60
if label == 'Days':
[num] = children
n = num['Number'][0]
return n * 14.40
def costProg(env, s):
if type(s) == Leaf:
if s == 'End':
return 0
elif type(s) == Node:
for label in s:
if label == 'Reserve':
[d,p] = s[label]
return costDuration(d) + costProg(env, p)
if label == 'For':
[var, num, p1, p2] = s[label]
n = num['Number'][0]
return n * costProg(env, p1) + costProg(env, p2)
# ...
 ::= 
 
 
 
 
 
  
 ::= 
 
 ::= 
 
 ::= 
 
 ::= 

[StatementAssignSecret] 

[StatementAssignPublic] 

[StatementPrint] 

[ExpressionVariable] 

[ExpressionNumberSecret] 

[ExpressionNumberPublic] 

[ExpressionPlusSecretSecret] 

[ExpressionPlusSecretPublic] 

[ExpressionPlusPublicSecret] 

[ExpressionPlusPublicPublic] 

imperative  procedural  object oriented 
declarative  functional  static type checking 
garbage collection 
side effects 

machine languages 
some support (indexed memory, branching) 
no support  no support  no support  no support  none  no  yes  
FORTRAN (initial release) 
full support  no support  no support  no support  no support  very limited 
no  yes  
C  full support  full support  no support  no support  some support (function pointers) 
very limited 
no  yes  
C++  full support  full support  full support  class declarations, some type checking 
function pointers and some extensions (e.g., STL) 
limited  no  yes  
Java  full support  full support  full support  type system, class declarations, abstract classes, interfaces, type checking, and type inference 
generics, interfaces, objects as wrappers, anonymous classes 
extensive  yes  yes  
PHP  full support  full support  full support  no support  some support  none  yes (≥ 5.3) 
yes  
Python  full support  full support  full support  no support  full support  none  yes  yes  
JavaScript  full support  full support  full support  no support  full support  none  yes  yes  
Haskell  metasupport (monads) 
metasupport (functions) 
no support  full support  full support  extensive  yes  some (IO monads) 

SQL  some support (insertion/ deletion; sequences of statements in versions like MSSQL) 
some support (procedures in versions like MSSQL) 
no support  full support (definition of schemas and tables; ability to evaluate queries over tables) 
some support (custom map and aggregation procedures in MSSQL) 
some  yes  some  
Amazon Web Services APIs 
some support  no support  no support  some support  some support  none  n/a  yes 
 ::= 
 
 ::= 

 = 

 = 

 = 

 = 

 = 

 ≠ 

 ::= 
 
 ::= 

 = 

 = 
 
 = 
 
 = 
 
 = 
 
 = 
 
 = 
 
==
operator), and Haskell supports the automatic derivation of this algorithm for any data type (using the deriving Eq
directive).
>>> (a,(b,c),(d,(e,f),g)) = (1,(2,3),(4,(5,6),7))
>>> (a,b,c,d,e,f,g)
(1,2,3,4,5,6,7)
module Example where
c = 5;
g (0) = 0;
g (x) = f(x) + f(x);
f (x) = 2 * x + 1;
*> c + c
10
*> f(4)
9
module AnotherExample where
f (1) = 1;
f (0) = 0;
f (1) = 1;
(a,(b,(c,d),e)) = (1,(2,(3,4),5));
g([x,y,z]) = [z,y,x];
*> f(1)
1
*> d
4
*> g([1,2,3])
[3,2,1]
module Tree where
data Tree = Leaf  Node Tree Tree;
data Tree = Leaf  Tree `Node` Tree;
Tree
algebraic data type. However, in order to actually display instances of the data type, it is necessary to ask the Haskell interpreter to derive a definition of a show
function automatically from the definition of the algebraic data type (something that is done implicitly in, for example, Python). Haskell requires that this be requested explicitly because it also allows users to define their own variants of such functions.
module Tree where
data Tree =
Leaf
 Node Tree Tree
deriving Show;
*> Leaf  We represented this using "Leaf" in Python.
Leaf
*> Node Leaf Leaf  We represented this using {"Node":["Leaf", "Leaf"]} in Python.
Node Leaf Leaf
*> Leaf `Node` Leaf
Node Leaf Leaf
*> Leaf `Node` ((Leaf `Node` Leaf) `Node` (Leaf `Node` Leaf))
Node Leaf (Node (Node Leaf Leaf) (Node Leaf Leaf))
 Count the number of nodes in a tree that have exactly one nonleaf child.
 The spaces between the ( ... ) are just for legibility.
nodesWithOneLeaf (Node Leaf Leaf) = 0;
nodesWithOneLeaf (Node Leaf t ) = 1 + nodesWithOneLeaf(t);
nodesWithOneLeaf (Node t Leaf) = 1 + nodesWithOneLeaf(t);
nodesWithOneLeaf (Node t1 t2 ) = nodesWithOneLeaf(t1) + nodesWithOneLeaf(t2);
nodesWithOneLeaf (Leaf ) = 0;
module Formula where
data Formula =
T
 F
 Not Formula
 And Formula Formula
 Or Formula Formula
deriving Show;
vNot (T) = F;
vNot (F) = T;
vAnd (T, T) = T;
vAnd (T, F) = F;
vAnd (F, T) = F;
vAnd (F, F) = F;
vOr (T, T) = T;
vOr (T, F) = T;
vOr (F, T) = T;
vOr (F, F) = F;
eval (T ) = T;
eval (F ) = F;
eval (Not f ) = vNot (eval f);
eval (And f1 f2) = vAnd (eval f1, eval f2);
eval (Or f1 f2) = vOr (eval f1, eval f2);
_
) in patterns makes it possible to write even more concise definitions.
vAnd (T, T) = T;
vAnd _ = F;
vOr (F, F) = F;
vOr _ = T;
eval (Not f ) = vNot (eval f);
eval (And f1 f2) = vAnd (eval f1, eval f2);
eval (Or f1 f2) = vOr (eval f1, eval f2);
eval (f) = f;
==
. To have the Haskell interpreter or compiler derive it automatically, you can use deriving Eq
. These can be combined with other deriving
directives, such as in the example below.
data Formula =
T
 F
 Not Formula
 And Formula Formula
 Or Formula Formula
deriving (Eq, Show);
 ::= 
 
 
 
 
 
 
 
 
 
 
 
 
 
⋮  
 
 
 
 
 
 
⋮  
[Integer]
.
list :: [Integer]
list = [1,2,3]
Tree
data type that we have already seen. If we were to define our own algebraic data type for lists in Haskell, for example, we could do so as follows (the terms "nil" and "cons" are commonly used in the functional language community for the empty list and for the list node constructor, respectively):
data List = Nil  Cons Integer List
example :: List
example = Cons 1 (Cons 2 (Cons 3 Nil))
[]
, and the list node constructor is the operator (:)
, which can also be written as an infix operator :
. The following three syntaxes are equivalent, and evaluate to the same list [1,2,3]
:
example1 :: [Integer]
example1 = (:) 1 ((:) 2 ((:) 3 []))
example2 :: [Integer]
example2 = 1 : (2 : (3 : []))
example3 :: [Integer]
example3 = [1,2,3]
*> [1..4]
[1,2,3,4]
type Distance = Integer
String
; strings in Haskell are just lists of characters:
type String = [Char]
example1 :: String
example1 = "abcdefg"
example2 :: String
example2 = ['a', 'b', 'c', 'd', 'e', 'f', 'g']
GameStrategyAlgorithm
is equivalent to the type [[(Integer, Integer)]] > String
(i.e., the type for functions that take lists of lists of integer pairs and return a string):
type GameState = [(Integer, Integer)]
type Game = [GameState]
type Strategy = String
type GameStrategyAlgorithm = Game > Strategy
1
in every position:
infinite :: [Integer]
infinite = [1] ++ infinite
(:)
list node constructor:
infinite :: [Integer]
infinite = 1 : infinite
*> take 10 [5..]
[5,6,7,8,9,10,11,12,13,14]
n
elements of a list (note that this function is already included in the Prelude
; we use take_
for our examples to avoid a name collision).
take_ :: (Integer, [Integer]) > [Integer]
take_ (0, xs ) = []
take_ (n, (x:xs)) = x : take (n1, xs)
take
.
*> take_ (10, infinite)
[1,1,1,1,1,1,1,1,1,1]
addInfiniteLists :: ([Integer], [Integer]) > [Integer]
addInfiniteLists (x:xs, y:ys) = (x + y) : addInfiniteLists (xs, ys)
addInfiniteLists
function to add two infinite lists. Suppose we have the following definitions:
ones = 1 : ones
twos = 2 : twos
3
:
*> take 10 (addInfiniteLists (ones, twos))
[3,3,3,3,3,3,3,3,3,3]
f :: Integer > Integer
f (x) = x + 1
g :: Integer > (Integer > Integer)
g (y) = f
g
is a function that takes an integer as an argument. It then throws this argument away and returns the function f
. Its type reflects this. Thus, the result of evaluating an expression such as g(4)
is a function of type Integer > Integer
, and this function can be applied to another argument.
*> :t g(1)
g(1) :: Integer > Integer
*> :t g(1)(5)
g(1)(5) :: Integer
*> g(1)(5)
6
g
that can be partially applied to only some of its arguments, but that still uses all of its arguments? We can do so by listing multiple, spaceseparated parameters in the definition of g
.
g :: Integer > (Integer > Integer)
g (y) (x) = y + x
g
to arguments one at a time, as before.
*> :t g(3)
g(3) :: Integer > Integer
*> :t g(3)(4)
g(3)(4) :: Integer
*> g(3)(4)
7
data A = A
data B = B
data C = C
data D = D
data E = E
f :: A > (B > (C > (D > E)))
f a b c d = E
*> :t f
f :: A > (B > (C > (D > E)))
*> :t f(A)
f(A) :: B > (C > (D > E))
*> :t f(A)(B)
f(A)(B) :: C > (D > E)
*> :t f(A)(B)(C)
f(A)(B)(C) :: D > E
*> :t f(A)(B)(C)(D)
f(A)(B)(C)(D) :: E
f
of type Integer > Integer
and returns a function f'
that is its reflection across the xaxis (i.e., for all x
, f(x)
= f'(x)
).
reflect :: (Integer > Integer) > Integer > Integer
reflect (f) (x) =  f(x)
(Integer > Integer) > (Integer > Integer)
because >
is rightassociative. This makes it more obvious that reflect
takes a function of type Integer > Integer
as an input and returns a function of type Integer > Integer
as an output.
f
of type Integer > Integer
and returns an integer approximation of its definite integral starting at x = 0.
integral :: (Integer > Integer) > Integer > Integer
integral (f) (x) = sum [ f(k)  k < [0..x] ]
f
of type Double > Double
and returns an approximation of its definite integral starting at x = 0 for a given precision.
integral :: Double > (Double > Double) > Double > Double
integral (i) (f) (x) = sum [ f(k*i)*i  k < [0..(x/i)] ]
f
of type Double > Double
and returns an approximation of its derivative.
derivative :: Double > (Double > Double) > Double > Double
derivative (h) (f) (x) = (f(x + h)  f(x)) / h
user()
and owns()
either as tables in a database (with each declaration for the function acting as a row in the table) or as logical predicates.
data Constant = Alice  Bob  X  Y  Z deriving (Eq, Show)
 Define which constants are a file.
file (X) = True
file (Y) = True
file (Z) = True
file (_) = False
 Define which constants are a user.
user (Alice) = True
user (Bob) = True
user (_) = False
 Specify which users are administrators.
admin(Alice) = True
admin(Bob) = False
 Specify file ownership relation.
owns (Alice, X) = True
owns (Bob, Y) = True
owns (Bob, Z) = True
owns (_, _) = False
 Only owners and administrators can write files.
write (user', file') =
( owns(user', file')  admin(user'))
&& user(user')
&& file(file')
 Anyone can read files.
read (user, file) = True
file()
and user()
must take place when the program is running. We can make the code more efficient by instead declaring distinct types. Since type checking is performed only at compile time in Haskell, this will still ensure that there are no type errors, but it will be more efficient because fewer function invocations will occur when the program is running.
data User = Alice  Bob deriving (Eq, Show)
data File = X  Y  Z deriving (Eq, Show)
admin :: User > Bool
admin (Alice) = True
admin (Bob) = False
owns :: (User, File) > Bool
owns (Alice, X) = True
owns (Bob, Y) = True
owns (Bob, Z) = True
owns (_, _) = False
write :: (User, File) > Bool
write (user, file) = owns(user, file)  admin(user)
read :: (User, File) > Bool
read (user, file) = True
data Meters = Meters Integer
data Feet = Feet Integer
data Action = KeepGoing  SlowDown
updateVelocity :: Feet > Action
updateVelocity (Feet n) = if n < 100 then SlowDown else KeepGoing
0
, 1
, 2
, and so on) and builtin operators (such as +
, *
, and so on).
instance Num Meters where
fromInteger n = Meters n
(Meters x) + (Meters y) = Meters (x + y)
Meters
.
*> 1 + 2 + 3 :: Meters
Meters 6
Meters
to strings.
instance Show Meters where
show (Meters n) = show n ++ " meters"
*> 1 + 2 + 3 :: Meters
6 meters
data Term =
Number Integer
 Plus Term Term
deriving Show
data Formula =
T
 F
 Not Formula
 And Formula Formula
 Or Formula Formula
 Equal Term Term
 LessThan Term Term
deriving Show
data Statement =
Print Term Statement
 If Formula Statement Statement
 End
deriving Show
(<<<) :: Term > Term > Formula
(<<<) t1 t2 = LessThan t1 t2
(===) :: Term > Term > Formula
(===) t1 t2 = Equal t1 t2
(&&&) :: Formula > Formula > Formula
(&&&) f1 f2 = And f1 f2
() :: Formula > Formula > Formula
() f1 f2 = Or f1 f2
instance Num Term where
fromInteger n = Number n
t1 + t2 = Plus t1 t2
$
syntax to easily write down abstract syntax trees.
parseTree =
Print 5 $
Print 6 $
If ((5 <<< 6) &&& (6 <<< 7)) (
Print 6 $
Print 7 $
End
) $
End
parseTree
will yield the actual algebraic data type instance (i.e., a parse tree).
*> parseTree
Print (Number 5) (Print (Number 6) (
If
(And (LessThan (Number 5) (Number 6)) (LessThan (Number 6) (Number 7)))
(Print (Number 6) (Print (Number 7) End)) End)
)
if ... then ... else ...
for writing conditional expressions:
*> if True then 1 else 2
1
*> if False && True then "Yes" else "No"
"No"
if' :: (Bool, Integer, Integer) > Integer
if' (True, trueBranch, falseBranch) = trueBranch
if' (False, trueBranch, falseBranch) = falseBranch
if'' :: Bool > Integer > Integer > Integer
if'' True trueBranch _ = trueBranch
if'' False _ falseBranch = falseBranch
if ... then ... else ...
syntax (or function, as we have seen above) can be useful in certain situations.
String
and a value (e.g., an Integer
). For example:
[("x",1), ("y", 2)] :: [(String, Integer)]
lookup' :: String > [(String, Integer)] > Integer
lookup' x ((x',i) : rest) = if x == x' then i else lookup' x rest
lookup
. However, since it returns a result that is wrapped in a Maybe
constructor, we can unwrap it using, e.g.:
unwrap :: Maybe Integer > Integer
unwrap (Just n) = n
Maybe
in more detail in a subsequent section.
data Formula =
T
 F
 Variable String
 Xor Formula Formula
deriving Show
/=
on booleans):
eval :: [(String, Bool)] > Formula > Bool
eval env (T ) = True
eval env (F ) = False
eval env (Variable x) = lookup' x env
eval env (Xor f1 f2 ) = eval env f1 /= eval env f2
*> eval [("x", True), ("y", False), ("z", True)] (Xor (Variable "x") (Variable "y"))
True
data Color = Red  Green  Blue
Red
has the lowest frequency and Blue
has the highest frequency, we can define an ordering on all instances of this data type by making the Color
type a member of the Ord
type class. Note that we could use wildcards (_
) to make the definition more concise.
instance Ord Color where
Red <= Red = True
Red <= Green = True
Red <= Blue = True
Green <= Green = True
Green <= Blue = True
Blue <= Blue = True
_ <= _ = False
<
, <=
, >
, and >=
to compare colors, as well as library functions such as max :: Color > Color > Color
, min :: Color > Color > Color
, maximum :: [Color] > Color
, and minimum :: [Color] > Color
.
data IntegerTree = Node IntegerTree IntegerTree  Leaf Integer
ints :: IntegerTree > [Integer]
ints (Node t1 t2) = ints t1 ++ ints t2
ints (Leaf n ) = [n]
type Function = Integer > Integer
compose :: Function > Function > Function
compose
?
compose f g = ?
compose
; let's substitute one the rightmost Function
synonym with its definition:
compose :: Function > Function > (Integer > Integer)
>
type operator is rightassociative; this means we can remove the rightmost set of parentheses without changing the meaning of the type:
compose :: Function > Function > Integer > Integer
compose
: it takes two Function
arguments, and one Integer
argument:
compose :: Function > Function > Integer > Integer
compose f g x = f(g(x))
(.)
. So we could also have defined compose
as follows:
compose :: Function > Function > Function
compose f g = f . g
*> [x+1  x < [0..4]]
[1,2,3,4,5]
*> [(x,y)  x < [1..3], y < [1..2]]
[(1,1),(1,2),(2,1),(2,2),(3,1),(3,2)]
*> [(x,y)  x < [1..3], y < [1..2], x + y == 3, x >= 1]
[(1,2),(2,1)]
<
can be any pattern. If a pattern appears on the lefthand side of <
, then each element in the list on the righthand side will be checked against the pattern. Only those elements that unify with the pattern will be considered, and for each of them, the subsequent conditions in the comprehension, as well as the body of the comprehension, will be evaluated.
data Tree =
Node Tree Tree
 Leaf
deriving Show
example :: [Tree]
example = [
Node Leaf Leaf,
Node (Node Leaf Leaf) Leaf,
Node Leaf (Node Leaf Leaf),
Node Leaf (Node (Node Leaf Leaf) Leaf)
]
example
. The second expression builds a list containing the right subtrees that are not leaves. The last expression builds a list of subtree pairs for each nonleaf tree.
*> [r  Node _ r < example]
[Leaf, Leaf, Node Leaf Leaf, Node (Node Leaf Leaf) Leaf]
*> [r  Node _ r < example, r /= Leaf]
[Node Leaf Leaf, Node (Node Leaf Leaf) Leaf]
*> [(l,r)  Node l r < example]
[(Leaf, Leaf), (Node Leaf Leaf, Leaf), (Leaf, Node Leaf Leaf), (Leaf, Node (Node Leaf Leaf) Leaf)]
()
for each matching entry and then checking the length of the list resulting from the comprehension, or by comparing it to the empty list.
*> length [()  Node _ _ < example] > 0
True
*> [()  Node _ _ < example] /= []
True
data Cell = E  X  O deriving (Eq, Show)
type Pos = (Integer, Integer)
data Board = Board [(Pos, Cell)] deriving Eq
instance Show Board where
show (Board pcs) = show [c  (p,c) < pcs]
start :: Board
start = Board [((r,c),E)  r < [1..3], c < [1..3]]
*> start
[E,E,E,E,E,E,E,E,E]
X
or O
somewhere on the board. We can model this using a function that, given a move, transforms one board into another board. We use a list comprehension that updates only the cell with the corresponding position.
move :: ((Pos, Cell), Board) > Board
move ((pos', cell'), Board pcs) = Board [(pos, if pos' == pos then cell' else cell)  (pos, cell) < pcs]
X
and O
players take alternating turns.
turns :: [Cell]
turns = X:O:turns
Board
), and can have any number of child trees (including zero).
data DecisionTree = Move Board [DecisionTree] deriving (Eq, Show)
nextMoves
takes a Board
and a Cell
(to indicate which player is taking a turn), and uses a list comprehension to choose only the board cells that are empty. For each empty cell, it adds a new board state to the list in which that position is filled with the specified player's symbol.
nextMoves :: (Board, Cell) > [Board]
nextMoves (Board pcs, cell) = [move ((p, cell), Board pcs)  (p, E) < pcs]
tree :: Board > [Cell] > DecisionTree
tree b (turn:turns) = Move b [tree b' turns  b' < nextMoves (b, turn)]
win :: Cell > Board > Bool
that checks if a given board is already a win for a particular player. We can then add an Ord
instance to compare boards from player X
's perspective: a board is "greater" (or "better") than another board if either the board is a win for X
, or the other board is a loss for O
.
instance Ord Board where
b <= b' = win b' X  win b O
<
, <=
, >
, and >=
to compare boards, as well as library functions such as max :: Board > Board > Board
and maximum :: [Board] > Board
to choose the "best" boards.
len
for computing lengths of lists of integers.
len :: [Integer] > Integer
len [] = 0
len ((:) x xs) = 1 + len xs
Bool
values because the type [Integer]
does not unify with the type [Bool]
. We could write an alternative type to address this.
len :: [Bool] > Integer
len [] = 0
len ((:) x xs) = 1 + len xs
len
on lists of type [Bool]
, but we can no longer use it on lists of type [Integer]
. Notice that the only thing that has changed is the type of the function; the body is the same in both cases. Haskell allows us to take advantage of this by employing a type variable.
len :: [a] > Integer
len [] = 0
len ((:) x xs) = 1 + len xs
len
function to a list containing any type of element. This is because the type variable a
unifies with any other type (including Bool
and Integer
), so the input type [a]
will unify with both [Integer]
and [Bool]
, as well as types such as [String > String]
(a list of functions that take String
inputs and return String
outputs).
*> len [True, False, True]
3
*> len [1, 2, 3, 4, 5]
5
*> len [\s > s ++ ".", \t > t ++ ";"]
2
len
is a function that can be applied to different input types, but has only a single definition that works for all input types, it is an example of parametric polymorphism.
Set
data structure for holding sets of Integer
values that has an underlying list implementation.
module IntegerSet where
data Set =
Empty
 Set [Integer]
deriving Show
insert :: Set > Integer > Set
insert (Set l) x = Set (x:l)
insert (Empty) x = Set [x]
contains :: Integer > Set > Bool
contains x (Empty) = False
contains x (Set l) = x `elem` l
module SetClass where
class IsSet s where
empty :: s
insert :: s > Integer > s
contains :: Integer > s > Bool
inserts :: s > [Integer] > s
inserts s (x:xs) = inserts (insert s x) xs
inserts s [] = s
module IntegerSetList where
import SetClass
data IntegerSetList =
Empty
 Set [Integer]
deriving Show
instance IsSet IntegerSetList where
empty = Empty
insert (Set l) x = Set (x:l)
insert (Empty) x = Set [x]
contains x (Empty) = False
contains x (Set l) = x `elem` l
module IntegerSetTree where
import SetClass
data IntegerSetTree =
Leaf
 Node Integer IntegerSetTree IntegerSetTree
deriving Show
instance IsSet IntegerSetTree where
empty = Leaf
insert (Leaf ) x = Node x Leaf Leaf
insert (Node y s s') x =
if x < y then
Node y (insert s x) s'
else
Node y s (insert s' x)
contains x (Leaf ) = False
contains x (Node y s s') =
if y == x then
True
else if x < y then
contains x s
else
contains x s'
Type > Type
, and so on).
Set
data structure for holding sets of values of any type a
.
module Set where
data Set a =
Empty
 Set [a]
deriving Show
insert :: Set a > a > Set a
insert (Set l) x = Set (x:l)
insert (Empty) x = Set [x]
contains :: Eq a => a > Set a > Bool
contains x (Empty) = False
contains x (Set l) = x `elem` l
module SetClass where
class IsSet s where
empty :: s a
insert :: Ord a => s a > a > s a
contains :: (Ord a, Eq a) => a > s a > Bool
inserts :: Ord a => s a > [a] > s a
inserts s (x:xs) = inserts (insert s x) xs
inserts s [] = s
Ord a
constraint on the element type a
in case we want to compare them (e.g., if we are using a binary search tree implementation of sets).
module SetList where
import SetClass
data SetList a =
Empty
 Set [a]
deriving Show
instance IsSet SetList where
empty = Empty
insert (Set l) x = Set (x:l)
insert (Empty) x = Set [x]
contains x (Empty) = False
contains x (Set l) = x `elem` l
module SetTree where
import SetClass
data SetTree a =
Leaf
 Node a (SetTree a) (SetTree a)
deriving Show
instance IsSet SetTree where
empty = Leaf
insert (Leaf ) x = Node x Leaf Leaf
insert (Node y s s') x =
if x < y then
Node y (insert s x) s'
else
Node y s (insert s' x)
contains x (Leaf ) = False
contains x (Node y s s') =
if y == x then
True
else if x < y then
contains x s
else
contains x s'
Set
data structure for holding sets of values of any type a
that has a corresponding size :: Set a > Int
function.
module Set where
data Set a =
Empty
 Set [a]
deriving (Eq, Show)
size :: Set a > Int
size (Set l) = length l
size (Empty) = 0
<
operator by making the type Set a
(for any possible a
) a member of the Ord
type class.
instance Eq a => Ord (Set a) where
s < s' = size s < size s'
a
must be in the Eq
type class. This is because the definition of Set a
contains a deriving Eq
clause, which means that a type Set a
can only be used in a module if the parameter type a
is a member of the Eq
type class (otherwise, Haskell would not be able to compare two sets for equality using the automatically derived definition of (==)
generated by the deriving Eq
clause).
Integer
lists that does nothing.
fold :: [Integer] > [Integer]
fold [] = []
fold (x:xs) = x : fold xs
(:)
and []
) in the input list with a binary operator and a base case (in this case, (+)
and 0
). This way, the fold
function now computes the sum of the elements in the list.
fold :: [Integer] > Integer
fold [] = 0
fold (x:xs) = x + fold xs
(:)
, so it must be of type Integer > Integer > Integer
, and a replacement for []
, which must be of type Integer
.
fold :: (Integer > Integer > Integer) > Integer > [Integer] > Integer
fold f b [] = b
fold f b (x:xs) = f x (fold f b xs)
fold
implementation to define many common functions on lists of integers.
sum :: [Integer] > Integer
sum = fold (+) 0
product :: [Integer] > Integer
product = fold (*) 1
maximum :: [Integer] > Integer
maximum = fold max 0  Assumes nonnegative integers.
minimum :: [Integer] > Integer
minimum l = fold min (fold max 0 l) l
(:)
and []
) is called foldr
.
foldr :: (a > b > b) > b > [a] > b
foldr f base [] = base
foldr f base (x:xs) = f x (foldr f base xs)
foldr
function can be used to implement many of the usual library functions one might find useful when working with lists, and this is what is done in the Haskell libraries. Below are some examples (some of these are simplified versions of what is actually found in the libraries).
sum :: Num a > [a] > a
sum xs = foldr (+) 0 xs
max :: Ord a => a > a > a
max x y = if x > y then x else y
maximum :: Ord a => [a] > a
maximum xs = foldr max 0 xs
1
to every element in an Integer
list. For example, we want to achieve the following using a call to foldr
:
*> [x + 1  x < [1,2,3,4,5]]
[2,3,4,5,6]
1
to its first argument, then builds the list node:
addOneThenCons :: Integer > [Integer] > [Integer]
addOneThenCons x xs = (x + 1) : xs
foldr
:
*> foldr addOneThenCons [] [1,2,3,4,5]
[2,3,4,5,6]
addOneThenCons
:
*> foldr (\x xs > (x+1) : xs) [] [1,2,3,4,5]
[2,3,4,5,6]
*> [x  x < [3,2,1,0,1,2,3], x > 0]
[1,2,3]
consIfPositive :: Integer > [Integer] > [Integer]
consIfPositive x xs = if x > 0 then x : xs else xs
foldr
:
*> foldr consIfPositive [] [3,2,1,0,1,2,3]
[1,2,3]
consIfPositive
:
*> foldr (\x xs > if x > 0 then x : xs else xs) [] [3,2,1,0,1,2,3]
[1,2,3]
qsort :: [Integer] > [Integer]
qsort [] = []
qsort (x:xs) = qsort [y  y < xs, y < x] ++ [x] ++ qsort [y  y < xs, y >= x]
fold :: (a > a > a) > a > [a] > a
fold f b [] = b
fold f b [x] = x
fold f b xs =
fold f b (take (length xs `div` 2) xs)
`f`
fold f b (drop (length xs `div` 2) xs)
(Name, Age)
:
++
 People 
++
 Name  Age 
+++
 Alice  20 
 Bob  17 
 Carl  23 
+++
Name
entries in the table; this query amounts to a map operation:
> SELECT Name from People
++
 Name 
++
 Alice 
 Bob 
 Carl 
++
Age
range; this query amounts to a filter operation:
> SELECT * from People where Age >= 18
++
 Name  Age 
+++
 Alice  20 
 Carl  23 
+++
Age
fields of all the entries; this query amounts to fold operation (it is often called an aggregation operation):
> SELECT SUM(Age) from People
++
 Age 
++
 60 
++
type Name = String
type Age = Integer
type Table = [(Name, Age)]
people = [("Alice", 20), ("Bob", 17), ("Carl", 23)]
*> [name  (name, age) < people]
["Alice", "Bob", "Carl"]
*> [(name, age)  (name, age) < people, age >= 18]
[("Alice", 20), ("Carl", 23)]
*> foldr (+) 0 [age  (name, age) < people]
60
map
, filter
, foldr
, and λ abstractions instead of list comprehensions (the Haskell compiler simply converts list comprehensions into some combination of these during compilation):
*> map (\(name, age) > name) people
["Alice", "Bob", "Carl"]
*> map fst people
["Alice", "Bob", "Carl"]
*> filter (\(name, age) > age >= 18) people]
[("Alice", 20), ("Carl", 23)]
*> foldr (+) 0 (map snd people)
60
>>> People = [("Alice", 20), ("Bob", 17), ("Carl", 23)]
>>> [name for (name, age) in People]
["Alice", "Bob", "Carl"]
>>> [(name, age) for (name, age) in People if age >= 18]
[('Alice', 20), ('Carl', 23)]
>>> sum([age for (name, age) in People])
60
map
, filter
, reduce
(similar to Haskell's foldr
), and lambda abstractions:
>>> list(map((lambda person: person[0]), People))
['Alice', 'Bob', 'Carl']
>>> list(filter((lambda person: person[1] >= 18), People))
[('Alice', 20), ('Carl', 23)]
>>> from functools import reduce
>>> reduce(lambda x,y: x + y, [age for (name, age) in People])
60
li
elements and updates the text content of each to indicate the item number corresponding to that element. Notice that the .text()
function takes a function as an argument (the function supplied as an argument takes an index as an input and returns a string):
$("ul li").text(
function( index ) {
return "item number " + ( index + 1 );
}
);
.each()
function.
Tree a
:
data Tree a =
Node a (Tree a) (Tree a)
 Leaf
deriving Show
foldTree
function for values of type Tree a
, we first list all the constructors for Tree a
(we could also get this information using the :t
command in GHCi):
Node :: a > Tree a > Tree a > Tree a
Leaf :: Tree a
Node
and Leaf
in a tree with different value or function (for example, let's call Node
's replacement n
and Leaf
's replacement l
); this means that the replacement values and functions must have a type that has the same structure (i.e., they must take the same number of arguments) as the constructors they replace. However, they will fold into some new type of value b
, so we replace every instance of Tree a
with the new result type b
:
n :: a > b > b > b
l :: b
foldTree
function to take the above two arguments, and then the tree that is being folded as the last argument. Every Node
will be replaced with n
and every Leaf
will be replaced with l
. Notice that in the case of Node
, we also recursively fold the trees, since we need to turn them into the result type b
before we can apply n :: a > b > b > b
.
foldTree :: (a > b > b > b) > b > Tree a > b
foldTree n l (Node x t1 t2) = n x (foldTree n l t1) (foldTree n l t2)
foldTree n l Leaf = l
mapTree
function is just a special case of fold in which n x t1 t2 = Node (f x) t1 t2
for some function f
, and where l = Leaf
, since we want to change each value of type a
inside the tree, but not the nodes of the tree themselves. This also means we no longer need n
and l
as arguments; we only need f
.
mapTree :: (a > c) > (Tree a > Tree c)
mapTree f (Node x t1 t2) = Node (f x) (mapTree f t1) (mapTree f t2)
mapTree f Leaf = Leaf
mapTree f = foldTree (\x > Node (f x)) Leaf
(and we could have defined mapTree
in this way):
mapTree f = foldTree (\x > Node (f x)) Leaf
Node
is partially applied to its result, which means Node (f x)
is a function still waiting for two more arguments (the two subtrees). Alternatively but equivalently, we could have written the below definition:
mapTree f t = foldTree (\x t1 t2 > Node (f x) t1 t2) Leaf t
data Formula =
T
 F
 Not Formula
 And Formula Formula
 Or Formula Formula
deriving Show
Formula
(we could also get this information using the :t
command in GHCi):
T :: Formula
F :: Formula
Not :: Formula > Formula
And :: Formula > Formula > Formula
Or :: Formula > Formula > Formula
And
's replacement a
and T
's replacement t
); this means that the replacement values and functions must have a type that has the same structure (i.e., they must take the same number of arguments) as the constructors they replace. However, they will fold into some new type of value b
, so we replace every instance of Formula
with the new result type b
:
t :: b
f :: b
n :: b > b
a :: b > b > b
o :: b > b > b
foldFormula
function to take the above five arguments, and then the formula tree that is being folded as the last argument.
foldFormula :: b > b > (b > b) > (b > b > b) > (b > b > b) > Formula > b
foldFormula t f n a o (T ) = t
foldFormula t f n a o (F ) = f
foldFormula t f n a o (Not formula) = n (foldFormula t f n a o formula)
foldFormula t f n a o (And f1 f2 ) = a (foldFormula t f n a o f1) (foldFormula t f n a o f2)
foldFormula t f n a o (Or f1 f2 ) = o (foldFormula t f n a o f1) (foldFormula t f n a o f2)
Bool
result, and one that evaluates the tree as a native Haskell Int
result:
evalAsBool :: Formula > Bool
evalAsBool = foldFormula True False not (&&) ()
evalAsInt :: Formula > Int
evalAsInt = foldFormula 1 0 (() 1) (*) max
foldFormula
function acts as a form of encapsulation for the Formula
data structure. Suppose we want to change the data type definition to the following (using a terminology for formulas that corresponds to terminology from the study of logic):
data Formula =
Top
 Bottom
 Neg Formula
 Conj Formula Formula
 Disj Formula Formula
deriving Show
foldFormula
once, and would not need to change our implementations of evalAsBool
and evalAsInt
at all.
foldFormula :: b > b > (b > b) > (b > b > b) > (b > b > b) > Formula > b
foldFormula t f n a o (Top ) = t
foldFormula t f n a o (Bottom ) = f
foldFormula t f n a o (Neg formula ) = n (foldFormula t f n a o formula)
foldFormula t f n a o (Conj f1 f2 ) = a (foldFormula t f n a o f1) (foldFormula t f n a o f2)
foldFormula t f n a o (Disj f1 f2 ) = o (foldFormula t f n a o f1) (foldFormula t f n a o f2)
neg :: Integer > Integer
neg n = 1 * n
log2 :: Integer > Integer
log2 1 = 0
log2 2 = 1
log2 4 = 2
log2 _ = 1  Undefined in all other cases.
log2
is that the output 1
is ambiguous: it may be an actual result or an error. We can document that this result is exceptional by introducing a new parametric polymorphic data type that allows us to return either a result or a constructor indicating that there is no result. In the Haskell Prelude
, the Maybe a
data type is defined for such situations (although we could create our own, as well).
data Maybe a = Just a  Nothing
log2
.
log2 :: Integer > Maybe Integer
log2 1 = Just 0
log2 2 = Just 1
log2 4 = Just 2
log2 _ = Nothing
neg
directly to the result of log2
because Maybe Integer
and Integer
do not unify, so the Haskell type inference rule for function application is not satisfied. To address this, we can create a parametric polymorphic map function for Maybe a
.
mapMaybe :: (a > b) > (Maybe a > Maybe b)
mapMaybe f (Just x ) = Just (f x)
mapMaybe f (Nothing) = Nothing
neg
to an input of type Maybe Integer
.
*> (mapMaybe neg) (log2 (2))
Nothing
*> (mapMaybe neg) (log2 4)
Just (2)
mapMaybe
can be described as "lifting" a function of type a > b
to a more powerful function Maybe a > Maybe b
, another common name for the map operation is lift.
liftMaybe :: (a > b) > (Maybe a > Maybe b)
liftMaybe = mapMaybe
*> :t (liftMaybe log2) (log2 (2))
Maybe (Maybe Integer)
joinMaybe :: Maybe (Maybe a) > Maybe a
joinMaybe (Just (Just x)) = Just x
joinMaybe (Just Nothing ) = Nothing
joinMaybe (Nothing ) = Nothing
Maybe a
.
*> joinMaybe $ (liftMaybe log2) (log2 (2))
Nothing
type Noun = String
type Adjective = String
type Verb = String
type Adverb = String
data NounPhrase = AN Adjective NounPhrase  N Noun
data VerbPhrase = AV Adverb VerbPhrase  V Verb
data Sentence = S NounPhrase VerbPhrase
 = 

 = 

"cat"
is the subject?
 ::= 
 
 
 
 

data Tree =
Red Tree Tree
 Blue Tree Tree
 Green Integer
[???] 

[???] 

 ::= 
 
 ::= 
 



gsubmit
. This section reproduces and extends some of the instructions already made available by the BU Computer Science Department.
csa
machines maintained by the CS Dept. You will need to physically visit the undergraduate computing lab located at 730 Commonwealth Avenue, on the third floor in room 302.csa
home directory. If you are using Windows, you will also need an SSH client (such as PuTTY).
local device

your csa2 /csa3 home directory 
your gsubmit directory for CS 320 
csa2
or csa3
using an SCP or SSH client and create a directory for your submission in your CS account home directory. Note that in the examples below %>
represents a terminal prompt, which may look different on your system.
%> cd ~
%> mkdir hw1
local device

your csa2 /csa3 home directory

your gsubmit directory for CS 320 
csa2
or csa3
using an SCP client and copy your completed file(s) into that directory.
local device

⇒ 
your csa2 /csa3 home directory

your gsubmit directory for CS 320 
csa2
or csa3
using an SSH client and run the gsubmit
commands to copy the files from your CS account home directory to the gsubmit
directories to which the course staff has access.
%> cd ~
%> gsubmit cs320 hw1
local device

⇒ 
your csa2 /csa3 home directory

⇒ 
your gsubmit directory for CS 320

%> gsubmit cs320 ls
%> gsubmit cs320 cat hw1/hw1.py
%> gsubmit cs320 cat grade.hw1.txt
csa2/csa3
..py
. For example, suppose the following is contained within a file called example.py
:
# This is a comment in "example.py".
# Below is a Python statement.
print("Hello, world.")
python3
, python3.2
, python3.3
, etc. depending on the Python installation you're using). Note that in the examples below %>
represents a terminal prompt, which may look different on your system.
%> python example.py
Hello, world.
%> python
Python 3.2 ...
Type "help", "copyright", "credits" or "license" for more information.
>>> exec(open("example.py").read()) # Load "example.py" module.
Hello, world.
>>> x = "Hello." # Execute an assignment statement.
>>> print(x) # Execute a "print" statement.
Hello.
>>> x # Evaluate a string expression.
'Hello.'
>>> 1 + 2 # Evaluate a numerical expression.
3
True
and False
.and
, or
, and not
.
>>> True # A boolean constant.
True
>>> False # A boolean constant.
False
>>> True and False or True and (not False) # A boolean expression.
True
+
, *
, 
, and /
. The infix operator //
represents integer division, and the infix operators **
represents exponentiation. Negative integers are prefixed with the negation operator 
.==
, !=
, <
, >
, <=
, >=
are available.int()
function can convert a string that looks like an integer into an integer.
>>> 123 # An integer constant.
123
>>> 1 * (2 + 3) // 4  5 # An integer expression.
4
>>> 4 * 5 >= 19 # A boolean expression involving integers.
True
>>> int("123") # A string being converted into an integer
123
'
or "
characters. Strings can be treated as lists of singlecharacter strings. Another way to look at this is that there is no distinction between a character and a string: all characters are just strings of length 1. Multiline strings can be delimited using """
or '''
(i.e., three quotation mark characters at the beginning and end of the string literal).''
or ""
.+
.len()
returns the length of a string.s[i]
). These characters are also strings themselves.
>>> 'Example.' # A string.
'Example.'
>>> "Example." # Alternate notation for a string.
'Example.'
>>> len("ABCD") # String length.
4
>>> "ABCD" + "EFG" # String concatenation.
'ABCDEFG'
>>> "ABCD"[2] # Third character in the string.
'C'
[
and ]
, with the individual list entries separated by commas. Lists cannot be members of sets.[]
.+
.len()
returns the length of a list.a[i]
).in
relational operator.
>>> [1,2,"A","B"] # A list.
[1, 2, 'A', 'B']
>>> [1, 2] + ['A','B'] # Concatenating lists.
[1, 2, 'A', 'B']
>>> len([1,2,"A","B"] ) # List length.
4
>>> [1,2,"A","B"][0] # First entry in the list.
1
>>> 1 in [1, 2] # List containment check.
True
(
and )
, with entries separated by commas. The main distinction between lists and tuples is that tuples are hashable (i.e., they can be members of sets).()
.x
is denoted using (x, )
.+
.list()
function.tuple()
function.len()
returns the length of a tuple.t[i]
).in
relational operator.
>>> (1,2,"A","B") # A tuple.
(1, 2, 'A', 'B')
>>> (1,) # Another tuple.
(1,)
>>> (1, 2) + ('A','B') # Concatenating tuples.
(1, 2, 'A', 'B')
>>> list((1, 2, 'A','B')) # A tuple being converted into a list.
[1, 2, 'A', 'B']
>>> tuple([1, 2, 'A','B']) # A list being converted into a tuple.
(1, 2, 'A', 'B')
>>> len((1,2,"A","B")) # Tuple length.
4
>>> (1,2,"A","B")[0] # First entry in the tuple.
1
>>> 1 in (1, 2) # Tuple containment check.
True
set()
..union()
and .intersect
correspond to the standard set operations.set()
function.list()
or list()
function, respectively.len()
returns the size of a set.in
relational operator.
>>> {1,2,"A","B"} # A set.
{1, 2, 'A', 'B'}
>>> ({1,2}.union({3,4})).intersection({4,5}) # Set operations.
{4}
>>> set([1, 2]).union(set(('A','B'))) # Converting a list and a tuple to sets.
{'A', 1, 2, 'B'}
>>> len({1,2,"A","B"}) # Set size.
4
>>> 1 in {1,2,"A","B"} # Tuple containment check.
True
frozenset()
function.
>>> frozenset({1,2,3}) # A frozen set.
frozenset({1, 2, 3})
>>> {frozenset({1,2}), frozenset({3,4})} # Set of frozen sets.
{frozenset({3, 4}), frozenset({1, 2})}
{}
.list(d.keys())
.list(d.values())
.len()
returns the number of entries in the dictionary.d[key]
).
>>> {"A":1, "B":2} # A dictionary.
{'A': 1, 'B': 2}
>>> list({"A":1, "B":2}.keys()) # Dictionary keys.
['A', 'B']
>>> list({"A":1, "B":2}.values()) # Dictionary values.
[1, 2]
>>> len({"A":1, "B":2}) # Dictionary size.
2
>>> {"A":1, "B":2}["A"] # Obtain a dictionary value using a key.
1
example()
is defined as follows:
def example(x, y, z):
print("Invoked.")
return x + y + z
>>> example(1,2,3)
Invoked.
6
*
operator, the scatter operator, or the splat operator) involves providing a list to the function, preceded by the *
symbol; the arguments will be drawn from the elements in the list.
>>> args = [1,2,3]
>>> example(*args)
Invoked.
6
**
operator) involves providing a dictionary to the function, preceded by the **
symbol; each named paramter in the function definition will be looked up in the dictionary, and the value associated with that dictionary key will be used as the argument passed to that parameter.
>>> args = {'z':3, 'x':1, 'y':2}
>>> example(**args)
Invoked.
6
def example(x = 1, y = 2, z = 3):
return x + y + z
>>> example(0, 0)
3
>>> example(0)
5
>>> example()
6
>>> [ x for x in [1,2,3] ]
[1, 2, 3]
>>> [ 2 * x for x in {1,2,3} ]
[2, 4, 6]
>>> [ x + y for x in {1,2,3} for y in (1,2,3) ]
[2, 3, 4, 3, 4, 5, 4, 5, 6]
for
clause. This will filter which combinations are actually used to add a value to the resulting list.
>>> [ x for x in {1,2,3} if x < 3 ]
[1, 2]
>>> [ x + y for x in {1,2,3} for y in (1,2,3) if x > 2 and y > 1 ]
[5, 6]
>>> { x for x in [1,2,3,1,2,3] }
{1, 2, 3}
>>> { key : 2 for key in ["A","B","C"] }
{'A': 2, 'C': 2, 'B': 2}
type()
can be used to determine the type of a value. Below, we provide examples of how to check whether a given expression has one of the common Python types:
>>> type(True) == bool
True
>>> type(123) == int
True
>>> type("ABC") == str
True
>>> type([1,2,3]) == list
True
>>> type(("A",1,{1,2})) == tuple
True
>>> type({1,2,3}) == set
True
>>> type({"A":1, "B":2}) == dict
True
if
, else
, and/or elif
), an iteration construct (i.e., a for
or while
loop), a return
statement, or a break
or continue
statement.
x = 10
(x, y) = (1, 2)
def
keyword, followed by the name of the function or procedure, and then by one or more arguments (delimited by parentheses and separated by commas).
def example(a, b, c):
return a + b + c
else
). The body of each branch is an indented sequence of statements.
def fibonacci(n):
# Computes the nth Fibonacci number.
if n <= 0:
return 0
elif n <= 2:
return 1
else:
return fibonacci(n1) + fibonacci(n2)
if
). The body is executed over and over until the expression in the condition evaluates to False
, or a break
statement is encountered.
def example1(n):
# Takes an integer n and returns the sum of
# the integers from 1 to n1.
i = 0
sum = 0
while i < n:
sum = sum + i
i = i + 1
return sum
def example2(n):
# Takes an integer n and returns the sum of
# the integers from 1 to n1.
i = 0
sum = 0
while True:
sum = sum + i
i = i + 1
if i == n:
break
return sum
def example3(n):
# Takes an integer n and returns the sum of
# the integers from 1 to n1.
sum = 0
for i in range(0,n):
sum = sum + i
return sum
def example4(d):
# Takes a dictionary d that maps keys to
# integers and returns the sum of the integers.
sum = 0
for key in d:
sum = sum + d[key]
return sum
csa2/csa3
.Example.hs
should contain the Example
module definition). The module body consists of a series of declarations, which can appear in any order; the only exception is that if there are multiple declarations for a function, they must all be grouped together.
module Example where
g (x) = f(x) + f(x);
f (0) = 0;
f (x) = f(x  1) + 1;
 This is a comment.
 Expressionlevel declaration (defines a new function called "f").
f(x) = 2 * x
 Typelevel declaration (defines a new data type called "Tree".
data Tree = Leaf  Node Tree Tree
True
and False
.&&
, 
, and not
.if ... then ... else ...
is also available (similar to Python's ... if ... else ...
and C/C++/Java's ternary operator ... ? ... : ...
.
*> True
True
*> False
False
*> True  False && True && (not False)
True
*> if True then "Yes" else "No"
"Yes"
Int
and an unboundedsize type Integer
.+
, *
, 
, /
, and ^
. The Prelude
function div
represents integer division. The Prelude
functions min
and max
make it possible to compute the minimum and maximum of two integers, respectively, and the function minimum
and maximum
make it possible to compute the minimum and maximum of a list of integers. Negative integers are prefixed with the negation operator 
.==
, /=
(not equal), <
, >
, <=
, >=
are available.
*> 123
123
*> 1 * (2 + 3) `div` 4  5
4
*> 4 * 5 >= 19
True
*> max 10 20
20
*> minimum [1,2,3]
1
"
characters (individual characters are always delimited using '
). Strings can be treated as lists of characters.""
.++
.Prelude
function length
returns the length of a string.
*> "Example."  A string.
"Example."
*> length "ABCD"  String length.
4
*> "ABCD" ++ "EFG"  String concatenation.
"ABCDEFG"
[
and ]
, with the individual list entries separated by commas.[]
.:
.++
.Prelude
function length
returns the length of a list.Prelude
function elem
(note that this relies on the ==
operation being defined on the type of the elements in the list).
*> [1,2,3]  A list.
[1,2,3]
*> 1 : [2,3]  Adding an element to the front.
[1,2,3]
*> [1,2,3] ++ [4,5]  Concatenating lists.
[1,2,3,4,5]
*> length [1,2,3,4]  List length.
4
*> 1 `elem` [1, 2]  List containment check.
True
(
and )
, with entries separated by commas.()
.(e)
is equivalent to e
for any Haskell expression e
.
*> (1,2,3)  A tuple.
(1,2,3)