expr_grammar = convert_ebnf_grammar(EXPR_EBNF_GRAMMAR)
expr_grammar

{'<start>': ['<expr>'],
 '<expr>': ['<term> + <expr>', '<term> - <expr>', '<term>'],
 '<term>': ['<factor> * <term>', '<factor> / <term>', '<factor>'],
 '<factor>': ['<sign-1><factor>', '(<expr>)', '<integer><symbol-1>'],
 '<sign>': ['+', '-'],
 '<integer>': ['<digit-1>'],
 '<digit>': ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'],
 '<symbol>': ['.<integer>'],
 '<sign-1>': ['', '<sign>'],
 '<symbol-1>': ['', '<symbol>'],
 '<digit-1>': ['<digit>', '<digit><digit-1>']}


with ExpectTimeout(1):
    simple_grammar_fuzzer(grammar=expr_grammar, max_nonterminals=3)

Traceback (most recent call last):
  File "/var/folders/n2/xd9445p97rb3xh7m1dfx8_4h0006ts/T/ipykernel_81067/3259437052.py", line 2, in <cell line: 1>
    simple_grammar_fuzzer(grammar=expr_grammar, max_nonterminals=3)
  File "Grammars.ipynb", line 87, in simple_grammar_fuzzer
    symbol_to_expand = random.choice(nonterminals(term))
  File "Grammars.ipynb", line 61, in nonterminals
    return RE_NONTERMINAL.findall(expansion)
  File "Timeout.ipynb", line 43, in timeout_handler
    raise TimeoutError()
TimeoutError (expected)


quiz("Why does `simple_grammar_fuzzer()` hang?",
     [
         "It produces an infinite number of additions",
         "It produces an infinite number of digits",
         "It produces an infinite number of parentheses",
         "It produces an infinite number of signs",
     ], '(3 * 3 * 3) ** (3 / (3 * 3))')


expr_grammar['<factor>']

['<sign-1><factor>', '(<expr>)', '<integer><symbol-1>']


# Time `trials` runs of simple_grammar_fuzzer() on EXPR_GRAMMAR.
# xs collects the length of each produced string, ys the time reported
# by the Timer for producing it; both lists are read again further down
# (average computation and scatter plot).
trials = 50
xs = []
ys = []
for i in range(trials):
    # Only the fuzzer call itself is inside the timed section
    with Timer() as t:
        s = simple_grammar_fuzzer(EXPR_GRAMMAR, max_nonterminals=15)
    xs.append(len(s))
    ys.append(t.elapsed_time())
    # Progress indicator: trial numbers on a single line
    print(i, end=" ")
print()

0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49


average_time = sum(ys) / trials
print("Average time:", average_time)

Average time: 0.14829922830220313


%matplotlib inline

import matplotlib.pyplot as plt
plt.scatter(xs, ys)
plt.title('Time required for generating an output');


# ignore
tree


# ignore
tree


# ignore
tree


# ignore
tree

(SYMBOL_NAME, CHILDREN)


DerivationTree = Tuple[str, Optional[List[Any]]]


# Partially expanded tree for `<expr> + <term>`:
# children `None` = nonterminal still to be expanded, `[]` = terminal symbol.
derivation_tree: DerivationTree = (
    "<start>", [
        ("<expr>", [
            ("<expr>", None),
            (" + ", []),
            ("<term>", None),
        ]),
    ],
)


display_tree(derivation_tree)


# The last argument evaluates to 3 (6 - 3) — presumably the 1-based index of
# the correct option, i.e. the repr() of `derivation_tree`; confirm against
# quiz()'s definition.
quiz("And which of these is the internal representation of `derivation_tree`?",
    [
        "`('<start>', [('<expr>', (['<expr> + <term>']))])`",
        # Fixed: the last element was missing its opening quote (<term>')
        "`('<start>', [('<expr>', (['<expr>', ' + ', '<term>']))])`",
        "`" + repr(derivation_tree) + "`",
        "`(" + repr(derivation_tree) + ", None)`"
    ], len("eleven") - len("one"))


derivation_tree

('<start>', [('<expr>', [('<expr>', None), (' + ', []), ('<term>', None)])])


def all_terminals(tree: DerivationTree) -> str:
    """Return the concatenation of all leaf symbols of `tree`, left to right.

    A leaf is either a terminal symbol (children is an empty list) or a
    nonterminal that has not been expanded yet (children is `None`).
    Both contribute their symbol verbatim, so unexpanded nonterminals
    such as `<expr>` appear in the result.
    """
    symbol, children = tree

    # `None` (not expanded yet) and an empty list (terminal) are both leaves
    if children is None or len(children) == 0:
        return symbol

    # Expanded symbol: gather the leaves of every subtree in order
    parts = []
    for child in children:
        parts.append(all_terminals(child))
    return ''.join(parts)


all_terminals(derivation_tree)

'<expr> + <term>'


def tree_to_string(tree: DerivationTree) -> str:
    """Return the string spelled out by the leaves of `tree`.

    A node without children (`None` or empty) yields its own symbol,
    unless `is_nonterminal()` holds for that symbol, in which case it
    yields the empty string. Any elements of `tree` beyond the first
    two are ignored.
    """
    symbol, children, *_ = tree

    if not children:
        # Leaf: childless nonterminals contribute nothing to the string
        if is_nonterminal(symbol):
            return ''
        return symbol

    # Inner node: concatenate the strings of all subtrees in order
    return ''.join([tree_to_string(child) for child in children])


tree_to_string(derivation_tree)

' + '


class GrammarFuzzer(Fuzzer):
    """Produce strings from grammars efficiently, using derivation trees."""

    def __init__(self,
                 grammar: Grammar,
                 start_symbol: str = START_SYMBOL,
                 min_nonterminals: int = 0,
                 max_nonterminals: int = 10,
                 disp: bool = False,
                 log: Union[bool, int] = False) -> None:
        """Produce strings from `grammar`, starting with `start_symbol`.
        If `min_nonterminals` or `max_nonterminals` is given, use them as limits 
        for the number of nonterminals produced.  
        If `disp` is set, display the intermediate derivation trees.
        If `log` is set, show intermediate steps as text on standard output."""
        # NOTE: the docstring above is printed verbatim via inspect.getdoc()
        # elsewhere in this file; editing it changes that output.

        # Keep the configuration on the instance
        self.grammar = grammar
        self.start_symbol = start_symbol
        self.min_nonterminals = min_nonterminals
        self.max_nonterminals = max_nonterminals
        self.disp = disp
        self.log = log
        # Validate only after all settings above are stored
        # (check_grammar() is not defined in this block — presumably it
        # reads self.grammar and self.start_symbol; verify)
        self.check_grammar()  # Invokes is_valid_grammar()

class GrammarFuzzer(GrammarFuzzer):
    def new_method(self, args):
        """Placeholder method; accepts `args` and does nothing."""
        return None


class GrammarFuzzer(GrammarFuzzer):
    def init_tree(self) -> DerivationTree:
        """Return the initial derivation tree: the start symbol, not yet expanded."""
        root_symbol = self.start_symbol
        # `None` as children marks a nonterminal that still has to be expanded
        unexpanded = None
        return (root_symbol, unexpanded)


f = GrammarFuzzer(EXPR_GRAMMAR)
display_tree(f.init_tree())


class GrammarFuzzer(GrammarFuzzer):
    def choose_node_expansion(self, node: DerivationTree,
                              children_alternatives: List[List[DerivationTree]]) -> int:
        """Pick which entry of `children_alternatives` to use for `node`.

        `children_alternatives` is a list of possible children for `node`;
        the return value is an index into that list.
        This default picks an index at random; to be overloaded in subclasses.
        """
        number_of_alternatives = len(children_alternatives)
        return random.randrange(0, number_of_alternatives)


expansion_to_children("<term> + <expr>")

[('<term>', None), (' + ', []), ('<expr>', None)]


expansion_to_children("")

[('', [])]


expansion_to_children(("+<term>", {"extra_data": 1234}))

[('+', []), ('<term>', None)]


class GrammarFuzzer(GrammarFuzzer):
    def expansion_to_children(self, expansion: Expansion) -> List[DerivationTree]:
        """Convert `expansion` into a list of child nodes.

        Thin wrapper around the module-level function of the same name.
        """
        children = expansion_to_children(expansion)
        return children


f = GrammarFuzzer(EXPR_GRAMMAR, log=True)

print("Before expand_node_randomly():")
expr_tree = ("<integer>", None)
display_tree(expr_tree)

Before expand_node_randomly():


print("After expand_node_randomly():")
expr_tree = f.expand_node_randomly(expr_tree)
display_tree(expr_tree)

After expand_node_randomly():
Expanding <integer> randomly


# docassert
assert expr_tree[1][0][0] == '<digit>'


quiz("What tree do we get if we expand the `<digit>` subtree?",
     [
         "We get another `<digit>` as new child of `<digit>`",
         "We get some digit as child of `<digit>`",
         "We get another `<digit>` as second child of `<integer>`",
         "The entire tree becomes a single node with a digit"
     ], 'len("2") + len("2")')


digit_subtree = expr_tree[1][0]  # type: ignore
display_tree(digit_subtree)


print("After expanding the <digit> subtree:")
digit_subtree = f.expand_node_randomly(digit_subtree)
display_tree(digit_subtree)

After expanding the <digit> subtree:
Expanding <digit> randomly


quiz("Is the original `expr_tree` affected by this change?",
     [
         "No, it is unchanged",
         "Yes, it has also gained a new child"
     ], "1 ** (1 - 1)")


display_tree(expr_tree)


class GrammarFuzzer(GrammarFuzzer):
    def possible_expansions(self, node: DerivationTree) -> int:
        """Count the nodes in the tree rooted at `node` that are still unexpanded.

        A node counts as unexpanded when its children are `None`.
        """
        _, children = node

        # An unexpanded node is exactly one possible expansion
        if children is None:
            return 1

        # Otherwise add up the counts of all subtrees
        count = 0
        for child in children:
            count += self.possible_expansions(child)
        return count


f = GrammarFuzzer(EXPR_GRAMMAR)
print(f.possible_expansions(derivation_tree))

2


class GrammarFuzzer(GrammarFuzzer):
    def any_possible_expansions(self, node: DerivationTree) -> bool:
        """Return True if the tree rooted at `node` has at least one unexpanded node.

        A node counts as unexpanded when its children are `None`.
        """
        _, children = node

        if children is None:
            return True

        # Stop at the first subtree that still can be expanded
        for child in children:
            if self.any_possible_expansions(child):
                return True
        return False


f = GrammarFuzzer(EXPR_GRAMMAR)
f.any_possible_expansions(derivation_tree)

True


# Reset to the partially expanded tree for `<expr> + <term>`
# (`None` children = still to be expanded, `[]` = terminal symbol).
derivation_tree = (
    "<start>", [
        ("<expr>", [
            ("<expr>", None),
            (" + ", []),
            ("<term>", None),
        ]),
    ],
)
display_tree(derivation_tree)


f = GrammarFuzzer(EXPR_GRAMMAR, log=True)
derivation_tree = f.expand_tree_once(derivation_tree)
display_tree(derivation_tree)

Expanding <expr> randomly


derivation_tree = f.expand_tree_once(derivation_tree)
display_tree(derivation_tree)

Expanding <term> randomly


f = GrammarFuzzer(EXPR_GRAMMAR)
assert f.symbol_cost("<digit>") == 1


assert f.symbol_cost("<expr>") == 5


class GrammarFuzzer(GrammarFuzzer):
    def expand_node_min_cost(self, node: DerivationTree) -> DerivationTree:
        """Expand `node` via `expand_node_by_cost()`, using `min` as the cost selector.

        If `self.log` is set, the node being expanded is printed first.
        """
        if self.log:
            node_text = all_terminals(node)
            print("Expanding", node_text, "at minimum cost")

        expanded = self.expand_node_by_cost(node, min)
        return expanded


class GrammarFuzzer(GrammarFuzzer):
    def expand_node(self, node: DerivationTree) -> DerivationTree:
        """Expand `node`; this variant always uses the minimum-cost strategy."""
        expanded = self.expand_node_min_cost(node)
        return expanded


f = GrammarFuzzer(EXPR_GRAMMAR, log=True)
display_tree(derivation_tree)


# docassert
assert f.any_possible_expansions(derivation_tree)


if f.any_possible_expansions(derivation_tree):
    derivation_tree = f.expand_tree_once(derivation_tree)
display_tree(derivation_tree)

Expanding <expr> at minimum cost


# docassert
assert f.any_possible_expansions(derivation_tree)


if f.any_possible_expansions(derivation_tree):
    derivation_tree = f.expand_tree_once(derivation_tree)
display_tree(derivation_tree)

Expanding <factor> at minimum cost


# docassert
assert f.any_possible_expansions(derivation_tree)


if f.any_possible_expansions(derivation_tree):
    derivation_tree = f.expand_tree_once(derivation_tree)
display_tree(derivation_tree)

Expanding <term> at minimum cost


while f.any_possible_expansions(derivation_tree):
    derivation_tree = f.expand_tree_once(derivation_tree)

Expanding <factor> at minimum cost
Expanding <integer> at minimum cost
Expanding <integer> at minimum cost
Expanding <term> at minimum cost
Expanding <digit> at minimum cost
Expanding <digit> at minimum cost
Expanding <factor> at minimum cost
Expanding <integer> at minimum cost
Expanding <digit> at minimum cost


display_tree(derivation_tree)


class GrammarFuzzer(GrammarFuzzer):
    def expand_node_max_cost(self, node: DerivationTree) -> DerivationTree:
        """Expand `node` via `expand_node_by_cost()`, using `max` as the cost selector.

        If `self.log` is set, the node being expanded is printed first.
        """
        if self.log:
            node_text = all_terminals(node)
            print("Expanding", node_text, "at maximum cost")

        expanded = self.expand_node_by_cost(node, max)
        return expanded


class GrammarFuzzer(GrammarFuzzer):
    def expand_node(self, node: DerivationTree) -> DerivationTree:
        """Expand `node`; this variant always uses the maximum-cost strategy."""
        expanded = self.expand_node_max_cost(node)
        return expanded


# Reset to the partially expanded tree for `<expr> + <term>`
# (`None` children = still to be expanded, `[]` = terminal symbol).
derivation_tree = (
    "<start>", [
        ("<expr>", [
            ("<expr>", None),
            (" + ", []),
            ("<term>", None),
        ]),
    ],
)


f = GrammarFuzzer(EXPR_GRAMMAR, log=True)
display_tree(derivation_tree)


# docassert
assert f.any_possible_expansions(derivation_tree)


if f.any_possible_expansions(derivation_tree):
    derivation_tree = f.expand_tree_once(derivation_tree)
display_tree(derivation_tree)

Expanding <expr> at maximum cost


# docassert
assert f.any_possible_expansions(derivation_tree)


if f.any_possible_expansions(derivation_tree):
    derivation_tree = f.expand_tree_once(derivation_tree)
display_tree(derivation_tree)

Expanding <expr> at maximum cost


# docassert
assert f.any_possible_expansions(derivation_tree)


if f.any_possible_expansions(derivation_tree):
    derivation_tree = f.expand_tree_once(derivation_tree)
display_tree(derivation_tree)

Expanding <term> at maximum cost


# Starting point for expand_tree(): `<expr> + <term>` with both
# nonterminals still unexpanded (`None` children); `[]` marks a terminal.
initial_derivation_tree: DerivationTree = (
    "<start>", [
        ("<expr>", [
            ("<expr>", None),
            (" + ", []),
            ("<term>", None),
        ]),
    ],
)


display_tree(initial_derivation_tree)


f = GrammarFuzzer(
    EXPR_GRAMMAR,
    min_nonterminals=3,
    max_nonterminals=5,
    log=True)
derivation_tree = f.expand_tree(initial_derivation_tree)

Tree: <expr> + <term>
Expanding <term> at maximum cost
Tree: <expr> + <factor> / <term>
Expanding <factor> randomly
Tree: <expr> + <integer> / <term>
Expanding <integer> randomly
Tree: <expr> + <digit><integer> / <term>
Expanding <expr> randomly
Tree: <term> + <digit><integer> / <term>
Expanding <term> randomly
Tree: <factor> + <digit><integer> / <term>
Expanding <term> randomly
Tree: <factor> + <digit><integer> / <factor> * <term>
Expanding <integer> at minimum cost
Tree: <factor> + <digit><digit> / <factor> * <term>
Expanding <factor> at minimum cost
Tree: <integer> + <digit><digit> / <factor> * <term>
Expanding <integer> at minimum cost
Tree: <digit> + <digit><digit> / <factor> * <term>
Expanding <digit> at minimum cost
Tree: 1 + <digit><digit> / <factor> * <term>
Expanding <digit> at minimum cost
Tree: 1 + 4<digit> / <factor> * <term>
Expanding <factor> at minimum cost
Tree: 1 + 4<digit> / <integer> * <term>
Expanding <digit> at minimum cost
Tree: 1 + 43 / <integer> * <term>
Expanding <term> at minimum cost
Tree: 1 + 43 / <integer> * <factor>
Expanding <factor> at minimum cost
Tree: 1 + 43 / <integer> * <integer>
Expanding <integer> at minimum cost
Tree: 1 + 43 / <integer> * <digit>
Expanding <digit> at minimum cost
Tree: 1 + 43 / <integer> * 2
Expanding <integer> at minimum cost
Tree: 1 + 43 / <digit> * 2
Expanding <digit> at minimum cost
Tree: 1 + 43 / 3 * 2


display_tree(derivation_tree)


all_terminals(derivation_tree)

'1 + 43 / 3 * 2'


class GrammarFuzzer(GrammarFuzzer):
    def fuzz_tree(self) -> DerivationTree:
        """Produce a derivation tree from the grammar."""
        # Start from the initial tree and expand all nonterminals
        expanded = self.expand_tree(self.init_tree())

        if self.log:
            print(repr(all_terminals(expanded)))
        if self.disp:
            display(display_tree(expanded))

        return expanded

    def fuzz(self) -> str:
        """Produce a string from the grammar."""
        # The tree stays available as `self.derivation_tree` after the call
        tree = self.fuzz_tree()
        self.derivation_tree = tree
        return all_terminals(tree)


f = GrammarFuzzer(EXPR_GRAMMAR)
f.fuzz()

'10.3 * 9 * (9 * 9 + 8 + 7 - 6) * 55.3 * 3 / 9'


display_tree(f.derivation_tree)


f = GrammarFuzzer(URL_GRAMMAR)
f.fuzz()

'ftp://user:password@fuzzingbook.com/abc?x58=def&def=3&x97=x40&abc=abc'


display_tree(f.derivation_tree)


f = GrammarFuzzer(CGI_GRAMMAR, min_nonterminals=3, max_nonterminals=5)
f.fuzz()

'%d213'


display_tree(f.derivation_tree)


# Time `trials` calls of GrammarFuzzer.fuzz() on EXPR_GRAMMAR.
# xs collects the length of each produced string, ys the time reported
# by the Timer for producing it; both lists are read again further down
# (average computation and scatter plot).
trials = 50
xs = []
ys = []
# The fuzzer is created once, outside the timed section
f = GrammarFuzzer(EXPR_GRAMMAR, max_nonterminals=20)
for i in range(trials):
    with Timer() as t:
        s = f.fuzz()
    xs.append(len(s))
    ys.append(t.elapsed_time())
    # Progress indicator: trial numbers on a single line
    print(i, end=" ")
print()

0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49


average_time = sum(ys) / trials
print("Average time:", average_time)

Average time: 0.014464827459305525


%matplotlib inline

import matplotlib.pyplot as plt
plt.scatter(xs, ys)
plt.title('Time required for generating an output');


f = GrammarFuzzer(expr_grammar, max_nonterminals=10)
f.fuzz()

'--((3) / 7 * 9 / 0) * (9 + (7) * 4 * 3)'


phone_fuzzer = GrammarFuzzer(US_PHONE_GRAMMAR)
phone_fuzzer.fuzz()

'(236)844-1154'


area_fuzzer = GrammarFuzzer(US_PHONE_GRAMMAR, start_symbol='<area>')
area_fuzzer.fuzz()

'996'


# ignore
import inspect


# ignore
print(inspect.getdoc(GrammarFuzzer.__init__))

Produce strings from `grammar`, starting with `start_symbol`.
If `min_nonterminals` or `max_nonterminals` is given, use them as limits 
for the number of nonterminals produced.  
If `disp` is set, display the intermediate derivation trees.
If `log` is set, show intermediate steps as text on standard output.


# ignore
from ClassDiagram import display_class_hierarchy


# ignore
display_class_hierarchy([GrammarFuzzer],
                        public_methods=[
                            Fuzzer.__init__,
                            Fuzzer.fuzz,
                            Fuzzer.run,
                            Fuzzer.runs,
                            GrammarFuzzer.__init__,
                            GrammarFuzzer.fuzz,
                            GrammarFuzzer.fuzz_tree,
                        ],
                        types={
                            'DerivationTree': DerivationTree,
                            'Expansion': Expansion,
                            'Grammar': Grammar
                        },
                        project='fuzzingbook')


display_tree(phone_fuzzer.derivation_tree)


phone_fuzzer.derivation_tree

('<start>',
 [('<phone-number>',
   [('(', []),
    ('<area>',
     [('<lead-digit>', [('2', [])]),
      ('<digit>', [('3', [])]),
      ('<digit>', [('6', [])])]),
    (')', []),
    ('<exchange>',
     [('<lead-digit>', [('8', [])]),
      ('<digit>', [('4', [])]),
      ('<digit>', [('4', [])])]),
    ('-', []),
    ('<line>',
     [('<digit>', [('1', [])]),
      ('<digit>', [('1', [])]),
      ('<digit>', [('5', [])]),
      ('<digit>', [('4', [])])])])])


class ExerciseGrammarFuzzer(GrammarFuzzer):
    def expand_node_randomly(self, node: DerivationTree) -> DerivationTree:
        """Expand `node` via `expand_node_by_cost()`, using `random.choice` as the cost selector.

        If `self.log` is set, the node being expanded is printed first.
        """
        if self.log:
            node_text = all_terminals(node)
            print("Expanding", node_text, "randomly by cost")

        expanded = self.expand_node_by_cost(node, random.choice)
        return expanded

Efficient Grammar Fuzzing

An Insufficient Algorithm

Quiz

Derivation Trees

Representing Derivation Trees

Quiz

Expanding a Node

Picking a Children Alternative to be Expanded

Getting a List of Possible Expansions

Putting Things Together

Quiz

Quiz

Expanding a Tree

Closing the Expansion

Node Inflation

Three Expansion Phases

Putting it all Together

Synopsis

Efficient Grammar Fuzzing

Derivation Trees

Lessons Learned

Background

Exercises

Exercise 1: Caching Method Results

Exercise 2: Grammar Pre-Compilation

Exercise 3: Maintaining Trees to be Expanded

Exercise 4: Alternate Random Expansions